2: Half-period mode
4: Quarter-period mode
- wakeup-source: Boolean, rotary encoder can wake up the system.
+- rotary-encoder,encoding: String, the method used to encode steps.
+ Supported are "gray" (the default and more common) and "binary".
Deprecated properties:
- rotary-encoder,half-period: Makes the driver work on half-period mode.
compatible = "rotary-encoder";
gpios = <&gpio 19 1>, <&gpio 20 0>; /* GPIO19 is inverted */
linux,axis = <0>; /* REL_X */
+ rotary-encoder,encoding = "gray";
rotary-encoder,relative-axis;
};
gpios = <&gpio 21 0>, <&gpio 22 0>;
linux,axis = <1>; /* ABS_Y */
rotary-encoder,steps = <24>;
+ rotary-encoder,encoding = "binary";
rotary-encoder,rollover;
};
--- /dev/null
+* GSL 1680 touchscreen controller
+
+Required properties:
+- compatible : "silead,gsl1680"
+- reg : I2C slave address of the chip (0x40)
+- interrupt-parent : a phandle pointing to the interrupt controller
+ serving the interrupt for this chip
+- interrupts : interrupt specification for the gsl1680 interrupt
+- power-gpios : Specification for the pin connected to the gsl1680's
+ shutdown input. This needs to be driven high to take the
+ gsl1680 out of its low power state
+- touchscreen-size-x : See touchscreen.txt
+- touchscreen-size-y : See touchscreen.txt
+
+Optional properties:
+- touchscreen-inverted-x : See touchscreen.txt
+- touchscreen-inverted-y : See touchscreen.txt
+- touchscreen-swapped-x-y : See touchscreen.txt
+- silead,max-fingers : maximum number of fingers the touchscreen can detect
+
+Example:
+
+i2c@00000000 {
+ gsl1680: touchscreen@40 {
+ compatible = "silead,gsl1680";
+ reg = <0x40>;
+ interrupt-parent = <&pio>;
+ interrupts = <6 11 IRQ_TYPE_EDGE_FALLING>;
+ power-gpios = <&pio 1 3 GPIO_ACTIVE_HIGH>;
+ touchscreen-size-x = <480>;
+ touchscreen-size-y = <800>;
+ touchscreen-inverted-x;
+ touchscreen-swapped-x-y;
+ silead,max-fingers = <5>;
+ };
+};
--- /dev/null
+* SiS I2C Multiple Touch Controller
+
+Required properties:
+- compatible: must be "sis,9200-ts"
+- reg: i2c slave address
+- interrupt-parent: the phandle for the interrupt controller
+ (see interrupt binding [0])
+- interrupts: touch controller interrupt (see interrupt
+ binding [0])
+
+Optional properties:
+- pinctrl-names: should be "default" (see pinctrl binding [1]).
+- pinctrl-0: a phandle pointing to the pin settings for the
+ device (see pinctrl binding [1]).
+- attn-gpios: the gpio pin used as attention line
+- reset-gpios: the gpio pin used to reset the controller
+- wakeup-source: touchscreen can be used as a wakeup source
+
+[0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+[1]: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+
+Example:
+
+ sis9255@5c {
+ compatible = "sis,9200-ts";
+ reg = <0x5c>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_sis>;
+ interrupt-parent = <&gpio3>;
+ interrupts = <19 IRQ_TYPE_EDGE_FALLING>;
+ irq-gpios = <&gpio3 19 GPIO_ACTIVE_LOW>;
+ reset-gpios = <&gpio2 30 GPIO_ACTIVE_LOW>;
+ };
sii Seiko Instruments, Inc.
silergy Silergy Corp.
sirf SiRF Technology, Inc.
+sis Silicon Integrated Systems Corp.
sitronix Sitronix Technology Corporation
skyworks Skyworks Solutions, Inc.
smsc Standard Microsystems Corporation
using these two parameters to set the minimum and
maximum port values.
+ sunrpc.svc_rpc_per_connection_limit=
+ [NFS,SUNRPC]
+ Limit the number of requests that the server will
+ process in parallel from a single connection.
+ The default value is 0 (no limit).
+
sunrpc.pool_mode=
[NFS]
Control how the NFS server code allocates CPUs to
L: cgroups@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git
S: Maintained
-F: Documentation/cgroups/
+F: Documentation/cgroup*
F: include/linux/cgroup*
F: kernel/cgroup*
W: http://oss.sgi.com/projects/cpusets/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git
S: Maintained
-F: Documentation/cgroups/cpusets.txt
+F: Documentation/cgroup-v1/cpusets.txt
F: include/linux/cpuset.h
F: kernel/cpuset.c
L: linux-scsi@vger.kernel.org
S: Supported
F: drivers/scsi/ibmvscsi/ibmvscsi*
-F: drivers/scsi/ibmvscsi/viosrp.h
+F: include/scsi/viosrp.h
+
+IBM Power Virtual SCSI Device Target Driver
+M: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
+M: Michael Cyr <mikecyr@linux.vnet.ibm.com>
+L: linux-scsi@vger.kernel.org
+L: target-devel@vger.kernel.org
+S: Supported
+F: drivers/scsi/ibmvscsi_tgt/
IBM Power Virtual FC Device Drivers
M: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Q: http://patchwork.ozlabs.org/project/netdev/list/
F: drivers/net/ethernet/mellanox/mlxsw/
+SOFT-ROCE DRIVER (rxe)
+M: Moni Shoua <monis@mellanox.com>
+L: linux-rdma@vger.kernel.org
+S: Supported
+W: https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
+Q: http://patchwork.kernel.org/project/linux-rdma/list/
+F: drivers/infiniband/hw/rxe/
+F: include/uapi/rdma/rdma_user_rxe.h
+
MEMBARRIER SUPPORT
M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Q: http://patchwork.ozlabs.org/project/rtc-linux/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git
S: Maintained
+F: Documentation/devicetree/bindings/rtc/
F: Documentation/rtc.txt
F: drivers/rtc/
F: include/linux/rtc.h
F: include/uapi/linux/rtc.h
+F: include/linux/rtc/
+F: include/linux/platform_data/rtc-*
+F: tools/testing/selftests/timers/rtctest.c
REALTEK AUDIO CODECS
M: Bard Liao <bardliao@realtek.com>
+++ /dev/null
-#include <asm-generic/rtc.h>
#include <asm/gct.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
-#include <asm/rtc.h>
#include <asm/vga.h>
#include "proto.h"
#include <linux/rtc.h>
#include <linux/platform_device.h>
-#include <asm/rtc.h>
-
#include "proto.h"
static int
alpha_rtc_read_time(struct device *dev, struct rtc_time *tm)
{
- __get_rtc_time(tm);
+ mc146818_get_time(tm);
/* Adjust for non-default epochs. It's easier to depend on the
generic __get_rtc_time and adjust the epoch here than create
tm = &xtm;
}
- return __set_rtc_time(tm);
+ return mc146818_set_time(tm);
}
static int
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/io.h>
-#include <linux/m48t86.h>
+#include <linux/platform_data/rtc-m48t86.h>
#include <linux/mtd/nand.h>
#include <linux/mtd/partitions.h>
#include <linux/platform_device.h>
#include <linux/mv643xx_eth.h>
#include <linux/ata_platform.h>
-#include <linux/m48t86.h>
+#include <linux/platform_data/rtc-m48t86.h>
#include <linux/mtd/nand.h>
#include <linux/mtd/partitions.h>
#include <linux/timeriomem-rng.h>
#include <linux/gpio.h>
#include <linux/delay.h>
-#include <linux/rtc-v3020.h>
+#include <linux/platform_data/rtc-v3020.h>
#include <video/mbxfb.h>
#include <linux/spi/spi.h>
#include <linux/gpio.h>
#include <linux/dm9000.h>
#include <linux/leds.h>
-#include <linux/rtc-v3020.h>
+#include <linux/platform_data/rtc-v3020.h>
#include <linux/pwm.h>
#include <linux/pwm_backlight.h>
#include <linux/delay.h>
#include <linux/dm9000.h>
-#include <linux/rtc-v3020.h>
+#include <linux/platform_data/rtc-v3020.h>
#include <linux/mtd/nand.h>
#include <linux/mtd/partitions.h>
#include <linux/mtd/physmap.h>
+++ /dev/null
-/* mc146818rtc.h: RTC defs
- *
- * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
-
-
-#endif /* _ASM_MC146818RTC_H */
+++ /dev/null
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _H8300_MC146818RTC_H
-#define _H8300_MC146818RTC_H
-
-/* empty include file to satisfy the include in genrtc.c/ide-geometry.c */
-
-#endif /* _H8300_MC146818RTC_H */
+++ /dev/null
-#ifndef _ASM_IA64_MC146818RTC_H
-#define _ASM_IA64_MC146818RTC_H
-
-/*
- * Machine dependent access functions for RTC registers.
- */
-
-/* empty include file to satisfy the include in genrtc.c */
-
-#endif /* _ASM_IA64_MC146818RTC_H */
#include <asm/amigahw.h>
#include <asm/amigaints.h>
#include <asm/irq.h>
-#include <asm/rtc.h>
#include <asm/machdep.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/apollohw.h>
#include <asm/irq.h>
-#include <asm/rtc.h>
#include <asm/machdep.h>
u_long sio01_physaddr;
#include <asm/setup.h>
#include <asm/irq.h>
#include <asm/traps.h>
-#include <asm/rtc.h>
#include <asm/machdep.h>
#include <asm/bvme6000hw.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/console.h>
+#include <linux/rtc.h>
#include <asm/bootinfo.h>
#include <asm/bootinfo-hp300.h>
#include <asm/blinken.h>
#include <asm/io.h> /* readb() and writeb() */
#include <asm/hp300hw.h>
-#include <asm/rtc.h>
#include "time.h"
/*
- * include/asm-m68knommu/flat.h -- uClinux flat-format executables
+ * flat.h -- uClinux flat-format executables
*/
#ifndef __M68KNOMMU_FLAT_H__
#define flat_argvp_envp_on_stack() 1
#define flat_old_ram_flag(flags) (flags)
#define flat_reloc_valid(reloc, size) ((reloc) <= (size))
-#define flat_get_addr_from_rp(rp, relval, flags, p) get_unaligned(rp)
-#define flat_put_addr_at_rp(rp, val, relval) put_unaligned(val,rp)
+#define flat_get_addr_from_rp(rp, relval, flags, p) \
+ ({ unsigned long __val; __get_user_unaligned(__val, rp); __val; })
+#define flat_put_addr_at_rp(rp, val, relval) __put_user_unaligned(val, rp)
#define flat_get_relocate_addr(rel) (rel)
static inline int flat_set_persistent(unsigned long relval,
return 0;
}
+#define FLAT_PLAT_INIT(regs) \
+ do { \
+ if (current->mm) \
+ (regs)->d5 = current->mm->start_data; \
+ } while (0)
+
#endif /* __M68KNOMMU_FLAT_H__ */
#define setframeformat(_regs) do { } while (0)
#endif
-#ifdef CONFIG_MMU
/*
* Do necessary setup to start up a newly executed thread.
*/
wrusp(usp);
}
+#ifdef CONFIG_MMU
extern int handle_kernel_fault(struct pt_regs *regs);
-
#else
-
-#define start_thread(_regs, _pc, _usp) \
-do { \
- (_regs)->pc = (_pc); \
- setframeformat(_regs); \
- if (current->mm) \
- (_regs)->d5 = current->mm->start_data; \
- (_regs)->sr &= ~0x2000; \
- wrusp(_usp); \
-} while(0)
-
static inline int handle_kernel_fault(struct pt_regs *regs)
{
/* Any fault in kernel is fatal on non-mmu */
return 0;
}
-
#endif
/* Forward declaration, a strange C thing */
+++ /dev/null
-/* include/asm-m68k/rtc.h
- *
- * Copyright Richard Zidlicky
- * implementation details for genrtc/q40rtc driver
- */
-/* permission is hereby granted to copy, modify and redistribute this code
- * in terms of the GNU Library General Public License, Version 2 or later,
- * at your option.
- */
-
-#ifndef _ASM_RTC_H
-#define _ASM_RTC_H
-
-#ifdef __KERNEL__
-
-#include <linux/rtc.h>
-#include <asm/errno.h>
-#include <asm/machdep.h>
-
-#define RTC_PIE 0x40 /* periodic interrupt enable */
-#define RTC_AIE 0x20 /* alarm interrupt enable */
-#define RTC_UIE 0x10 /* update-finished interrupt enable */
-
-/* some dummy definitions */
-#define RTC_BATT_BAD 0x100 /* battery bad */
-#define RTC_SQWE 0x08 /* enable square-wave output */
-#define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */
-#define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */
-#define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */
-
-static inline unsigned int get_rtc_time(struct rtc_time *time)
-{
- /*
- * Only the values that we read from the RTC are set. We leave
- * tm_wday, tm_yday and tm_isdst untouched. Even though the
- * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated
- * by the RTC when initially set to a non-zero value.
- */
- if (mach_hwclk)
- mach_hwclk(0, time);
- return RTC_24H;
-}
-
-static inline int set_rtc_time(struct rtc_time *time)
-{
- if (mach_hwclk)
- return mach_hwclk(1, time);
- return -EINVAL;
-}
-
-static inline unsigned int get_rtc_ss(void)
-{
- if (mach_get_ss)
- return mach_get_ss();
- else{
- struct rtc_time h;
-
- get_rtc_time(&h);
- return h.tm_sec;
- }
-}
-
-static inline int get_rtc_pll(struct rtc_pll_info *pll)
-{
- if (mach_get_rtc_pll)
- return mach_get_rtc_pll(pll);
- else
- return -EINVAL;
-}
-static inline int set_rtc_pll(struct rtc_pll_info *pll)
-{
- if (mach_set_rtc_pll)
- return mach_set_rtc_pll(pll);
- else
- return -EINVAL;
-}
-#endif /* __KERNEL__ */
-
-#endif /* _ASM__RTC_H */
}
}
-#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
+#if defined(CONFIG_ARCH_USES_GETTIMEOFFSET) && IS_ENABLED(CONFIG_RTC_DRV_GENERIC)
+static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
+{
+ mach_hwclk(0, tm);
+ return rtc_valid_tm(tm);
+}
+
+static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
+{
+ if (mach_hwclk(1, tm) < 0)
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+static int rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+ struct rtc_pll_info pll;
+ struct rtc_pll_info __user *argp = (void __user *)arg;
+
+ switch (cmd) {
+ case RTC_PLL_GET:
+ if (!mach_get_rtc_pll || mach_get_rtc_pll(&pll))
+ return -EINVAL;
+ return copy_to_user(argp, &pll, sizeof pll) ? -EFAULT : 0;
+
+ case RTC_PLL_SET:
+ if (!mach_set_rtc_pll)
+ return -EINVAL;
+ if (!capable(CAP_SYS_TIME))
+ return -EACCES;
+ if (copy_from_user(&pll, argp, sizeof(pll)))
+ return -EFAULT;
+ return mach_set_rtc_pll(&pll);
+ }
+
+ return -ENOIOCTLCMD;
+}
+
+static const struct rtc_class_ops generic_rtc_ops = {
+ .ioctl = rtc_ioctl,
+ .read_time = rtc_generic_get_time,
+ .set_time = rtc_generic_set_time,
+};
static int __init rtc_init(void)
{
if (!mach_hwclk)
return -ENODEV;
- pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+ pdev = platform_device_register_data(NULL, "rtc-generic", -1,
+ &generic_rtc_ops,
+ sizeof(generic_rtc_ops));
return PTR_ERR_OR_ZERO(pdev);
}
* Miscellaneous linux stuff
*/
+#include <linux/errno.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/platform_device.h>
#include <linux/adb.h>
#include <linux/cuda.h>
+#include <linux/rtc.h>
#include <asm/setup.h>
#include <asm/bootinfo.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/pgtable.h>
-#include <asm/rtc.h>
#include <asm/machdep.h>
#include <asm/macintosh.h>
#include <asm/uaccess.h>
#include <asm/io.h>
-#include <asm/rtc.h>
#include <asm/segment.h>
#include <asm/setup.h>
#include <asm/macintosh.h>
#include <asm/setup.h>
#include <asm/irq.h>
#include <asm/traps.h>
-#include <asm/rtc.h>
#include <asm/machdep.h>
#include <asm/mvme147hw.h>
#include <asm/setup.h>
#include <asm/irq.h>
#include <asm/traps.h>
-#include <asm/rtc.h>
#include <asm/machdep.h>
#include <asm/mvme16xhw.h>
* for more details.
*/
+#include <linux/errno.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/platform_device.h>
#include <asm/io.h>
-#include <asm/rtc.h>
#include <asm/bootinfo.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/pgalloc.h>
#include <asm/sun3-head.h>
#include <asm/sun3mmu.h>
-#include <asm/rtc.h>
#include <asm/machdep.h>
#include <asm/machines.h>
#include <asm/idprom.h>
#include <linux/rtc.h>
#include <asm/errno.h>
-#include <asm/rtc.h>
#include <asm/intersil.h>
+#include <asm/machdep.h>
/* bits to set for start/run of the intersil */
#include <asm/irq.h>
#include <asm/io.h>
+#include <asm/machdep.h>
#include <asm/traps.h>
#include <asm/sun3x.h>
#include <asm/sun3ints.h>
-#include <asm/rtc.h>
#include "time.h"
" DCACHE [%2], %0\n"
#endif
"2:\n"
- : "=&d" (temp), "=&da" (retval)
+ : "=&d" (temp), "=&d" (retval)
: "da" (m), "bd" (old), "da" (new)
: "cc"
);
#define PERFCTRL_DCSTALL 11 /* Dcache+TLB o/p delayed (per-thread) */
#define PERFCTRL_ICSTALL 12 /* Icache+TLB o/p delayed (per-thread) */
-#define PERFCTRL_INT 13 /* Internal core delailed events (see next) */
+#define PERFCTRL_INT 13 /* Internal core detailed events (see next) */
#define PERFCTRL_EXT 15 /* External source in core periphery */
#endif /* METAC_2_1 */
; is best to dump these registers immediately at the start of a routine
; using a MSETL or SETL instruction-
;
-; MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2; Only dump argments expected
+; MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2; Only dump arguments expected
;or SETL [A0StP+#8++],D0Ar2 ; Up to two 32-bit args expected
;
; For non-leaf routines it is always necessary to save and restore at least
#define SYSC_DCPART(n) (SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n))
#define SYSC_ICPART(n) (SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n))
-#define CACHE_ASSOCIATIVITY 4 /* 4 way set-assosiative */
+#define CACHE_ASSOCIATIVITY 4 /* 4 way set-associative */
#define ICACHE 0
#define DCACHE 1
ADDCC D0Re0,D0Re0,#1 ! If yes result += 1
SUBCC D1Ar1,D1Ar1,D1Re0 ! and A -= Bu
ORS D0Ar4,D0Ar4,D0Ar4 ! Return neg result?
- NEG D0Ar2,D0Re0 ! Calulate neg result
+ NEG D0Ar2,D0Re0 ! Calculate neg result
MOVMI D0Re0,D0Ar2 ! Yes: Take neg result
$LIDMCRet:
MOV PC,D1RtP
LSR D1Re0, D1Re0, #1 ! Shift down B
BNZ $LIDMCLoop ! Was single bit in curbit lost?
ORS D0Ar4,D0Ar4,D0Ar4 ! Return neg result?
- NEG D0Ar2,D0Re0 ! Calulate neg result
+ NEG D0Ar2,D0Re0 ! Calculate neg result
MOVMI D0Re0,D0Ar2 ! Yes: Take neg result
MOV PC,D1RtP
.size ___divsi3,.-___divsi3
if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
printk_ratelimit()) {
- pr_info("%s%s[%d]: segfault at %lx pc %08x sp %08x write %d trap %#x (%s)",
+ printk("%s%s[%d]: segfault at %lx pc %08x sp %08x write %d trap %#x (%s)",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), address,
regs->ctx.CurrPC, regs->ctx.AX[0].U0,
*/
#include <linux/linkage.h>
#include <linux/init.h>
-#include <linux/ds1286.h>
+#include <linux/rtc/ds1286.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <asm/sni.h>
#include <asm/time.h>
-#include <asm-generic/rtc.h>
#define SNI_CLOCK_TICK_RATE 3686400
#define SNI_COUNTER2_DIV 64
config MN10300_RTC
bool "Using MN10300 RTC"
depends on MN10300_PROC_MN103E010 || MN10300_PROC_MN2WS0050
- select GENERIC_CMOS_UPDATE
+ select RTC_CLASS
+ select RTC_DRV_CMOS
+ select RTC_SYSTOHC
default n
help
This option enables support for the RTC, thus enabling time to be
#define RTC_PORT(x) 0xd8600000
#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */
-#define CMOS_READ(addr) __SYSREG(0xd8600000 + (addr), u8)
+#define CMOS_READ(addr) __SYSREG(0xd8600000 + (u32)(addr), u8)
#define CMOS_WRITE(val, addr) \
- do { __SYSREG(0xd8600000 + (addr), u8) = val; } while (0)
+ do { __SYSREG(0xd8600000 + (u32)(addr), u8) = val; } while (0)
#define RTC_IRQ RTIRQ
#endif /* !CONFIG_MN10300_RTC */
-#include <asm-generic/rtc.h>
-
#endif /* _ASM_RTC_H */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mc146818rtc.h>
-#include <linux/bcd.h>
-#include <linux/timex.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+
#include <asm/rtc-regs.h>
#include <asm/rtc.h>
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
-/*
- * Read the current RTC time
- */
-void read_persistent_clock(struct timespec *ts)
-{
- struct rtc_time tm;
-
- get_rtc_time(&tm);
-
- ts->tv_nsec = 0;
- ts->tv_sec = mktime(tm.tm_year, tm.tm_mon, tm.tm_mday,
- tm.tm_hour, tm.tm_min, tm.tm_sec);
-
- /* if rtc is way off in the past, set something reasonable */
- if (ts->tv_sec < 0)
- ts->tv_sec = mktime(2009, 1, 1, 12, 0, 0);
-}
-
-/*
- * In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500
- * ms after the second nowtime has started, because when nowtime is written
- * into the registers of the CMOS clock, it will jump to the next second
- * precisely 500 ms later. Check the Motorola MC146818A or Dallas DS12887 data
- * sheet for details.
- *
- * BUG: This routine does not handle hour overflow properly; it just
- * sets the minutes. Usually you'll only notice that after reboot!
- */
-static int set_rtc_mmss(unsigned long nowtime)
-{
- unsigned char save_control, save_freq_select;
- int retval = 0;
- int real_seconds, real_minutes, cmos_minutes;
-
- /* gets recalled with irq locally disabled */
- spin_lock(&rtc_lock);
- save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being
- * set */
- CMOS_WRITE(save_control | RTC_SET, RTC_CONTROL);
-
- save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset
- * prescaler */
- CMOS_WRITE(save_freq_select | RTC_DIV_RESET2, RTC_FREQ_SELECT);
-
- cmos_minutes = CMOS_READ(RTC_MINUTES);
- if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- cmos_minutes = bcd2bin(cmos_minutes);
-
- /*
- * since we're only adjusting minutes and seconds,
- * don't interfere with hour overflow. This avoids
- * messing with unknown time zones but requires your
- * RTC not to be off by more than 15 minutes
- */
- real_seconds = nowtime % 60;
- real_minutes = nowtime / 60;
- if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1)
- /* correct for half hour time zone */
- real_minutes += 30;
- real_minutes %= 60;
-
- if (abs(real_minutes - cmos_minutes) < 30) {
- if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- real_seconds = bin2bcd(real_seconds);
- real_minutes = bin2bcd(real_minutes);
- }
- CMOS_WRITE(real_seconds, RTC_SECONDS);
- CMOS_WRITE(real_minutes, RTC_MINUTES);
- } else {
- printk_once(KERN_NOTICE
- "set_rtc_mmss: can't update from %d to %d\n",
- cmos_minutes, real_minutes);
- retval = -1;
- }
-
- /* The following flags have to be released exactly in this order,
- * otherwise the DS12887 (popular MC146818A clone with integrated
- * battery and quartz) will not reset the oscillator and will not
- * update precisely 500 ms later. You won't find this mentioned in
- * the Dallas Semiconductor data sheets, but who believes data
- * sheets anyway ... -- Markus Kuhn
- */
- CMOS_WRITE(save_control, RTC_CONTROL);
- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
- spin_unlock(&rtc_lock);
-
- return retval;
-}
-
-int update_persistent_clock(struct timespec now)
-{
- return set_rtc_mmss(now.tv_sec);
-}
+static const __initdata struct resource res[] = {
+ DEFINE_RES_IO(RTC_PORT(0), RTC_IO_EXTENT),
+ DEFINE_RES_IRQ(RTC_IRQ),
+};
/*
* calibrate the TSC clock against the RTC
RTCRA |= RTCRA_DVR;
RTCRA &= ~RTCRA_DVR;
RTCRB &= ~RTCRB_SET;
+
+ platform_device_register_simple("rtc_cmos", -1, res, ARRAY_SIZE(res));
}
* 2 of the Licence, or (at your option) any later version.
*/
#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <asm/cacheflush.h>
#include <asm/fpu.h>
+#include <asm/irq.h>
#include <asm/rtc.h>
#include <asm/busctl-regs.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <asm/cacheflush.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/io.h>
select TTY # Needed for pdc_cons.c
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_HASH
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_UNSTABLE_SCHED_CLOCK if (SMP || !64BIT)
--- /dev/null
+#ifndef _ASM_HASH_H
+#define _ASM_HASH_H
+
+/*
+ * HP-PA only implements integer multiply in the FPU. However, for
+ * integer multiplies by constant, it has a number of shift-and-add
+ * (but no shift-and-subtract, sigh!) instructions that a compiler
+ * can synthesize a code sequence with.
+ *
+ * Unfortunately, GCC isn't very efficient at using them. For example
+ * it uses three instructions for "x *= 21" when only two are needed.
+ * But we can find a sequence manually.
+ */
+
+#define HAVE_ARCH__HASH_32 1
+
+/*
+ * This is a multiply by GOLDEN_RATIO_32 = 0x61C88647 optimized for the
+ * PA7100 pairing rules. This is an in-order 2-way superscalar processor.
+ * Only one instruction in a pair may be a shift (by more than 3 bits),
+ * but other than that, simple ALU ops (including shift-and-add by up
+ * to 3 bits) may be paired arbitrarily.
+ *
+ * PA8xxx processors also dual-issue ALU instructions, although with
+ * fewer constraints, so this schedule is good for them, too.
+ *
+ * This 6-step sequence was found by Yevgen Voronenko's implementation
+ * of the Hcub algorithm at http://spiral.ece.cmu.edu/mcm/gen.html.
+ */
+static inline u32 __attribute_const__ __hash_32(u32 x)
+{
+ u32 a, b, c;
+
+ /*
+ * Phase 1: Compute a = (x << 19) + x,
+ * b = (x << 9) + a, c = (x << 23) + b.
+ */
+ a = x << 19; /* Two shifts can't be paired */
+ b = x << 9; a += x;
+ c = x << 23; b += a;
+ c += b;
+ /* Phase 2: Return (b<<11) + (c<<6) + (a<<3) - c */
+ b <<= 11;
+ a += c << 3; b -= c;
+ return (a << 3) + b;
+}
+
+#if BITS_PER_LONG == 64
+
+#define HAVE_ARCH_HASH_64 1
+
+/*
+ * Finding a good shift-and-add chain for GOLDEN_RATIO_64 is tricky,
+ * because available software for the purpose chokes on constants this
+ * large. (It's mostly designed for compiling FIR filter coefficients
+ * into FPGAs.)
+ *
+ * However, Jason Thong pointed out a work-around. The Hcub software
+ * (http://spiral.ece.cmu.edu/mcm/gen.html) is designed for *multiple*
+ * constant multiplication, and is good at finding shift-and-add chains
+ * which share common terms.
+ *
+ * Looking at 0x0x61C8864680B583EB in binary:
+ * 0110000111001000100001100100011010000000101101011000001111101011
+ * \______________/ \__________/ \_______/ \________/
+ * \____________________________/ \____________________/
+ * you can see the non-zero bits are divided into several well-separated
+ * blocks. Hcub can find algorithms for those terms separately, which
+ * can then be shifted and added together.
+ *
+ * Dividing the input into 2, 3 or 4 blocks, Hcub can find solutions
+ * with 10, 9 or 8 adds, respectively, making a total of 11 for the
+ * whole number.
+ *
+ * Using just two large blocks, 0xC3910C8D << 31 in the high bits,
+ * and 0xB583EB in the low bits, produces as good an algorithm as any,
+ * and with one more small shift than alternatives.
+ *
+ * The high bits are a larger number and more work to compute, as well
+ * as needing one extra cycle to shift left 31 bits before the final
+ * addition, so they are the critical path for scheduling. The low bits
+ * can fit into the scheduling slots left over.
+ */
+
+
+/*
+ * This _ASSIGN(dst, src) macro performs "dst = src", but prevents GCC
+ * from inferring anything about the value assigned to "dest".
+ *
+ * This prevents it from mis-optimizing certain sequences.
+ * In particular, gcc is annoyingly eager to combine consecutive shifts.
+ * Given "x <<= 19; y += x; z += x << 1;", GCC will turn this into
+ * "y += x << 19; z += x << 20;" even though the latter sequence needs
+ * an additional instruction and temporary register.
+ *
+ * Because no actual assembly code is generated, this construct is
+ * usefully portable across all GCC platforms, and so can be test-compiled
+ * on non-PA systems.
+ *
+ * In two places, additional unused input dependencies are added. This
+ * forces GCC's scheduling so it does not rearrange instructions too much.
+ * Because the PA-8xxx is out of order, I'm not sure how much this matters,
+ * but why make it more difficult for the processor than necessary?
+ */
+#define _ASSIGN(dst, src, ...) asm("" : "=r" (dst) : "0" (src), ##__VA_ARGS__)
+
+/*
+ * Multiply by GOLDEN_RATIO_64 = 0x0x61C8864680B583EB using a heavily
+ * optimized shift-and-add sequence.
+ *
+ * Without the final shift, the multiply proper is 19 instructions,
+ * 10 cycles and uses only 4 temporaries. Whew!
+ *
+ * You are not expected to understand this.
+ */
+static __always_inline u32 __attribute_const__
+hash_64(u64 a, unsigned int bits)
+{
+ u64 b, c, d;
+
+ /*
+ * Encourage GCC to move a dynamic shift to %sar early,
+ * thereby freeing up an additional temporary register.
+ */
+ if (!__builtin_constant_p(bits))
+ asm("" : "=q" (bits) : "0" (64 - bits));
+ else
+ bits = 64 - bits;
+
+ _ASSIGN(b, a*5); c = a << 13;
+ b = (b << 2) + a; _ASSIGN(d, a << 17);
+ a = b + (a << 1); c += d;
+ d = a << 10; _ASSIGN(a, a << 19);
+ d = a - d; _ASSIGN(a, a << 4, "X" (d));
+ c += b; a += b;
+ d -= c; c += a << 1;
+ a += c << 3; _ASSIGN(b, b << (7+31), "X" (c), "X" (d));
+ a <<= 31; b += d;
+ a += b;
+ return a >> bits;
+}
+#undef _ASSIGN /* We're a widely-used header file, so don't litter! */
+
+#endif /* BITS_PER_LONG == 64 */
+
+#endif /* _ASM_HASH_H */
+++ /dev/null
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
-
-/* empty include file to satisfy the include in genrtc.c */
-
-#endif /* _ASM_MC146818RTC_H */
+++ /dev/null
-/*
- * include/asm-parisc/rtc.h
- *
- * Copyright 2002 Randolph CHung <tausq@debian.org>
- *
- * Based on: include/asm-ppc/rtc.h and the genrtc driver in the
- * 2.4 parisc linux tree
- */
-
-#ifndef __ASM_RTC_H__
-#define __ASM_RTC_H__
-
-#ifdef __KERNEL__
-
-#include <linux/rtc.h>
-
-#include <asm/pdc.h>
-
-#define SECS_PER_HOUR (60 * 60)
-#define SECS_PER_DAY (SECS_PER_HOUR * 24)
-
-
-#define RTC_PIE 0x40 /* periodic interrupt enable */
-#define RTC_AIE 0x20 /* alarm interrupt enable */
-#define RTC_UIE 0x10 /* update-finished interrupt enable */
-
-#define RTC_BATT_BAD 0x100 /* battery bad */
-
-/* some dummy definitions */
-#define RTC_SQWE 0x08 /* enable square-wave output */
-#define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */
-#define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */
-#define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */
-
-# define __isleap(year) \
- ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
-
-/* How many days come before each month (0-12). */
-static const unsigned short int __mon_yday[2][13] =
-{
- /* Normal years. */
- { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
- /* Leap years. */
- { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
-};
-
-static inline unsigned int get_rtc_time(struct rtc_time *wtime)
-{
- struct pdc_tod tod_data;
- long int days, rem, y;
- const unsigned short int *ip;
-
- memset(wtime, 0, sizeof(*wtime));
- if (pdc_tod_read(&tod_data) < 0)
- return RTC_24H | RTC_BATT_BAD;
-
- // most of the remainder of this function is:
-// Copyright (C) 1991, 1993, 1997, 1998 Free Software Foundation, Inc.
-// This was originally a part of the GNU C Library.
-// It is distributed under the GPL, and was swiped from offtime.c
-
-
- days = tod_data.tod_sec / SECS_PER_DAY;
- rem = tod_data.tod_sec % SECS_PER_DAY;
-
- wtime->tm_hour = rem / SECS_PER_HOUR;
- rem %= SECS_PER_HOUR;
- wtime->tm_min = rem / 60;
- wtime->tm_sec = rem % 60;
-
- y = 1970;
-
-#define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
-#define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
-
- while (days < 0 || days >= (__isleap (y) ? 366 : 365))
- {
- /* Guess a corrected year, assuming 365 days per year. */
- long int yg = y + days / 365 - (days % 365 < 0);
-
- /* Adjust DAYS and Y to match the guessed year. */
- days -= ((yg - y) * 365
- + LEAPS_THRU_END_OF (yg - 1)
- - LEAPS_THRU_END_OF (y - 1));
- y = yg;
- }
- wtime->tm_year = y - 1900;
-
- ip = __mon_yday[__isleap(y)];
- for (y = 11; days < (long int) ip[y]; --y)
- continue;
- days -= ip[y];
- wtime->tm_mon = y;
- wtime->tm_mday = days + 1;
-
- return RTC_24H;
-}
-
-static int set_rtc_time(struct rtc_time *wtime)
-{
- u_int32_t secs;
-
- secs = mktime(wtime->tm_year + 1900, wtime->tm_mon + 1, wtime->tm_mday,
- wtime->tm_hour, wtime->tm_min, wtime->tm_sec);
-
- if(pdc_tod_set(secs, 0) < 0)
- return -1;
- else
- return 0;
-
-}
-
-static inline unsigned int get_rtc_ss(void)
-{
- struct rtc_time h;
-
- get_rtc_time(&h);
- return h.tm_sec;
-}
-
-static inline int get_rtc_pll(struct rtc_pll_info *pll)
-{
- return -EINVAL;
-}
-static inline int set_rtc_pll(struct rtc_pll_info *pll)
-{
- return -EINVAL;
-}
-
-#endif /* __KERNEL__ */
-#endif /* __ASM_RTC_H__ */
retval = mem_pdc_call(PDC_PAT_IO, PDC_PAT_IO_PCI_CONFIG_READ,
__pa(pdc_result), pci_addr, pci_size);
switch(pci_size) {
- case 1: *(u8 *) mem_addr = (u8) pdc_result[0];
- case 2: *(u16 *)mem_addr = (u16) pdc_result[0];
- case 4: *(u32 *)mem_addr = (u32) pdc_result[0];
+ case 1: *(u8 *) mem_addr = (u8) pdc_result[0]; break;
+ case 2: *(u16 *)mem_addr = (u16) pdc_result[0]; break;
+ case 4: *(u32 *)mem_addr = (u32) pdc_result[0]; break;
}
spin_unlock_irqrestore(&pdc_lock, flags);
*/
#include <linux/errno.h>
#include <linux/module.h>
+#include <linux/rtc.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/param.h>
per_cpu(cpu_data, cpu).it_value = next_tick;
}
+#if IS_ENABLED(CONFIG_RTC_DRV_GENERIC)
+static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
+{
+ struct pdc_tod tod_data;
+
+ memset(tm, 0, sizeof(*tm));
+ if (pdc_tod_read(&tod_data) < 0)
+ return -EOPNOTSUPP;
+
+ /* we treat tod_sec as unsigned, so this can work until year 2106 */
+ rtc_time64_to_tm(tod_data.tod_sec, tm);
+ return rtc_valid_tm(tm);
+}
+
+static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
+{
+ time64_t secs = rtc_tm_to_time64(tm);
+
+ if (pdc_tod_set(secs, 0) < 0)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static const struct rtc_class_ops rtc_generic_ops = {
+ .read_time = rtc_generic_get_time,
+ .set_time = rtc_generic_set_time,
+};
+
static int __init rtc_init(void)
{
struct platform_device *pdev;
- pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+ pdev = platform_device_register_data(NULL, "rtc-generic", -1,
+ &rtc_generic_ops,
+ sizeof(rtc_generic_ops));
+
return PTR_ERR_OR_ZERO(pdev);
}
device_initcall(rtc_init);
+#endif
void read_persistent_clock(struct timespec *ts)
{
}
static const struct iomap_ops ioport_ops = {
- ioport_read8,
- ioport_read16,
- ioport_read16,
- ioport_read32,
- ioport_read32,
- ioport_write8,
- ioport_write16,
- ioport_write16,
- ioport_write32,
- ioport_write32,
- ioport_read8r,
- ioport_read16r,
- ioport_read32r,
- ioport_write8r,
- ioport_write16r,
- ioport_write32r,
+ .read8 = ioport_read8,
+ .read16 = ioport_read16,
+ .read16be = ioport_read16,
+ .read32 = ioport_read32,
+ .read32be = ioport_read32,
+ .write8 = ioport_write8,
+ .write16 = ioport_write16,
+ .write16be = ioport_write16,
+ .write32 = ioport_write32,
+ .write32be = ioport_write32,
+ .read8r = ioport_read8r,
+ .read16r = ioport_read16r,
+ .read32r = ioport_read32r,
+ .write8r = ioport_write8r,
+ .write16r = ioport_write16r,
+ .write32r = ioport_write32r,
};
/* Legacy I/O memory ops */
}
static const struct iomap_ops iomem_ops = {
- iomem_read8,
- iomem_read16,
- iomem_read16be,
- iomem_read32,
- iomem_read32be,
- iomem_write8,
- iomem_write16,
- iomem_write16be,
- iomem_write32,
- iomem_write32be,
- iomem_read8r,
- iomem_read16r,
- iomem_read32r,
- iomem_write8r,
- iomem_write16r,
- iomem_write32r,
+ .read8 = iomem_read8,
+ .read16 = iomem_read16,
+ .read16be = iomem_read16be,
+ .read32 = iomem_read32,
+ .read32be = iomem_read32be,
+ .write8 = iomem_write8,
+ .write16 = iomem_write16,
+ .write16be = iomem_write16be,
+ .write32 = iomem_write32,
+ .write32be = iomem_write32be,
+ .read8r = iomem_read8r,
+ .read16r = iomem_read16r,
+ .read32r = iomem_read32r,
+ .write8r = iomem_write8r,
+ .write16r = iomem_write16r,
+ .write32r = iomem_write32r,
};
static const struct iomap_ops *iomap_ops[8] = {
depends on DEBUG_KERNEL
default n
+config JUMP_LABEL_FEATURE_CHECKS
+ bool "Enable use of jump label for cpu/mmu_has_feature()"
+ depends on JUMP_LABEL
+ default y
+ help
+ Selecting this options enables use of jump labels for some internal
+ feature checks. This should generate more optimal code for those
+ checks.
+
+config JUMP_LABEL_FEATURE_CHECK_DEBUG
+ bool "Do extra check on feature fixup calls"
+ depends on DEBUG_KERNEL && JUMP_LABEL_FEATURE_CHECKS
+ default n
+ help
+ This tries to catch incorrect usage of cpu_has_feature() and
+ mmu_has_feature() in the code.
+
+ If you don't know what this means, say N.
+
config FTR_FIXUP_SELFTEST
bool "Run self-tests of the feature-fixup code"
depends on DEBUG_KERNEL
radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags);
+
+static inline int hstate_get_psize(struct hstate *hstate)
+{
+ unsigned long shift;
+
+ shift = huge_page_shift(hstate);
+ if (shift == mmu_psize_defs[MMU_PAGE_2M].shift)
+ return MMU_PAGE_2M;
+ else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
+ return MMU_PAGE_1G;
+ else {
+ WARN(1, "Wrong huge page shift\n");
+ return mmu_virtual_psize;
+ }
+}
#endif
#include <asm/book3s/64/pgtable.h>
#include <asm/bug.h>
#include <asm/processor.h>
+#include <asm/cpu_has_feature.h>
/*
* SLB
BUG();
}
+static inline unsigned long get_sllp_encoding(int psize)
+{
+ unsigned long sllp;
+
+ sllp = ((mmu_psize_defs[psize].sllp & SLB_VSID_L) >> 6) |
+ ((mmu_psize_defs[psize].sllp & SLB_VSID_LP) >> 4);
+ return sllp;
+}
+
#endif /* __ASSEMBLY__ */
/*
};
extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
-#ifdef CONFIG_PPC_RADIX_MMU
-#define radix_enabled() mmu_has_feature(MMU_FTR_RADIX)
-#else
-#define radix_enabled() (0)
-#endif
-
-
#endif /* __ASSEMBLY__ */
/* 64-bit classic hash table MMU */
extern int mmu_io_psize;
/* MMU initialization */
+void mmu_early_init_devtree(void);
+void hash__early_init_devtree(void);
+void radix__early_init_devtree(void);
extern void radix_init_native(void);
extern void hash__early_init_mmu(void);
extern void radix__early_init_mmu(void);
static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
phys_addr_t first_memblock_size)
{
- if (radix_enabled())
+ if (early_radix_enabled())
return radix__setup_initial_memory_limit(first_memblock_base,
first_memblock_size);
return hash__setup_initial_memory_limit(first_memblock_base,
first_memblock_size);
}
+
+extern int (*register_process_table)(unsigned long base, unsigned long page_size,
+ unsigned long tbl_size);
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
{
}
-static inline void hash__flush_tlb_page_nohash(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
-}
-
static inline void hash__flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
return mmu_psize_defs[psize].ap;
}
+extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end);
+extern void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize);
+extern void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end);
extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end);
extern void radix__local_flush_tlb_mm(struct mm_struct *mm);
extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
- unsigned long ap, int nid);
extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
+extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+ int psize);
extern void radix__tlb_flush(struct mmu_gather *tlb);
#ifdef CONFIG_SMP
extern void radix__flush_tlb_mm(struct mm_struct *mm);
extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
- unsigned long ap, int nid);
extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
+extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+ int psize);
#else
#define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm)
#define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr)
-#define radix___flush_tlb_page(mm,addr,p,i) radix___local_flush_tlb_page(mm,addr,p,i)
+#define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p)
#define radix__flush_tlb_pwc(tlb, addr) radix__local_flush_tlb_pwc(tlb, addr)
#endif
extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
#include <asm/book3s/64/tlbflush-hash.h>
#include <asm/book3s/64/tlbflush-radix.h>
+#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+static inline void flush_pmd_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ if (radix_enabled())
+ return radix__flush_pmd_tlb_range(vma, start, end);
+ return hash__flush_tlb_range(vma, start, end);
+}
+
+#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
+static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end)
+{
+ if (radix_enabled())
+ return radix__flush_hugetlb_tlb_range(vma, start, end);
+ return hash__flush_tlb_range(vma, start, end);
+}
+
static inline void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
return hash__local_flush_tlb_page(vma, vmaddr);
}
-static inline void flush_tlb_page_nohash(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
- if (radix_enabled())
- return radix__flush_tlb_page(vma, vmaddr);
- return hash__flush_tlb_page_nohash(vma, vmaddr);
-}
-
static inline void tlb_flush(struct mmu_gather *tlb)
{
if (radix_enabled())
#include <linux/mm.h>
#include <asm/cputable.h>
+#include <asm/cpu_has_feature.h>
/*
* No cache flushing is required when address mappings are changed,
--- /dev/null
+#ifndef __ASM_POWERPC_CPUFEATURES_H
+#define __ASM_POWERPC_CPUFEATURES_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bug.h>
+#include <asm/cputable.h>
+
+static inline bool early_cpu_has_feature(unsigned long feature)
+{
+ return !!((CPU_FTRS_ALWAYS & feature) ||
+ (CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature));
+}
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+#include <linux/jump_label.h>
+
+#define NUM_CPU_FTR_KEYS 64
+
+extern struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS];
+
+static __always_inline bool cpu_has_feature(unsigned long feature)
+{
+ int i;
+
+ BUILD_BUG_ON(!__builtin_constant_p(feature));
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG
+ if (!static_key_initialized) {
+ printk("Warning! cpu_has_feature() used prior to jump label init!\n");
+ dump_stack();
+ return early_cpu_has_feature(feature);
+ }
+#endif
+
+ if (CPU_FTRS_ALWAYS & feature)
+ return true;
+
+ if (!(CPU_FTRS_POSSIBLE & feature))
+ return false;
+
+ i = __builtin_ctzl(feature);
+ return static_branch_likely(&cpu_feature_keys[i]);
+}
+#else
+static inline bool cpu_has_feature(unsigned long feature)
+{
+ return early_cpu_has_feature(feature);
+}
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_POWERPC_CPUFEATURE_H */
#define __ASM_POWERPC_CPUTABLE_H
+#include <linux/types.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <uapi/asm/cputable.h>
extern const char *powerpc_base_platform;
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+extern void cpu_feature_keys_init(void);
+#else
+static inline void cpu_feature_keys_init(void) { }
+#endif
+
/* TLB flush actions. Used as argument to cpu_spec.flush_tlb() hook */
enum {
TLB_INVAL_SCOPE_GLOBAL = 0, /* invalidate all TLBs */
};
#endif /* __powerpc64__ */
-static inline int cpu_has_feature(unsigned long feature)
-{
- return (CPU_FTRS_ALWAYS & feature) ||
- (CPU_FTRS_POSSIBLE
- & cur_cpu_spec->cpu_features
- & feature);
-}
-
#define HBP_NUM 1
#endif /* !__ASSEMBLY__ */
#include <asm/div64.h>
#include <asm/time.h>
#include <asm/param.h>
+#include <asm/cpu_has_feature.h>
typedef u64 __nocast cputime_t;
typedef u64 __nocast cputime64_t;
#include <linux/threads.h>
#include <asm/ppc-opcode.h>
+#include <asm/cpu_has_feature.h>
#define PPC_DBELL_MSG_BRDCAST (0x04000000)
#define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36))
#include <linux/spinlock.h>
#include <asm/cputable.h>
+#include <asm/cpu_has_feature.h>
typedef struct {
unsigned int base;
{
pte_t pte;
pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
- flush_tlb_page(vma, addr);
+ flush_hugetlb_page(vma, addr);
}
static inline int huge_pte_none(pte_t pte)
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:\n\t"
- "nop\n\t"
+ "nop # arch_static_branch\n\t"
".pushsection __jump_table, \"aw\"\n\t"
JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
".popsection \n\t"
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
asm_volatile_goto("1:\n\t"
- "b %l[l_yes]\n\t"
+ "b %l[l_yes] # arch_static_branch_jump\n\t"
".pushsection __jump_table, \"aw\"\n\t"
JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
".popsection \n\t"
switch (b_psize) {
case MMU_PAGE_4K:
- sllp = ((mmu_psize_defs[a_psize].sllp & SLB_VSID_L) >> 6) |
- ((mmu_psize_defs[a_psize].sllp & SLB_VSID_LP) >> 4);
+ sllp = get_sllp_encoding(a_psize);
rb |= sllp << 5; /* AP field */
rb |= (va_low & 0x7ff) << 12; /* remaining 11 bits of AVA */
break;
#ifdef CONFIG_ARCH_RANDOM
int (*get_random_seed)(unsigned long *v);
#endif
- int (*register_process_table)(unsigned long base, unsigned long page_size,
- unsigned long tbl_size);
};
extern void e500_idle(void);
#include <asm/cputable.h>
#include <linux/mm.h>
+#include <asm/cpu_has_feature.h>
/*
* This file is included by linux/mman.h, so we can't use cacl_vm_prot_bits()
*/
/*
- * First half is MMU families
+ * MMU families
*/
#define MMU_FTR_HPTE_TABLE ASM_CONST(0x00000001)
#define MMU_FTR_TYPE_8xx ASM_CONST(0x00000002)
#define MMU_FTR_TYPE_FSL_E ASM_CONST(0x00000010)
#define MMU_FTR_TYPE_47x ASM_CONST(0x00000020)
+/* Radix page table supported and enabled */
+#define MMU_FTR_TYPE_RADIX ASM_CONST(0x00000040)
+
/*
- * This is individual features
+ * Individual features below.
*/
+
/*
* We need to clear top 16bits of va (from the remaining 64 bits )in
* tlbie* instructions
*/
#define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000)
-/*
- * Radix page table available
- */
-#define MMU_FTR_RADIX ASM_CONST(0x80000000)
-
/* MMU feature bit sets for various CPUs */
#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \
MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
#define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B
#ifndef __ASSEMBLY__
+#include <linux/bug.h>
#include <asm/cputable.h>
#ifdef CONFIG_PPC_FSL_BOOK3E
MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE |
MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA |
#ifdef CONFIG_PPC_RADIX_MMU
- MMU_FTR_RADIX |
+ MMU_FTR_TYPE_RADIX |
#endif
0,
};
-static inline int mmu_has_feature(unsigned long feature)
+static inline bool early_mmu_has_feature(unsigned long feature)
{
- return (MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature);
+ return !!(MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature);
+}
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+#include <linux/jump_label.h>
+
+#define NUM_MMU_FTR_KEYS 32
+
+extern struct static_key_true mmu_feature_keys[NUM_MMU_FTR_KEYS];
+
+extern void mmu_feature_keys_init(void);
+
+static __always_inline bool mmu_has_feature(unsigned long feature)
+{
+ int i;
+
+ BUILD_BUG_ON(!__builtin_constant_p(feature));
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG
+ if (!static_key_initialized) {
+ printk("Warning! mmu_has_feature() used prior to jump label init!\n");
+ dump_stack();
+ return early_mmu_has_feature(feature);
+ }
+#endif
+
+ if (!(MMU_FTRS_POSSIBLE & feature))
+ return false;
+
+ i = __builtin_ctzl(feature);
+ return static_branch_likely(&mmu_feature_keys[i]);
}
static inline void mmu_clear_feature(unsigned long feature)
{
+ int i;
+
+ i = __builtin_ctzl(feature);
cur_cpu_spec->mmu_features &= ~feature;
+ static_branch_disable(&mmu_feature_keys[i]);
}
+#else
+
+static inline void mmu_feature_keys_init(void)
+{
+
+}
+
+static inline bool mmu_has_feature(unsigned long feature)
+{
+ return early_mmu_has_feature(feature);
+}
+
+static inline void mmu_clear_feature(unsigned long feature)
+{
+ cur_cpu_spec->mmu_features &= ~feature;
+}
+#endif /* CONFIG_JUMP_LABEL */
extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;
}
#endif /* !CONFIG_DEBUG_VM */
+#ifdef CONFIG_PPC_RADIX_MMU
+static inline bool radix_enabled(void)
+{
+ return mmu_has_feature(MMU_FTR_TYPE_RADIX);
+}
+
+static inline bool early_radix_enabled(void)
+{
+ return early_mmu_has_feature(MMU_FTR_TYPE_RADIX);
+}
+#else
+static inline bool radix_enabled(void)
+{
+ return false;
+}
+
+static inline bool early_radix_enabled(void)
+{
+ return false;
+}
+#endif
+
#endif /* !__ASSEMBLY__ */
/* The kernel use the constants below to index in the page sizes array.
extern void early_init_mmu_secondary(void);
extern void setup_initial_memory_limit(phys_addr_t first_memblock_base,
phys_addr_t first_memblock_size);
+static inline void mmu_early_init_devtree(void) { }
#endif /* __ASSEMBLY__ */
#endif
# include <asm/mmu-8xx.h>
#endif
-#ifndef radix_enabled
-#define radix_enabled() (0)
-#endif
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_MMU_H_ */
__msr_check_and_clear(bits);
}
-static inline unsigned long mfvtb (void)
-{
-#ifdef CONFIG_PPC_BOOK3S_64
- if (cpu_has_feature(CPU_FTR_ARCH_207S))
- return mfspr(SPRN_VTB);
-#endif
- return 0;
-}
-
#ifdef __powerpc64__
#if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E)
#define mftb() ({unsigned long rval; \
+++ /dev/null
-/*
- * Real-time clock definitions and interfaces
- *
- * Author: Tom Rini <trini@mvista.com>
- *
- * 2002 (c) MontaVista, Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- *
- * Based on:
- * include/asm-m68k/rtc.h
- *
- * Copyright Richard Zidlicky
- * implementation details for genrtc/q40rtc driver
- *
- * And the old drivers/macintosh/rtc.c which was heavily based on:
- * Linux/SPARC Real Time Clock Driver
- * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu)
- *
- * With additional work by Paul Mackerras and Franz Sirl.
- */
-
-#ifndef __ASM_POWERPC_RTC_H__
-#define __ASM_POWERPC_RTC_H__
-
-#ifdef __KERNEL__
-
-#include <linux/rtc.h>
-
-#include <asm/machdep.h>
-#include <asm/time.h>
-
-#define RTC_PIE 0x40 /* periodic interrupt enable */
-#define RTC_AIE 0x20 /* alarm interrupt enable */
-#define RTC_UIE 0x10 /* update-finished interrupt enable */
-
-/* some dummy definitions */
-#define RTC_BATT_BAD 0x100 /* battery bad */
-#define RTC_SQWE 0x08 /* enable square-wave output */
-#define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */
-#define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */
-#define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */
-
-static inline unsigned int get_rtc_time(struct rtc_time *time)
-{
- if (ppc_md.get_rtc_time)
- ppc_md.get_rtc_time(time);
- return RTC_24H;
-}
-
-/* Set the current date and time in the real time clock. */
-static inline int set_rtc_time(struct rtc_time *time)
-{
- if (ppc_md.set_rtc_time)
- return ppc_md.set_rtc_time(time);
- return -EINVAL;
-}
-
-static inline unsigned int get_rtc_ss(void)
-{
- struct rtc_time h;
-
- get_rtc_time(&h);
- return h.tm_sec;
-}
-
-static inline int get_rtc_pll(struct rtc_pll_info *pll)
-{
- return -EINVAL;
-}
-static inline int set_rtc_pll(struct rtc_pll_info *pll)
-{
- return -EINVAL;
-}
-
-#endif /* __KERNEL__ */
-#endif /* __ASM_POWERPC_RTC_H__ */
static inline void __giveup_spe(struct task_struct *t) { }
#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+extern void flush_tmregs_to_thread(struct task_struct *);
+#else
+static inline void flush_tmregs_to_thread(struct task_struct *t)
+{
+}
+#endif
+
static inline void clear_task_ebb(struct task_struct *t)
{
#ifdef CONFIG_PPC_BOOK3S_64
#include <linux/percpu.h>
#include <asm/processor.h>
+#include <asm/cpu_has_feature.h>
/* time.c */
extern unsigned long tb_ticks_per_jiffy;
{
#ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_ARCH_207S))
- return mfvtb();
+ return mfspr(SPRN_VTB);
#endif
return 0;
}
#endif
}
+#ifdef CONFIG_SMP
+static inline int mm_is_core_local(struct mm_struct *mm)
+{
+ return cpumask_subset(mm_cpumask(mm),
+ topology_sibling_cpumask(smp_processor_id()));
+}
+#else
+static inline int mm_is_core_local(struct mm_struct *mm)
+{
+ return 1;
+}
+#endif
+
#endif /* __KERNEL__ */
#endif /* __ASM_POWERPC_TLB_H */
#define flush_tlb_page(vma,addr) local_flush_tlb_page(vma,addr)
#define __flush_tlb_page(mm,addr,p,i) __local_flush_tlb_page(mm,addr,p,i)
#endif
-#define flush_tlb_page_nohash(vma,addr) flush_tlb_page(vma,addr)
#elif defined(CONFIG_PPC_STD_MMU_32)
#ifdef CONFIG_ALTIVEC
#include <asm/cputable.h>
+#include <asm/cpu_has_feature.h>
void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
unsigned long *v2_in);
#define ELF_NGREG 48 /* includes nip, msr, lr, etc. */
#define ELF_NFPREG 33 /* includes fpscr */
+#define ELF_NVMX 34 /* includes all vector registers */
+#define ELF_NVSX 32 /* includes all VSX registers */
+#define ELF_NTMSPRREG 3 /* include tfhar, tfiar, texasr */
+#define ELF_NEBB 3 /* includes ebbrr, ebbhr, bescr */
+#define ELF_NPMU 5 /* includes siar, sdar, sier, mmcr2, mmcr0 */
typedef unsigned long elf_greg_t64;
typedef elf_greg_t64 elf_gregset_t64[ELF_NGREG];
#include <asm/emulated_ops.h>
#include <asm/switch_to.h>
#include <asm/disassemble.h>
+#include <asm/cpu_has_feature.h>
struct aligninfo {
unsigned char len;
#include <linux/threads.h>
#include <linux/init.h>
#include <linux/export.h>
+#include <linux/jump_label.h>
#include <asm/oprofile_impl.h>
#include <asm/cputable.h>
return NULL;
}
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS] = {
+ [0 ... NUM_CPU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT
+};
+EXPORT_SYMBOL_GPL(cpu_feature_keys);
+
+void __init cpu_feature_keys_init(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_CPU_FTR_KEYS; i++) {
+ unsigned long f = 1ul << i;
+
+ if (!(cur_cpu_spec->cpu_features & f))
+ static_branch_disable(&cpu_feature_keys[i]);
+ }
+}
+
+struct static_key_true mmu_feature_keys[NUM_MMU_FTR_KEYS] = {
+ [0 ... NUM_MMU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT
+};
+EXPORT_SYMBOL_GPL(mmu_feature_keys);
+
+void __init mmu_feature_keys_init(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_MMU_FTR_KEYS; i++) {
+ unsigned long f = 1ul << i;
+
+ if (!(cur_cpu_spec->mmu_features & f))
+ static_branch_disable(&mmu_feature_keys[i]);
+ }
+}
+#endif
#ifdef CONFIG_PPC_STD_MMU_64
BEGIN_MMU_FTR_SECTION
b 2f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
BEGIN_FTR_SECTION
clrrdi r6,r8,28 /* get its ESID */
clrrdi r9,r1,28 /* get current sp ESID */
b do_hash_page /* Try to handle as hpte fault */
MMU_FTR_SECTION_ELSE
b handle_page_fault
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
.align 7
.globl h_data_storage_common
b do_hash_page /* Try to handle as hpte fault */
MMU_FTR_SECTION_ELSE
b handle_page_fault
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception)
#ifdef CONFIG_PPC_STD_MMU_64
BEGIN_MMU_FTR_SECTION
bl slb_allocate_realmode
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
#endif
/* All done -- return from exception. */
beq- 2f
FTR_SECTION_ELSE
b 2f
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
.machine push
.machine "power4"
BEGIN_MMU_FTR_SECTION
b no_segments
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
/* Restore SLB from PACA */
ld r8,PACA_SLBSHADOWPTR(r13)
#endif
#define CREATE_TRACE_POINTS
#include <asm/trace.h>
+#include <asm/cpu_has_feature.h>
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
* if we do a GET_PACA() before the feature fixups have been
* applied
*/
- if (cpu_has_feature(CPU_FTR_HVMODE))
+ if (early_cpu_has_feature(CPU_FTR_HVMODE))
mtspr(SPRN_SPRG_HPACA, local_paca);
#endif
mtspr(SPRN_SPRG_PACA, local_paca);
#include <asm/code-patching.h>
#include <asm/exec.h>
#include <asm/livepatch.h>
+#include <asm/cpu_has_feature.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#endif
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void flush_tmregs_to_thread(struct task_struct *tsk)
+{
+ /*
+ * Process self tracing is not yet supported through
+ * ptrace interface. Ptrace generic code should have
+ * prevented this from happening in the first place.
+ * Warn once here with the message, if some how it
+ * is attempted.
+ */
+ WARN_ONCE(tsk == current,
+ "Not expecting ptrace on self: TM regs may be incorrect\n");
+
+ /*
+ * If task is not current, it should have been flushed
+ * already to it's thread_struct during __switch_to().
+ */
+}
+#endif
+
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *new)
{
*/
{CPU_FTR_TM_COMP, 0, 0,
PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0},
- {0, MMU_FTR_RADIX, 0, 0, 40, 0, 0},
+ {0, MMU_FTR_TYPE_RADIX, 0, 0, 40, 0, 0},
};
static void __init scan_features(unsigned long node, const unsigned char *ftrs,
#endif
}
-static bool disable_radix;
-static int __init parse_disable_radix(char *p)
-{
- disable_radix = true;
- return 0;
-}
-early_param("disable_radix", parse_disable_radix);
-
void __init early_init_devtree(void *params)
{
phys_addr_t limit;
*/
spinning_secondaries = boot_cpu_count - 1;
#endif
- /*
- * now fixup radix MMU mode based on kernel command line
- */
- if (disable_radix)
- cur_cpu_spec->mmu_features &= ~MMU_FTR_RADIX;
+
+ mmu_early_init_devtree();
#ifdef CONFIG_PPC_POWERNV
/* Scan and build the list of machine check recoverable ranges */
{.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])}
#define REG_OFFSET_END {.name = NULL, .offset = 0}
+#define TVSO(f) (offsetof(struct thread_vr_state, f))
+#define TFSO(f) (offsetof(struct thread_fp_state, f))
+#define TSO(f) (offsetof(struct thread_struct, f))
+
static const struct pt_regs_offset regoffset_table[] = {
GPR_OFFSET_NAME(0),
GPR_OFFSET_NAME(1),
return 0;
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static unsigned long get_user_ckpt_msr(struct task_struct *task)
+{
+ return task->thread.ckpt_regs.msr | task->thread.fpexc_mode;
+}
+
+static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr)
+{
+ task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE;
+ task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE;
+ return 0;
+}
+
+static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap)
+{
+ task->thread.ckpt_regs.trap = trap & 0xfff0;
+ return 0;
+}
+#endif
+
#ifdef CONFIG_PPC64
static int get_user_dscr(struct task_struct *task, unsigned long *data)
{
return ret;
}
+/*
+ * When the transaction is active, 'transact_fp' holds the current running
+ * value of all FPR registers and 'fp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction. When transaction
+ * is not active 'fp_state' holds the current running state of all the FPR
+ * registers. So this function which returns the current running values of
+ * all the FPR registers, needs to know whether any transaction is active
+ * or not.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ *
+ * There are two config options CONFIG_VSX and CONFIG_PPC_TRANSACTIONAL_MEM
+ * which determines the final code in this function. All the combinations of
+ * these two config options are possible except the one below as transactional
+ * memory config pulls in CONFIG_VSX automatically.
+ *
+ * !defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM)
+ */
static int fpr_get(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
#endif
flush_fp_to_thread(target);
-#ifdef CONFIG_VSX
+#if defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM)
+ /* copy to local buffer then write that out */
+ if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
+ flush_altivec_to_thread(target);
+ flush_tmregs_to_thread(target);
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_TRANS_FPR(i);
+ buf[32] = target->thread.transact_fp.fpscr;
+ } else {
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_FPR(i);
+ buf[32] = target->thread.fp_state.fpscr;
+ }
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+#endif
+
+#if defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM)
/* copy to local buffer then write that out */
for (i = 0; i < 32 ; i++)
buf[i] = target->thread.TS_FPR(i);
buf[32] = target->thread.fp_state.fpscr;
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+#endif
-#else
+#if !defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM)
BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
offsetof(struct thread_fp_state, fpr[32]));
#endif
}
+/*
+ * When the transaction is active, 'transact_fp' holds the current running
+ * value of all FPR registers and 'fp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction. When transaction
+ * is not active 'fp_state' holds the current running state of all the FPR
+ * registers. So this function which setss the current running values of
+ * all the FPR registers, needs to know whether any transaction is active
+ * or not.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ *
+ * There are two config options CONFIG_VSX and CONFIG_PPC_TRANSACTIONAL_MEM
+ * which determines the final code in this function. All the combinations of
+ * these two config options are possible except the one below as transactional
+ * memory config pulls in CONFIG_VSX automatically.
+ *
+ * !defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM)
+ */
static int fpr_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
#endif
flush_fp_to_thread(target);
-#ifdef CONFIG_VSX
+#if defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM)
+ /* copy to local buffer then write that out */
+ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+ if (i)
+ return i;
+
+ if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
+ flush_altivec_to_thread(target);
+ flush_tmregs_to_thread(target);
+ for (i = 0; i < 32 ; i++)
+ target->thread.TS_TRANS_FPR(i) = buf[i];
+ target->thread.transact_fp.fpscr = buf[32];
+ } else {
+ for (i = 0; i < 32 ; i++)
+ target->thread.TS_FPR(i) = buf[i];
+ target->thread.fp_state.fpscr = buf[32];
+ }
+ return 0;
+#endif
+
+#if defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM)
/* copy to local buffer then write that out */
i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
if (i)
target->thread.TS_FPR(i) = buf[i];
target->thread.fp_state.fpscr = buf[32];
return 0;
-#else
+#endif
+
+#if !defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM)
BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
offsetof(struct thread_fp_state, fpr[32]));
return target->thread.used_vr ? regset->n : 0;
}
+/*
+ * When the transaction is active, 'transact_vr' holds the current running
+ * value of all the VMX registers and 'vr_state' holds the last checkpointed
+ * value of all the VMX registers for the current transaction to fall back
+ * on in case it aborts. When transaction is not active 'vr_state' holds
+ * the current running state of all the VMX registers. So this function which
+ * gets the current running values of all the VMX registers, needs to know
+ * whether any transaction is active or not.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ * };
+ */
static int vr_get(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
+ struct thread_vr_state *addr;
int ret;
flush_altivec_to_thread(target);
BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
offsetof(struct thread_vr_state, vr[32]));
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
+ flush_fp_to_thread(target);
+ flush_tmregs_to_thread(target);
+ addr = &target->thread.transact_vr;
+ } else {
+ addr = &target->thread.vr_state;
+ }
+#else
+ addr = &target->thread.vr_state;
+#endif
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.vr_state, 0,
+ addr, 0,
33 * sizeof(vector128));
if (!ret) {
/*
u32 word;
} vrsave;
memset(&vrsave, 0, sizeof(vrsave));
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (MSR_TM_ACTIVE(target->thread.regs->msr))
+ vrsave.word = target->thread.transact_vrsave;
+ else
+ vrsave.word = target->thread.vrsave;
+#else
vrsave.word = target->thread.vrsave;
+#endif
+
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
33 * sizeof(vector128), -1);
}
return ret;
}
+/*
+ * When the transaction is active, 'transact_vr' holds the current running
+ * value of all the VMX registers and 'vr_state' holds the last checkpointed
+ * value of all the VMX registers for the current transaction to fall back
+ * on in case it aborts. When transaction is not active 'vr_state' holds
+ * the current running state of all the VMX registers. So this function which
+ * sets the current running values of all the VMX registers, needs to know
+ * whether any transaction is active or not.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ * };
+ */
static int vr_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
+ struct thread_vr_state *addr;
int ret;
flush_altivec_to_thread(target);
BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
offsetof(struct thread_vr_state, vr[32]));
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
+ flush_fp_to_thread(target);
+ flush_tmregs_to_thread(target);
+ addr = &target->thread.transact_vr;
+ } else {
+ addr = &target->thread.vr_state;
+ }
+#else
+ addr = &target->thread.vr_state;
+#endif
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.vr_state, 0,
+ addr, 0,
33 * sizeof(vector128));
if (!ret && count > 0) {
/*
u32 word;
} vrsave;
memset(&vrsave, 0, sizeof(vrsave));
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (MSR_TM_ACTIVE(target->thread.regs->msr))
+ vrsave.word = target->thread.transact_vrsave;
+ else
+ vrsave.word = target->thread.vrsave;
+#else
vrsave.word = target->thread.vrsave;
+#endif
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
33 * sizeof(vector128), -1);
- if (!ret)
+ if (!ret) {
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (MSR_TM_ACTIVE(target->thread.regs->msr))
+ target->thread.transact_vrsave = vrsave.word;
+ else
+ target->thread.vrsave = vrsave.word;
+#else
target->thread.vrsave = vrsave.word;
+#endif
+ }
}
return ret;
return target->thread.used_vsr ? regset->n : 0;
}
+/*
+ * When the transaction is active, 'transact_fp' holds the current running
+ * value of all FPR registers and 'fp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction. When transaction
+ * is not active 'fp_state' holds the current running state of all the FPR
+ * registers. So this function which returns the current running values of
+ * all the FPR registers, needs to know whether any transaction is active
+ * or not.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 vsx[32];
+ * };
+ */
static int vsr_get(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
u64 buf[32];
int ret, i;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_tmregs_to_thread(target);
+#endif
flush_vsx_to_thread(target);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.
+ transact_fp.fpr[i][TS_VSRLOWOFFSET];
+ } else {
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.
+ fp_state.fpr[i][TS_VSRLOWOFFSET];
+ }
+#else
for (i = 0; i < 32 ; i++)
buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+#endif
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
buf, 0, 32 * sizeof(double));
return ret;
}
+/*
+ * When the transaction is active, 'transact_fp' holds the current running
+ * value of all FPR registers and 'fp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction. When transaction
+ * is not active 'fp_state' holds the current running state of all the FPR
+ * registers. So this function which sets the current running values of all
+ * the FPR registers, needs to know whether any transaction is active or not.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 vsx[32];
+ * };
+ */
static int vsr_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
u64 buf[32];
int ret,i;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_tmregs_to_thread(target);
+#endif
flush_vsx_to_thread(target);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
buf, 0, 32 * sizeof(double));
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
+ for (i = 0; i < 32 ; i++)
+ target->thread.transact_fp.
+ fpr[i][TS_VSRLOWOFFSET] = buf[i];
+ } else {
+ for (i = 0; i < 32 ; i++)
+ target->thread.fp_state.
+ fpr[i][TS_VSRLOWOFFSET] = buf[i];
+ }
+#else
for (i = 0; i < 32 ; i++)
target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+#endif
return ret;
}
#endif /* CONFIG_SPE */
-
-/*
- * These are our native regset flavors.
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/**
+ * tm_cgpr_active - get active number of registers in CGPR
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in transaction checkpointed GPR category.
*/
-enum powerpc_regset {
- REGSET_GPR,
- REGSET_FPR,
-#ifdef CONFIG_ALTIVEC
- REGSET_VMX,
-#endif
-#ifdef CONFIG_VSX
- REGSET_VSX,
-#endif
-#ifdef CONFIG_SPE
- REGSET_SPE,
-#endif
-};
-
-static const struct user_regset native_regsets[] = {
- [REGSET_GPR] = {
- .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
- .size = sizeof(long), .align = sizeof(long),
- .get = gpr_get, .set = gpr_set
- },
- [REGSET_FPR] = {
- .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
- .size = sizeof(double), .align = sizeof(double),
- .get = fpr_get, .set = fpr_set
- },
-#ifdef CONFIG_ALTIVEC
- [REGSET_VMX] = {
- .core_note_type = NT_PPC_VMX, .n = 34,
- .size = sizeof(vector128), .align = sizeof(vector128),
- .active = vr_active, .get = vr_get, .set = vr_set
- },
-#endif
-#ifdef CONFIG_VSX
- [REGSET_VSX] = {
- .core_note_type = NT_PPC_VSX, .n = 32,
- .size = sizeof(double), .align = sizeof(double),
- .active = vsr_active, .get = vsr_get, .set = vsr_set
- },
-#endif
-#ifdef CONFIG_SPE
- [REGSET_SPE] = {
- .core_note_type = NT_PPC_SPE, .n = 35,
- .size = sizeof(u32), .align = sizeof(u32),
- .active = evr_active, .get = evr_get, .set = evr_set
- },
-#endif
-};
+static int tm_cgpr_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
-static const struct user_regset_view user_ppc_native_view = {
- .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI,
- .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
-};
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
-#ifdef CONFIG_PPC64
-#include <linux/compat.h>
+ return regset->n;
+}
-static int gpr32_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+/**
+ * tm_cgpr_get - get CGPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy from.
+ * @ubuf: User buffer to copy into.
+ *
+ * This function gets transaction checkpointed GPR registers.
+ *
+ * When the transaction is active, 'ckpt_regs' holds all the checkpointed
+ * GPR register values for the current transaction to fall back on if it
+ * aborts in between. This function gets those checkpointed GPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * struct pt_regs ckpt_regs;
+ * };
+ */
+static int tm_cgpr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
{
- const unsigned long *regs = &target->thread.regs->gpr[0];
- compat_ulong_t *k = kbuf;
- compat_ulong_t __user *u = ubuf;
- compat_ulong_t reg;
- int i;
+ int ret;
- if (target->thread.regs == NULL)
- return -EIO;
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
- if (!FULL_REGS(target->thread.regs)) {
- /* We have a partial register set. Fill 14-31 with bogus values */
- for (i = 14; i < 32; i++)
- target->thread.regs->gpr[i] = NV_REG_POISON;
- }
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
- pos /= sizeof(reg);
- count /= sizeof(reg);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_tmregs_to_thread(target);
- if (kbuf)
- for (; count > 0 && pos < PT_MSR; --count)
- *k++ = regs[pos++];
- else
- for (; count > 0 && pos < PT_MSR; --count)
- if (__put_user((compat_ulong_t) regs[pos++], u++))
- return -EFAULT;
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs,
+ 0, offsetof(struct pt_regs, msr));
+ if (!ret) {
+ unsigned long msr = get_user_ckpt_msr(target);
- if (count > 0 && pos == PT_MSR) {
- reg = get_user_msr(target);
- if (kbuf)
- *k++ = reg;
- else if (__put_user(reg, u++))
- return -EFAULT;
- ++pos;
- --count;
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr,
+ offsetof(struct pt_regs, msr),
+ offsetof(struct pt_regs, msr) +
+ sizeof(msr));
}
- if (kbuf)
- for (; count > 0 && pos < PT_REGS_COUNT; --count)
- *k++ = regs[pos++];
- else
- for (; count > 0 && pos < PT_REGS_COUNT; --count)
- if (__put_user((compat_ulong_t) regs[pos++], u++))
- return -EFAULT;
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
- kbuf = k;
- ubuf = u;
- pos *= sizeof(reg);
- count *= sizeof(reg);
- return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- PT_REGS_COUNT * sizeof(reg), -1);
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs.orig_gpr3,
+ offsetof(struct pt_regs, orig_gpr3),
+ sizeof(struct pt_regs));
+ if (!ret)
+ ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+ sizeof(struct pt_regs), -1);
+
+ return ret;
}
-static int gpr32_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
+/*
+ * tm_cgpr_set - set the CGPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed GPR registers.
+ *
+ * When the transaction is active, 'ckpt_regs' holds the checkpointed
+ * GPR register values for the current transaction to fall back on if it
+ * aborts in between. This function sets those checkpointed GPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * struct pt_regs ckpt_regs;
+ * };
+ */
+static int tm_cgpr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
{
- unsigned long *regs = &target->thread.regs->gpr[0];
- const compat_ulong_t *k = kbuf;
- const compat_ulong_t __user *u = ubuf;
- compat_ulong_t reg;
+ unsigned long reg;
+ int ret;
- if (target->thread.regs == NULL)
- return -EIO;
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
- CHECK_FULL_REGS(target->thread.regs);
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
- pos /= sizeof(reg);
- count /= sizeof(reg);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_tmregs_to_thread(target);
- if (kbuf)
- for (; count > 0 && pos < PT_MSR; --count)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs,
+ 0, PT_MSR * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®,
+ PT_MSR * sizeof(reg),
+ (PT_MSR + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_ckpt_msr(target, reg);
+ }
+
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs.orig_gpr3,
+ PT_ORIG_R3 * sizeof(reg),
+ (PT_MAX_PUT_REG + 1) * sizeof(reg));
+
+ if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
+ ret = user_regset_copyin_ignore(
+ &pos, &count, &kbuf, &ubuf,
+ (PT_MAX_PUT_REG + 1) * sizeof(reg),
+ PT_TRAP * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®,
+ PT_TRAP * sizeof(reg),
+ (PT_TRAP + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_ckpt_trap(target, reg);
+ }
+
+ if (!ret)
+ ret = user_regset_copyin_ignore(
+ &pos, &count, &kbuf, &ubuf,
+ (PT_TRAP + 1) * sizeof(reg), -1);
+
+ return ret;
+}
+
+/**
+ * tm_cfpr_active - get active number of registers in CFPR
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in transaction checkpointed FPR category.
+ */
+static int tm_cfpr_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
+
+ return regset->n;
+}
+
+/**
+ * tm_cfpr_get - get CFPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy from.
+ * @ubuf: User buffer to copy into.
+ *
+ * This function gets in transaction checkpointed FPR registers.
+ *
+ * When the transaction is active 'fp_state' holds the checkpointed
+ * values for the current transaction to fall back on if it aborts
+ * in between. This function gets those checkpointed FPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ *};
+ */
+static int tm_cfpr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ u64 buf[33];
+ int i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_tmregs_to_thread(target);
+
+ /* copy to local buffer then write that out */
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_FPR(i);
+ buf[32] = target->thread.fp_state.fpscr;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+}
+
+/**
+ * tm_cfpr_set - set CFPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed FPR registers.
+ *
+ * When the transaction is active 'fp_state' holds the checkpointed
+ * FPR register values for the current transaction to fall back on
+ * if it aborts in between. This function sets these checkpointed
+ * FPR registers. The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ *};
+ */
+static int tm_cfpr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[33];
+ int i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_tmregs_to_thread(target);
+
+ /* copy to local buffer then write that out */
+ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+ if (i)
+ return i;
+ for (i = 0; i < 32 ; i++)
+ target->thread.TS_FPR(i) = buf[i];
+ target->thread.fp_state.fpscr = buf[32];
+ return 0;
+}
+
+/**
+ * tm_cvmx_active - get active number of registers in CVMX
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in checkpointed VMX category.
+ */
+static int tm_cvmx_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
+
+ return regset->n;
+}
+
+/**
+ * tm_cvmx_get - get CMVX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy from.
+ * @ubuf: User buffer to copy into.
+ *
+ * This function gets in transaction checkpointed VMX registers.
+ *
+ * When the transaction is active 'vr_state' and 'vr_save' hold
+ * the checkpointed values for the current transaction to fall
+ * back on if it aborts in between. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ *};
+ */
+static int tm_cvmx_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ /* Flush the state */
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_tmregs_to_thread(target);
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.vr_state, 0,
+ 33 * sizeof(vector128));
+ if (!ret) {
+ /*
+ * Copy out only the low-order word of vrsave.
+ */
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+ memset(&vrsave, 0, sizeof(vrsave));
+ vrsave.word = target->thread.vrsave;
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
+ 33 * sizeof(vector128), -1);
+ }
+
+ return ret;
+}
+
+/**
+ * tm_cvmx_set - set CMVX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed VMX registers.
+ *
+ * When the transaction is active 'vr_state' and 'vr_save' hold
+ * the checkpointed values for the current transaction to fall
+ * back on if it aborts in between. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ *};
+ */
+static int tm_cvmx_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_tmregs_to_thread(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.vr_state, 0,
+ 33 * sizeof(vector128));
+ if (!ret && count > 0) {
+ /*
+ * We use only the low-order word of vrsave.
+ */
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+ memset(&vrsave, 0, sizeof(vrsave));
+ vrsave.word = target->thread.vrsave;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
+ 33 * sizeof(vector128), -1);
+ if (!ret)
+ target->thread.vrsave = vrsave.word;
+ }
+
+ return ret;
+}
+
+/**
+ * tm_cvsx_active - get active number of registers in CVSX
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in transaction checkpointed VSX category.
+ */
+static int tm_cvsx_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
+
+ flush_vsx_to_thread(target);
+ return target->thread.used_vsr ? regset->n : 0;
+}
+
+/**
+ * tm_cvsx_get - get CVSX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy from.
+ * @ubuf: User buffer to copy into.
+ *
+ * This function gets in transaction checkpointed VSX registers.
+ *
+ * When the transaction is active 'fp_state' holds the checkpointed
+ * values for the current transaction to fall back on if it aborts
+ * in between. This function gets those checkpointed VSX registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 vsx[32];
+ *};
+ */
+static int tm_cvsx_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ u64 buf[32];
+ int ret, i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ /* Flush the state */
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_tmregs_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+
+ return ret;
+}
+
+/**
+ * tm_cvsx_set - set CFPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed VSX registers.
+ *
+ * When the transaction is active 'fp_state' holds the checkpointed
+ * VSX register values for the current transaction to fall back on
+ * if it aborts in between. This function sets these checkpointed
+ * FPR registers. The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 vsx[32];
+ *};
+ */
+static int tm_cvsx_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[32];
+ int ret, i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ /* Flush the state */
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_tmregs_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+ for (i = 0; i < 32 ; i++)
+ target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+
+ return ret;
+}
+
+/**
+ * tm_spr_active - get active number of registers in TM SPR
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks the active number of available
+ * regisers in the transactional memory SPR category.
+ */
+static int tm_spr_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ return regset->n;
+}
+
+/**
+ * tm_spr_get - get the TM related SPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy from.
+ * @ubuf: User buffer to copy into.
+ *
+ * This function gets transactional memory related SPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct {
+ * u64 tm_tfhar;
+ * u64 tm_texasr;
+ * u64 tm_tfiar;
+ * };
+ */
+static int tm_spr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ /* Build tests */
+ BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
+ BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
+ BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ /* Flush the states */
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_tmregs_to_thread(target);
+
+ /* TFHAR register */
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tfhar, 0, sizeof(u64));
+
+ /* TEXASR register */
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_texasr, sizeof(u64),
+ 2 * sizeof(u64));
+
+ /* TFIAR register */
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tfiar,
+ 2 * sizeof(u64), 3 * sizeof(u64));
+ return ret;
+}
+
+/**
+ * tm_spr_set - set the TM related SPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets transactional memory related SPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct {
+ * u64 tm_tfhar;
+ * u64 tm_texasr;
+ * u64 tm_tfiar;
+ * };
+ */
+static int tm_spr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ /* Build tests */
+ BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
+ BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
+ BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ /* Flush the states */
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_tmregs_to_thread(target);
+
+ /* TFHAR register */
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tfhar, 0, sizeof(u64));
+
+ /* TEXASR register */
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_texasr, sizeof(u64),
+ 2 * sizeof(u64));
+
+ /* TFIAR register */
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tfiar,
+ 2 * sizeof(u64), 3 * sizeof(u64));
+ return ret;
+}
+
+static int tm_tar_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (MSR_TM_ACTIVE(target->thread.regs->msr))
+ return regset->n;
+
+ return 0;
+}
+
+static int tm_tar_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tar, 0, sizeof(u64));
+ return ret;
+}
+
+static int tm_tar_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tar, 0, sizeof(u64));
+ return ret;
+}
+
+static int tm_ppr_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (MSR_TM_ACTIVE(target->thread.regs->msr))
+ return regset->n;
+
+ return 0;
+}
+
+
+static int tm_ppr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_ppr, 0, sizeof(u64));
+ return ret;
+}
+
+static int tm_ppr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_ppr, 0, sizeof(u64));
+ return ret;
+}
+
+static int tm_dscr_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (MSR_TM_ACTIVE(target->thread.regs->msr))
+ return regset->n;
+
+ return 0;
+}
+
+static int tm_dscr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_dscr, 0, sizeof(u64));
+ return ret;
+}
+
+static int tm_dscr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_dscr, 0, sizeof(u64));
+ return ret;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+#ifdef CONFIG_PPC64
+static int ppr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ppr, 0, sizeof(u64));
+ return ret;
+}
+
+static int ppr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ppr, 0, sizeof(u64));
+ return ret;
+}
+
+static int dscr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.dscr, 0, sizeof(u64));
+ return ret;
+}
+static int dscr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.dscr, 0, sizeof(u64));
+ return ret;
+}
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+static int tar_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tar, 0, sizeof(u64));
+ return ret;
+}
+static int tar_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tar, 0, sizeof(u64));
+ return ret;
+}
+
+static int ebb_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ if (target->thread.used_ebb)
+ return regset->n;
+
+ return 0;
+}
+
+static int ebb_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ /* Build tests */
+ BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
+ BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ if (!target->thread.used_ebb)
+ return -ENODATA;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ebbrr, 0, 3 * sizeof(unsigned long));
+}
+
+static int ebb_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret = 0;
+
+ /* Build tests */
+ BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
+ BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ if (target->thread.used_ebb)
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ebbrr, 0, sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ebbhr, sizeof(unsigned long),
+ 2 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.bescr,
+ 2 * sizeof(unsigned long), 3 * sizeof(unsigned long));
+
+ return ret;
+}
+static int pmu_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ return regset->n;
+}
+
+static int pmu_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ /* Build tests */
+ BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
+ BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
+ BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
+ BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.siar, 0,
+ 5 * sizeof(unsigned long));
+}
+
+static int pmu_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret = 0;
+
+ /* Build tests */
+ BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
+ BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
+ BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
+ BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.siar, 0,
+ sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.sdar, sizeof(unsigned long),
+ 2 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.sier, 2 * sizeof(unsigned long),
+ 3 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.mmcr2, 3 * sizeof(unsigned long),
+ 4 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.mmcr0, 4 * sizeof(unsigned long),
+ 5 * sizeof(unsigned long));
+ return ret;
+}
+#endif
+/*
+ * These are our native regset flavors.
+ */
+enum powerpc_regset {
+ REGSET_GPR,
+ REGSET_FPR,
+#ifdef CONFIG_ALTIVEC
+ REGSET_VMX,
+#endif
+#ifdef CONFIG_VSX
+ REGSET_VSX,
+#endif
+#ifdef CONFIG_SPE
+ REGSET_SPE,
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ REGSET_TM_CGPR, /* TM checkpointed GPR registers */
+ REGSET_TM_CFPR, /* TM checkpointed FPR registers */
+ REGSET_TM_CVMX, /* TM checkpointed VMX registers */
+ REGSET_TM_CVSX, /* TM checkpointed VSX registers */
+ REGSET_TM_SPR, /* TM specific SPR registers */
+ REGSET_TM_CTAR, /* TM checkpointed TAR register */
+ REGSET_TM_CPPR, /* TM checkpointed PPR register */
+ REGSET_TM_CDSCR, /* TM checkpointed DSCR register */
+#endif
+#ifdef CONFIG_PPC64
+ REGSET_PPR, /* PPR register */
+ REGSET_DSCR, /* DSCR register */
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ REGSET_TAR, /* TAR register */
+ REGSET_EBB, /* EBB registers */
+ REGSET_PMR, /* Performance Monitor Registers */
+#endif
+};
+
+static const struct user_regset native_regsets[] = {
+ [REGSET_GPR] = {
+ .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .get = gpr_get, .set = gpr_set
+ },
+ [REGSET_FPR] = {
+ .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .get = fpr_get, .set = fpr_set
+ },
+#ifdef CONFIG_ALTIVEC
+ [REGSET_VMX] = {
+ .core_note_type = NT_PPC_VMX, .n = 34,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = vr_active, .get = vr_get, .set = vr_set
+ },
+#endif
+#ifdef CONFIG_VSX
+ [REGSET_VSX] = {
+ .core_note_type = NT_PPC_VSX, .n = 32,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = vsr_active, .get = vsr_get, .set = vsr_set
+ },
+#endif
+#ifdef CONFIG_SPE
+ [REGSET_SPE] = {
+ .core_note_type = NT_PPC_SPE, .n = 35,
+ .size = sizeof(u32), .align = sizeof(u32),
+ .active = evr_active, .get = evr_get, .set = evr_set
+ },
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ [REGSET_TM_CGPR] = {
+ .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set
+ },
+ [REGSET_TM_CFPR] = {
+ .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set
+ },
+ [REGSET_TM_CVMX] = {
+ .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set
+ },
+ [REGSET_TM_CVSX] = {
+ .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set
+ },
+ [REGSET_TM_SPR] = {
+ .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set
+ },
+ [REGSET_TM_CTAR] = {
+ .core_note_type = NT_PPC_TM_CTAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set
+ },
+ [REGSET_TM_CPPR] = {
+ .core_note_type = NT_PPC_TM_CPPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set
+ },
+ [REGSET_TM_CDSCR] = {
+ .core_note_type = NT_PPC_TM_CDSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC64
+ [REGSET_PPR] = {
+ .core_note_type = NT_PPC_PPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = ppr_get, .set = ppr_set
+ },
+ [REGSET_DSCR] = {
+ .core_note_type = NT_PPC_DSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = dscr_get, .set = dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ [REGSET_TAR] = {
+ .core_note_type = NT_PPC_TAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = tar_get, .set = tar_set
+ },
+ [REGSET_EBB] = {
+ .core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = ebb_active, .get = ebb_get, .set = ebb_set
+ },
+ [REGSET_PMR] = {
+ .core_note_type = NT_PPC_PMU, .n = ELF_NPMU,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = pmu_active, .get = pmu_get, .set = pmu_set
+ },
+#endif
+};
+
+static const struct user_regset_view user_ppc_native_view = {
+ .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI,
+ .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
+};
+
+#ifdef CONFIG_PPC64
+#include <linux/compat.h>
+
+static int gpr32_get_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf, bool tm_active)
+{
+ const unsigned long *regs = &target->thread.regs->gpr[0];
+ const unsigned long *ckpt_regs;
+ compat_ulong_t *k = kbuf;
+ compat_ulong_t __user *u = ubuf;
+ compat_ulong_t reg;
+ int i;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ ckpt_regs = &target->thread.ckpt_regs.gpr[0];
+#endif
+ if (tm_active) {
+ regs = ckpt_regs;
+ } else {
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ if (!FULL_REGS(target->thread.regs)) {
+ /*
+ * We have a partial register set.
+ * Fill 14-31 with bogus values.
+ */
+ for (i = 14; i < 32; i++)
+ target->thread.regs->gpr[i] = NV_REG_POISON;
+ }
+ }
+
+ pos /= sizeof(reg);
+ count /= sizeof(reg);
+
+ if (kbuf)
+ for (; count > 0 && pos < PT_MSR; --count)
+ *k++ = regs[pos++];
+ else
+ for (; count > 0 && pos < PT_MSR; --count)
+ if (__put_user((compat_ulong_t) regs[pos++], u++))
+ return -EFAULT;
+
+ if (count > 0 && pos == PT_MSR) {
+ reg = get_user_msr(target);
+ if (kbuf)
+ *k++ = reg;
+ else if (__put_user(reg, u++))
+ return -EFAULT;
+ ++pos;
+ --count;
+ }
+
+ if (kbuf)
+ for (; count > 0 && pos < PT_REGS_COUNT; --count)
+ *k++ = regs[pos++];
+ else
+ for (; count > 0 && pos < PT_REGS_COUNT; --count)
+ if (__put_user((compat_ulong_t) regs[pos++], u++))
+ return -EFAULT;
+
+ kbuf = k;
+ ubuf = u;
+ pos *= sizeof(reg);
+ count *= sizeof(reg);
+ return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+ PT_REGS_COUNT * sizeof(reg), -1);
+}
+
+static int gpr32_set_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf, bool tm_active)
+{
+ unsigned long *regs = &target->thread.regs->gpr[0];
+ unsigned long *ckpt_regs;
+ const compat_ulong_t *k = kbuf;
+ const compat_ulong_t __user *u = ubuf;
+ compat_ulong_t reg;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ ckpt_regs = &target->thread.ckpt_regs.gpr[0];
+#endif
+
+ if (tm_active) {
+ regs = ckpt_regs;
+ } else {
+ regs = &target->thread.regs->gpr[0];
+
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ CHECK_FULL_REGS(target->thread.regs);
+ }
+
+ pos /= sizeof(reg);
+ count /= sizeof(reg);
+
+ if (kbuf)
+ for (; count > 0 && pos < PT_MSR; --count)
regs[pos++] = *k++;
else
for (; count > 0 && pos < PT_MSR; --count) {
(PT_TRAP + 1) * sizeof(reg), -1);
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static int tm_cgpr32_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, 1);
+}
+
+static int tm_cgpr32_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, 1);
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+static int gpr32_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, 0);
+}
+
+static int gpr32_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, 0);
+}
+
/*
* These are the regset flavors matching the CONFIG_PPC32 native set.
*/
.active = evr_active, .get = evr_get, .set = evr_set
},
#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ [REGSET_TM_CGPR] = {
+ .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .active = tm_cgpr_active,
+ .get = tm_cgpr32_get, .set = tm_cgpr32_set
+ },
+ [REGSET_TM_CFPR] = {
+ .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set
+ },
+ [REGSET_TM_CVMX] = {
+ .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set
+ },
+ [REGSET_TM_CVSX] = {
+ .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set
+ },
+ [REGSET_TM_SPR] = {
+ .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set
+ },
+ [REGSET_TM_CTAR] = {
+ .core_note_type = NT_PPC_TM_CTAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set
+ },
+ [REGSET_TM_CPPR] = {
+ .core_note_type = NT_PPC_TM_CPPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set
+ },
+ [REGSET_TM_CDSCR] = {
+ .core_note_type = NT_PPC_TM_CDSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC64
+ [REGSET_PPR] = {
+ .core_note_type = NT_PPC_PPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = ppr_get, .set = ppr_set
+ },
+ [REGSET_DSCR] = {
+ .core_note_type = NT_PPC_DSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = dscr_get, .set = dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ [REGSET_TAR] = {
+ .core_note_type = NT_PPC_TAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = tar_get, .set = tar_set
+ },
+ [REGSET_EBB] = {
+ .core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = ebb_active, .get = ebb_get, .set = ebb_set
+ },
+#endif
};
static const struct user_regset_view user_ppc_compat_view = {
#include <asm/hugetlb.h>
#include <asm/livepatch.h>
#include <asm/mmu_context.h>
+#include <asm/cpu_has_feature.h>
#include "setup.h"
#include <asm/serial.h>
#include <asm/udbg.h>
#include <asm/code-patching.h>
+#include <asm/cpu_has_feature.h>
#define DBG(fmt...)
opal_configure_cores();
/* Enable AIL if supported, and we are in hypervisor mode */
- if (cpu_has_feature(CPU_FTR_HVMODE) &&
- cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
+ early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
unsigned long lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
}
*/
configure_exceptions();
- /* Initialize the hash table or TLB handling */
- early_init_mmu();
-
/* Apply all the dynamic patching */
apply_feature_fixups();
+ /* Initialize the hash table or TLB handling */
+ early_init_mmu();
+
/*
* At this point, we can let interrupts switch to virtual mode
* (the MMU has been setup), so adjust the MSR in the PACA to
#include <asm/debug.h>
#include <asm/kexec.h>
#include <asm/asm-prototypes.h>
+#include <asm/cpu_has_feature.h>
#ifdef DEBUG
#include <asm/udbg.h>
#include <linux/irq_work.h>
#include <linux/clk-provider.h>
#include <linux/suspend.h>
+#include <linux/rtc.h>
#include <asm/trace.h>
#include <asm/io.h>
loops_per_jiffy = tb_ticks_per_jiffy;
}
+#if IS_ENABLED(CONFIG_RTC_DRV_GENERIC)
+static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
+{
+ ppc_md.get_rtc_time(tm);
+ return rtc_valid_tm(tm);
+}
+
+static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
+{
+ if (!ppc_md.set_rtc_time)
+ return -EOPNOTSUPP;
+
+ if (ppc_md.set_rtc_time(tm) < 0)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static const struct rtc_class_ops rtc_generic_ops = {
+ .read_time = rtc_generic_get_time,
+ .set_time = rtc_generic_set_time,
+};
+
static int __init rtc_init(void)
{
struct platform_device *pdev;
if (!ppc_md.get_rtc_time)
return -ENODEV;
- pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+ pdev = platform_device_register_data(NULL, "rtc-generic", -1,
+ &rtc_generic_ops,
+ sizeof(rtc_generic_ops));
return PTR_ERR_OR_ZERO(pdev);
}
device_initcall(rtc_init);
+#endif
*/
#include <linux/types.h>
+#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
#endif
}
-void apply_feature_fixups(void)
+static unsigned long __initdata saved_cpu_features;
+static unsigned int __initdata saved_mmu_features;
+#ifdef CONFIG_PPC64
+static unsigned long __initdata saved_firmware_features;
+#endif
+
+void __init apply_feature_fixups(void)
{
- struct cpu_spec *spec = *PTRRELOC(&cur_cpu_spec);
+ struct cpu_spec *spec = PTRRELOC(*PTRRELOC(&cur_cpu_spec));
+
+ *PTRRELOC(&saved_cpu_features) = spec->cpu_features;
+ *PTRRELOC(&saved_mmu_features) = spec->mmu_features;
/*
* Apply the CPU-specific and firmware specific fixups to kernel text
PTRRELOC(&__stop___lwsync_fixup));
#ifdef CONFIG_PPC64
+ saved_firmware_features = powerpc_firmware_features;
do_feature_fixups(powerpc_firmware_features,
&__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
#endif
do_final_fixups();
+
+ /*
+ * Initialise jump label. This causes all the cpu/mmu_has_feature()
+ * checks to take on their correct polarity based on the current set of
+ * CPU/MMU features.
+ */
+ jump_label_init();
+ cpu_feature_keys_init();
+ mmu_feature_keys_init();
+}
+
+static int __init check_features(void)
+{
+ WARN(saved_cpu_features != cur_cpu_spec->cpu_features,
+ "CPU features changed after feature patching!\n");
+ WARN(saved_mmu_features != cur_cpu_spec->mmu_features,
+ "MMU features changed after feature patching!\n");
+#ifdef CONFIG_PPC64
+ WARN(saved_firmware_features != powerpc_firmware_features,
+ "Firmware features changed after feature patching!\n");
+#endif
+
+ return 0;
}
+late_initcall(check_features);
#ifdef CONFIG_FTR_FIXUP_SELFTEST
/* clear out bits after (52) [0....52.....63] */
va &= ~((1ul << (64 - 52)) - 1);
va |= ssize << 8;
- sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
- ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
+ sllp = get_sllp_encoding(apsize);
va |= sllp << 5;
asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
: : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
/* clear out bits after(52) [0....52.....63] */
va &= ~((1ul << (64 - 52)) - 1);
va |= ssize << 8;
- sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
- ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
+ sllp = get_sllp_encoding(apsize);
va |= sllp << 5;
asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
: : "r"(va) : "memory");
mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate;
if (cpu_has_feature(CPU_FTR_ARCH_300))
- ppc_md.register_process_table = native_register_proc_table;
+ register_process_table = native_register_proc_table;
}
return 0;
}
-static void __init htab_init_seg_sizes(void)
-{
- of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
-}
-
static int __init get_idx_from_shift(unsigned int shift)
{
int idx = -1;
#endif /* #ifdef CONFIG_PPC_64K_PAGES */
-static void __init htab_init_page_sizes(void)
+static void __init htab_scan_page_sizes(void)
{
int rc;
* Try to find the available page sizes in the device-tree
*/
rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
- if (rc != 0) /* Found */
- goto found;
-
- /*
- * Not in the device-tree, let's fallback on known size
- * list for 16M capable GP & GR
- */
- if (mmu_has_feature(MMU_FTR_16M_PAGE))
+ if (rc == 0 && early_mmu_has_feature(MMU_FTR_16M_PAGE)) {
+ /*
+ * Nothing in the device-tree, but the CPU supports 16M pages,
+ * so let's fallback on a known size list for 16M capable CPUs.
+ */
memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
sizeof(mmu_psize_defaults_gp));
-found:
+ }
+
+#ifdef CONFIG_HUGETLB_PAGE
+ /* Reserve 16G huge page memory sections for huge pages */
+ of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
+#endif /* CONFIG_HUGETLB_PAGE */
+}
+
+static void __init htab_init_page_sizes(void)
+{
if (!debug_pagealloc_enabled()) {
/*
* Pick a size for the linear mapping. Currently, we only
,mmu_psize_defs[mmu_vmemmap_psize].shift
#endif
);
-
-#ifdef CONFIG_HUGETLB_PAGE
- /* Reserve 16G huge page memory sections for huge pages */
- of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
-#endif /* CONFIG_HUGETLB_PAGE */
}
static int __init htab_dt_scan_pftsize(unsigned long node,
DBG(" -> htab_initialize()\n");
- /* Initialize segment sizes */
- htab_init_seg_sizes();
-
- /* Initialize page sizes */
- htab_init_page_sizes();
-
if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
mmu_kernel_ssize = MMU_SEGSIZE_1T;
mmu_highuser_ssize = MMU_SEGSIZE_1T;
#undef KB
#undef MB
+void __init hash__early_init_devtree(void)
+{
+ /* Initialize segment sizes */
+ of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
+
+ /* Initialize page sizes */
+ htab_scan_page_sizes();
+}
+
void __init hash__early_init_mmu(void)
{
+ htab_init_page_sizes();
+
/*
* initialize page table size
*/
#include <asm/cacheflush.h>
#include <asm/machdep.h>
#include <asm/mman.h>
+#include <asm/tlb.h>
void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
- unsigned long ap, shift;
+ int psize;
struct hstate *hstate = hstate_file(vma->vm_file);
- shift = huge_page_shift(hstate);
- if (shift == mmu_psize_defs[MMU_PAGE_2M].shift)
- ap = mmu_get_ap(MMU_PAGE_2M);
- else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
- ap = mmu_get_ap(MMU_PAGE_1G);
- else {
- WARN(1, "Wrong huge page shift\n");
- return ;
- }
- radix___flush_tlb_page(vma->vm_mm, vmaddr, ap, 0);
+ psize = hstate_get_psize(hstate);
+ radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, psize);
}
void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
- unsigned long ap, shift;
+ int psize;
struct hstate *hstate = hstate_file(vma->vm_file);
- shift = huge_page_shift(hstate);
- if (shift == mmu_psize_defs[MMU_PAGE_2M].shift)
- ap = mmu_get_ap(MMU_PAGE_2M);
- else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
- ap = mmu_get_ap(MMU_PAGE_1G);
- else {
- WARN(1, "Wrong huge page shift\n");
- return ;
- }
- radix___local_flush_tlb_page(vma->vm_mm, vmaddr, ap, 0);
+ psize = hstate_get_psize(hstate);
+ radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, psize);
+}
+
+void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ int psize;
+ struct hstate *hstate = hstate_file(vma->vm_file);
+
+ psize = hstate_get_psize(hstate);
+ radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
}
/*
EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
#endif /* CONFIG_SPARSEMEM_VMEMMAP/CONFIG_FLATMEM */
+
+#ifdef CONFIG_PPC_STD_MMU_64
+static bool disable_radix;
+static int __init parse_disable_radix(char *p)
+{
+ disable_radix = true;
+ return 0;
+}
+early_param("disable_radix", parse_disable_radix);
+
+void __init mmu_early_init_devtree(void)
+{
+ /* Disable radix mode based on kernel command line. */
+ if (disable_radix)
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+
+ if (early_radix_enabled())
+ radix__early_init_devtree();
+ else
+ hash__early_init_devtree();
+}
+#endif /* CONFIG_PPC_STD_MMU_64 */
#include "mmu_decl.h"
#include <trace/events/thp.h>
+int (*register_process_table)(unsigned long base, unsigned long page_size,
+ unsigned long tbl_size);
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* This is called when relaxing access to a hugepage. It's also called in the page
changed = !pmd_same(*(pmdp), entry);
if (changed) {
__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
- flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
return changed;
}
pmd_t *pmdp)
{
pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
- flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
/*
* This ensures that generic code that rely on IRQ disabling
* to prevent a parallel THP split work as expected.
* of process table here. But our linear mapping also enable us to use
* physical address here.
*/
- ppc_md.register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
+ register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
}
void __init radix_init_native(void)
{
- ppc_md.register_process_table = native_register_process_table;
+ register_process_table = native_register_process_table;
}
static int __init get_idx_from_shift(unsigned int shift)
return 1;
}
-static void __init radix_init_page_sizes(void)
+void __init radix__early_init_devtree(void)
{
int rc;
__pte_frag_nr = H_PTE_FRAG_NR;
__pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
- radix_init_page_sizes();
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
radix_init_native();
lpcr = mfspr(SPRN_LPCR);
if (!is_vm_hugetlb_page(vma))
assert_pte_locked(vma->vm_mm, address);
__ptep_set_access_flags(ptep, entry);
- flush_tlb_page_nohash(vma, address);
+ flush_tlb_page(vma, address);
}
return changed;
}
}
EXPORT_SYMBOL(radix__local_flush_tlb_pwc);
-void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
- unsigned long ap, int nid)
+void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+ int psize)
{
unsigned long pid;
+ unsigned long ap = mmu_get_ap(psize);
preempt_disable();
pid = mm ? mm->context.id : 0;
if (vma && is_vm_hugetlb_page(vma))
return __local_flush_hugetlb_page(vma, vmaddr);
#endif
- radix___local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
- mmu_get_ap(mmu_virtual_psize), 0);
+ radix__local_flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
+ mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);
#ifdef CONFIG_SMP
-static int mm_is_core_local(struct mm_struct *mm)
-{
- return cpumask_subset(mm_cpumask(mm),
- topology_sibling_cpumask(smp_processor_id()));
-}
-
void radix__flush_tlb_mm(struct mm_struct *mm)
{
unsigned long pid;
}
EXPORT_SYMBOL(radix__flush_tlb_pwc);
-void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
- unsigned long ap, int nid)
+void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+ int psize)
{
unsigned long pid;
+ unsigned long ap = mmu_get_ap(psize);
preempt_disable();
pid = mm ? mm->context.id : 0;
if (vma && is_vm_hugetlb_page(vma))
return flush_hugetlb_page(vma, vmaddr);
#endif
- radix___flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
- mmu_get_ap(mmu_virtual_psize), 0);
+ radix__flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
+ mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);
void radix__tlb_flush(struct mmu_gather *tlb)
{
+ int psize = 0;
struct mm_struct *mm = tlb->mm;
- radix__flush_tlb_mm(mm);
+ int page_size = tlb->page_size;
+
+ psize = radix_get_mmu_psize(page_size);
+ /*
+ * if page size is not something we understand, do a full mm flush
+ */
+ if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all)
+ radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize);
+ else
+ radix__flush_tlb_mm(mm);
+}
+
+#define TLB_FLUSH_ALL -1UL
+/*
+ * Number of pages above which we will do a bcast tlbie. Just a
+ * number at this point copied from x86
+ */
+static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+
+void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
+{
+ unsigned long pid;
+ unsigned long addr;
+ int local = mm_is_core_local(mm);
+ unsigned long ap = mmu_get_ap(psize);
+ int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+ unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
+
+
+ preempt_disable();
+ pid = mm ? mm->context.id : 0;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ goto err_out;
+
+ if (end == TLB_FLUSH_ALL ||
+ (end - start) > tlb_single_page_flush_ceiling * page_size) {
+ if (local)
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ goto err_out;
+ }
+ for (addr = start; addr < end; addr += page_size) {
+
+ if (local)
+ _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+ else {
+ if (lock_tlbie)
+ raw_spin_lock(&native_tlbie_lock);
+ _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+ if (lock_tlbie)
+ raw_spin_unlock(&native_tlbie_lock);
+ }
+ }
+err_out:
+ preempt_enable();
}
void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
EXPORT_SYMBOL(radix__flush_tlb_lpid);
+
+void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
+}
+EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
}
EXPORT_SYMBOL(flush_hash_entry);
-/*
- * Called by ptep_set_access_flags, must flush on CPUs for which the
- * DSI handler can't just "fixup" the TLB on a write fault
- */
-void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr)
-{
- if (Hash != 0)
- return;
- _tlbie(addr);
-}
-
/*
* Called at the end of a mmu_gather operation to make sure the
* TLB flush is completely done.
static DEFINE_RAW_SPINLOCK(tlbivax_lock);
-static int mm_is_core_local(struct mm_struct *mm)
-{
- return cpumask_subset(mm_cpumask(mm),
- topology_sibling_cpumask(smp_processor_id()));
-}
-
struct tlb_flush_param {
unsigned long addr;
unsigned int pid;
/* Instruction Demand sectors wriittent into IL1 */
EVENT(PM_L1_DEMAND_WRITE, 0x0408c)
/* Instruction prefetch written into IL1 */
-EVENT(PM_IC_PREF_WRITE, 0x0408e)
+EVENT(PM_IC_PREF_WRITE, 0x0488c)
/* The data cache was reloaded from local core's L3 due to a demand load */
EVENT(PM_DATA_FROM_L3, 0x4c042)
/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
EVENT(PM_DATA_FROM_L3MISS, 0x300fe)
/* All successful D-side store dispatches for this thread */
-EVENT(PM_L2_ST, 0x16081)
+EVENT(PM_L2_ST, 0x16880)
/* All successful D-side store dispatches for this thread that were L2 Miss */
-EVENT(PM_L2_ST_MISS, 0x26081)
+EVENT(PM_L2_ST_MISS, 0x26880)
/* Total HW L3 prefetches(Load+store) */
EVENT(PM_L3_PREF_ALL, 0x4e052)
/* Data PTEG reload */
Uses information from the OF or flattened device tree to instantiate
platform devices for direct mapped RTC chips like the DS1742 or DS1743.
+config GEN_RTC
+ bool "Use the platform RTC operations from user space"
+ select RTC_CLASS
+ select RTC_DRV_GENERIC
+ help
+ This option provides backwards compatibility with the old gen_rtc.ko
+ module that was traditionally used for old PowerPC machines.
+ Platforms should migrate to enabling the RTC_DRV_GENERIC by hand
+ replacing their get_rtc_time/set_rtc_time callbacks with
+ a proper RTC device driver.
+
config SIMPLE_GPIO
bool "Support for simple, memory-mapped GPIO controllers"
depends on PPC
#include <asm/pgtable.h>
#include <asm/reg.h>
#include <asm/cell-regs.h>
+#include <asm/cpu_has_feature.h>
#include "pervasive.h"
#include <linux/kernel.h>
#include <linux/platform_device.h>
+#include <linux/rtc.h>
#include <asm/firmware.h>
-#include <asm/rtc.h>
#include <asm/lv1call.h>
#include <asm/ps3.h>
rmu_node = of_parse_phandle(dev->dev.of_node, "fsl,srio-rmu-handle", 0);
if (!rmu_node) {
dev_err(&dev->dev, "No valid fsl,srio-rmu-handle property\n");
+ rc = -ENOENT;
goto err_rmu;
}
rc = of_address_to_resource(rmu_node, 0, &rmu_regs);
Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */
#include <asm/cputable.h>
+#include <asm/cpu_has_feature.h>
#include "nonstdio.h"
#include "ansidecl.h"
#include "ppc.h"
+++ /dev/null
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
-
-#endif /* _ASM_MC146818RTC_H */
extern void (*rtc_sh_get_time)(struct timespec *);
extern int (*rtc_sh_set_time)(const time_t);
-/* some dummy definitions */
-#define RTC_BATT_BAD 0x100 /* battery bad */
-#define RTC_SQWE 0x08 /* enable square-wave output */
-#define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */
-#define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */
-#define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */
-
-struct rtc_time;
-unsigned int get_rtc_time(struct rtc_time *);
-int set_rtc_time(struct rtc_time *);
-
#define RTC_CAP_4_DIGIT_YEAR (1 << 0)
struct sh_rtc_platform_info {
}
#endif
-unsigned int get_rtc_time(struct rtc_time *tm)
+static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
{
- if (rtc_sh_get_time != null_rtc_get_time) {
- struct timespec tv;
+ struct timespec tv;
- rtc_sh_get_time(&tv);
- rtc_time_to_tm(tv.tv_sec, tm);
- }
-
- return RTC_24H;
+ rtc_sh_get_time(&tv);
+ rtc_time_to_tm(tv.tv_sec, tm);
+ return 0;
}
-EXPORT_SYMBOL(get_rtc_time);
-int set_rtc_time(struct rtc_time *tm)
+static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
{
unsigned long secs;
rtc_tm_to_time(tm, &secs);
- return rtc_sh_set_time(secs);
+ if ((rtc_sh_set_time == null_rtc_set_time) ||
+ (rtc_sh_set_time(secs) < 0))
+ return -EOPNOTSUPP;
+
+ return 0;
}
-EXPORT_SYMBOL(set_rtc_time);
+
+static const struct rtc_class_ops rtc_generic_ops = {
+ .read_time = rtc_generic_get_time,
+ .set_time = rtc_generic_set_time,
+};
static int __init rtc_generic_init(void)
{
if (rtc_sh_get_time == null_rtc_get_time)
return -ENODEV;
- pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+ pdev = platform_device_register_data(NULL, "rtc-generic", -1,
+ &rtc_generic_ops,
+ sizeof(rtc_generic_ops));
+
return PTR_ERR_OR_ZERO(pdev);
}
struct pci_dev;
void pci_iounmap(struct pci_dev *dev, void __iomem *);
-
-
-/*
- * At the moment, we do not use CMOS_READ anywhere outside of rtc.c,
- * so rtc_port is static in it. This should not change unless a new
- * hardware pops up.
- */
-#define RTC_PORT(x) (rtc_port + (x))
-#define RTC_ALWAYS_BCD 0
-
static inline int sbus_can_dma_64bit(void)
{
return 0; /* actually, sparc_cpu_model==sun4d */
config UML
bool
default y
+ select ARCH_HAS_KCOV
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_UID16
select HAVE_FUTEX_CMPXCHG if FUTEX
+ select HAVE_DEBUG_KMEMLEAK
select GENERIC_IRQ_SHOW
select GENERIC_CPU_DEVICES
select GENERIC_IO
config PCMCIA
bool
-# Yet to do!
config TRACE_IRQFLAGS_SUPPORT
bool
- default n
+ default y
config LOCKDEP_SUPPORT
bool
extern void block_signals(void);
extern void unblock_signals(void);
+#define arch_local_save_flags arch_local_save_flags
static inline unsigned long arch_local_save_flags(void)
{
return get_signals();
}
+#define arch_local_irq_restore arch_local_irq_restore
static inline void arch_local_irq_restore(unsigned long flags)
{
set_signals(flags);
}
+#define arch_local_irq_enable arch_local_irq_enable
static inline void arch_local_irq_enable(void)
{
unblock_signals();
}
+#define arch_local_irq_disable arch_local_irq_disable
static inline void arch_local_irq_disable(void)
{
block_signals();
}
-static inline unsigned long arch_local_irq_save(void)
-{
- unsigned long flags;
- flags = arch_local_save_flags();
- arch_local_irq_disable();
- return flags;
-}
+#define ARCH_IRQ_DISABLED 0
+#define ARCh_IRQ_ENABLED (SIGIO|SIGVTALRM)
-static inline bool arch_irqs_disabled(void)
-{
- return arch_local_save_flags() == 0;
-}
+#include <asm-generic/irqflags.h>
#endif
# Licensed under the GPL
#
+# Don't instrument UML-specific code; without this, we may crash when
+# accessing the instrumentation buffer for the first time from the
+# kernel.
+KCOV_INSTRUMENT := n
+
CPPFLAGS_vmlinux.lds := -DSTART=$(LDS_START) \
-DELF_ARCH=$(LDS_ELF_ARCH) \
-DELF_FORMAT=$(LDS_ELF_FORMAT) \
}
area = alloc_bootmem(size);
- if (area == NULL)
- return 0;
if (load_initrd(initrd, area, size) == -1)
return 0;
start_vm = VMALLOC_START;
- setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
- mem_total_pages(physmem_size, iomem_size, highmem);
-
virtmem_size = physmem_size;
stack = (unsigned long) argv;
stack &= ~(1024 * 1024 - 1);
printf("Kernel virtual memory size shrunk to %lu bytes\n",
virtmem_size);
- stack_protections((unsigned long) &init_thread_info);
os_flush_stdout();
return start_uml();
void __init setup_arch(char **cmdline_p)
{
+ stack_protections((unsigned long) &init_thread_info);
+ setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
+ mem_total_pages(physmem_size, iomem_size, highmem);
+
paging_init();
strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
# Licensed under the GPL
#
+# Don't instrument UML-specific code
+KCOV_INSTRUMENT := n
+
obj-y = aio.o execvp.o file.o helper.o irq.o main.o mem.o process.o \
registers.o sigio.o signal.o start_up.o time.o tty.o \
umid.o user_syms.o util.o drivers/ skas/
#include <kern_util.h>
#include <os.h>
#include <sysdep/mcontext.h>
+#include <um_malloc.h>
void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
[SIGTRAP] = relay_signal,
struct uml_pt_regs *r;
int save_errno = errno;
- r = malloc(sizeof(struct uml_pt_regs));
+ r = uml_kmalloc(sizeof(struct uml_pt_regs), UM_GFP_ATOMIC);
if (!r)
panic("out of memory");
{
struct uml_pt_regs *regs;
- regs = malloc(sizeof(struct uml_pt_regs));
+ regs = uml_kmalloc(sizeof(struct uml_pt_regs), UM_GFP_ATOMIC);
if (!regs)
panic("out of memory");
select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
select PERF_EVENTS
select RTC_LIB
+ select RTC_MC146818_LIB
select SPARSE_IRQ
select SRCU
select SYSCTL_EXCEPTION_TRACE
#include <asm/io.h>
#include <asm/processor.h>
-#include <linux/mc146818rtc.h>
#ifndef RTC_PORT
#define RTC_PORT(x) (0x70 + (x))
+++ /dev/null
-#include <asm-generic/rtc.h>
*/
#include <linux/mc146818rtc.h>
#include <linux/rtc.h>
-#include <asm/rtc.h>
#define DEFAULT_RTC_INT_FREQ 64
#define DEFAULT_RTC_SHIFT 6
memset(&curr_time, 0, sizeof(struct rtc_time));
if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
- get_rtc_time(&curr_time);
+ mc146818_set_time(&curr_time);
if (hpet_rtc_flags & RTC_UIE &&
curr_time.tm_sec != hpet_prev_update_sec) {
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
+#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <asm/x86_init.h>
#include <asm/time.h>
#include <asm/intel-mid.h>
-#include <asm/rtc.h>
#include <asm/setup.h>
#ifdef CONFIG_X86_32
rtc_time_to_tm(nowtime, &tm);
if (!rtc_valid_tm(&tm)) {
- retval = set_rtc_time(&tm);
+ retval = mc146818_set_time(&tm);
if (retval)
printk(KERN_ERR "%s: RTC write failed with error %d\n",
__func__, retval);
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
-#include <asm/rtc.h>
#include <asm/uv/uv.h>
static struct efi efi_phys __initdata;
#include <linux/bootmem.h>
#include <linux/ioport.h>
#include <linux/init.h>
+#include <linux/mc146818rtc.h>
#include <linux/efi.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/init.h>
#include <linux/sfi.h>
#include <linux/platform_device.h>
+#include <linux/mc146818rtc.h>
#include <asm/intel-mid.h>
#include <asm/intel_mid_vrtc.h>
*/
unsigned long restore_cr3 __visible;
-pgd_t *temp_level4_pgt __visible;
+unsigned long temp_level4_pgt __visible;
unsigned long relocated_restore_code __visible;
-static int set_up_temporary_text_mapping(void)
+static int set_up_temporary_text_mapping(pgd_t *pgd)
{
pmd_t *pmd;
pud_t *pud;
__pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
set_pud(pud + pud_index(restore_jump_address),
__pud(__pa(pmd) | _KERNPG_TABLE));
- set_pgd(temp_level4_pgt + pgd_index(restore_jump_address),
+ set_pgd(pgd + pgd_index(restore_jump_address),
__pgd(__pa(pud) | _KERNPG_TABLE));
return 0;
.kernel_mapping = true,
};
unsigned long mstart, mend;
+ pgd_t *pgd;
int result;
int i;
- temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
- if (!temp_level4_pgt)
+ pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
+ if (!pgd)
return -ENOMEM;
/* Prepare a temporary mapping for the kernel text */
- result = set_up_temporary_text_mapping();
+ result = set_up_temporary_text_mapping(pgd);
if (result)
return result;
mstart = pfn_mapped[i].start << PAGE_SHIFT;
mend = pfn_mapped[i].end << PAGE_SHIFT;
- result = kernel_ident_mapping_init(&info, temp_level4_pgt,
- mstart, mend);
-
+ result = kernel_ident_mapping_init(&info, pgd, mstart, mend);
if (result)
return result;
}
+ temp_level4_pgt = (unsigned long)pgd - __PAGE_OFFSET;
return 0;
}
/* code below has been relocated to a safe page */
ENTRY(core_restore_code)
/* switch to temporary page tables */
- movq $__PAGE_OFFSET, %rcx
- subq %rcx, %rax
movq %rax, %cr3
/* flush TLB */
movq %rbx, %rcx
# Building vDSO images for x86.
#
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT := n
+
VDSO64-y := y
vdso-install-$(VDSO64-y) += vdso.so
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <asm-generic/rtc.h>
+#include <linux/mc146818rtc.h>
#include "internal.h"
acpi_device_dir(device) = NULL;
remove_lid_dir:
remove_proc_entry(ACPI_BUTTON_SUBCLASS_LID, acpi_button_dir);
+ acpi_lid_dir = NULL;
remove_button_dir:
remove_proc_entry(ACPI_BUTTON_CLASS, acpi_root_dir);
+ acpi_button_dir = NULL;
goto done;
}
acpi_lid_dir);
acpi_device_dir(device) = NULL;
remove_proc_entry(ACPI_BUTTON_SUBCLASS_LID, acpi_button_dir);
+ acpi_lid_dir = NULL;
remove_proc_entry(ACPI_BUTTON_CLASS, acpi_root_dir);
+ acpi_button_dir = NULL;
return 0;
}
#define ACPI_EC_UDELAY_POLL 550 /* Wait 1ms for EC transaction polling */
#define ACPI_EC_CLEAR_MAX 100 /* Maximum number of events to query
* when trying to clear the EC */
+#define ACPI_EC_MAX_QUERIES 16 /* Maximum number of parallel queries */
enum {
EC_FLAGS_QUERY_PENDING, /* Query is pending */
module_param(ec_delay, uint, 0644);
MODULE_PARM_DESC(ec_delay, "Timeout(ms) waited until an EC command completes");
+static unsigned int ec_max_queries __read_mostly = ACPI_EC_MAX_QUERIES;
+module_param(ec_max_queries, uint, 0644);
+MODULE_PARM_DESC(ec_max_queries, "Maximum parallel _Qxx evaluations");
+
static bool ec_busy_polling __read_mostly;
module_param(ec_busy_polling, bool, 0644);
MODULE_PARM_DESC(ec_busy_polling, "Use busy polling to advance EC transaction");
struct acpi_ec *boot_ec, *first_ec;
EXPORT_SYMBOL(first_ec);
+static struct workqueue_struct *ec_query_wq;
static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */
static int EC_FLAGS_QUERY_HANDSHAKE; /* Needs QR_EC issued when SCI_EVT set */
* work queue execution.
*/
ec_dbg_evt("Query(0x%02x) scheduled", value);
- if (!schedule_work(&q->work)) {
+ if (!queue_work(ec_query_wq, &q->work)) {
ec_dbg_evt("Query(0x%02x) overlapped", value);
result = -EBUSY;
}
},
};
+static inline int acpi_ec_query_init(void)
+{
+ if (!ec_query_wq) {
+ ec_query_wq = alloc_workqueue("kec_query", 0,
+ ec_max_queries);
+ if (!ec_query_wq)
+ return -ENODEV;
+ }
+ return 0;
+}
+
+static inline void acpi_ec_query_exit(void)
+{
+ if (ec_query_wq) {
+ destroy_workqueue(ec_query_wq);
+ ec_query_wq = NULL;
+ }
+}
+
int __init acpi_ec_init(void)
{
- int result = 0;
+ int result;
+ /* register workqueue for _Qxx evaluations */
+ result = acpi_ec_query_init();
+ if (result)
+ goto err_exit;
/* Now register the driver for the EC */
result = acpi_bus_register_driver(&acpi_ec_driver);
- if (result < 0)
- return -ENODEV;
+ if (result)
+ goto err_exit;
+err_exit:
+ if (result)
+ acpi_ec_query_exit();
return result;
}
{
acpi_bus_unregister_driver(&acpi_ec_driver);
+ acpi_ec_query_exit();
}
#endif /* 0 */
}
EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact);
+static noinline struct dev_pm_opp *_find_freq_ceil(struct opp_table *opp_table,
+ unsigned long *freq)
+{
+ struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
+
+ list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) {
+ if (temp_opp->available && temp_opp->rate >= *freq) {
+ opp = temp_opp;
+ *freq = opp->rate;
+ break;
+ }
+ }
+
+ return opp;
+}
+
/**
* dev_pm_opp_find_freq_ceil() - Search for an rounded ceil freq
* @dev: device for which we do this operation
unsigned long *freq)
{
struct opp_table *opp_table;
- struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
opp_rcu_lockdep_assert();
if (IS_ERR(opp_table))
return ERR_CAST(opp_table);
- list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) {
- if (temp_opp->available && temp_opp->rate >= *freq) {
- opp = temp_opp;
- *freq = opp->rate;
- break;
- }
- }
-
- return opp;
+ return _find_freq_ceil(opp_table, freq);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil);
return PTR_ERR(opp_table);
}
- old_opp = dev_pm_opp_find_freq_ceil(dev, &old_freq);
+ old_opp = _find_freq_ceil(opp_table, &old_freq);
if (!IS_ERR(old_opp)) {
ou_volt = old_opp->u_volt;
ou_volt_min = old_opp->u_volt_min;
__func__, old_freq, PTR_ERR(old_opp));
}
- opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+ opp = _find_freq_ceil(opp_table, &freq);
if (IS_ERR(opp)) {
ret = PTR_ERR(opp);
dev_err(dev, "%s: failed to find OPP for freq %lu (%d)\n",
#include <linux/export.h>
#include <linux/rtc.h>
-#include <asm/rtc.h>
+#include <linux/mc146818rtc.h>
#include "power.h"
n /= 24;
time.tm_min = (n % 20) * 3;
n /= 20;
- set_rtc_time(&time);
+ mc146818_set_time(&time);
return n ? -1 : 0;
}
struct rtc_time time;
unsigned int val;
- get_rtc_time(&time);
+ mc146818_get_time(&time);
pr_info("RTC time: %2d:%02d:%02d, date: %02d/%02d/%02d\n",
time.tm_hour, time.tm_min, time.tm_sec,
time.tm_mon + 1, time.tm_mday, time.tm_year % 100);
struct wakeup_source *ws;
rcu_read_lock();
- list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
- if (ws->wakeirq)
- dev_pm_arm_wake_irq(ws->wakeirq);
- }
+ list_for_each_entry_rcu(ws, &wakeup_sources, entry)
+ dev_pm_arm_wake_irq(ws->wakeirq);
+
rcu_read_unlock();
}
struct wakeup_source *ws;
rcu_read_lock();
- list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
- if (ws->wakeirq)
- dev_pm_disarm_wake_irq(ws->wakeirq);
- }
+ list_for_each_entry_rcu(ws, &wakeup_sources, entry)
+ dev_pm_disarm_wake_irq(ws->wakeirq);
+
rcu_read_unlock();
}
return -EINVAL;
ws = device_wakeup_detach(dev);
- if (ws)
- wakeup_source_unregister(ws);
-
+ wakeup_source_unregister(ws);
return 0;
}
EXPORT_SYMBOL_GPL(device_wakeup_disable);
config RTC
tristate "Enhanced Real Time Clock Support (legacy PC RTC driver)"
- depends on ALPHA || (MIPS && MACH_LOONGSON64) || MN10300
+ depends on ALPHA || (MIPS && MACH_LOONGSON64)
---help---
If you say Y here and create a character special file /dev/rtc with
major number 10 and minor number 135 using mknod ("man mknod"), you
To compile this driver as a module, choose M here: the
module will be called js-rtc.
-config GEN_RTC
- tristate "Generic /dev/rtc emulation"
- depends on RTC!=y
- depends on ALPHA || M68K || MN10300 || PARISC || PPC || X86
- ---help---
- If you say Y here and create a character special file /dev/rtc with
- major number 10 and minor number 135 using mknod ("man mknod"), you
- will get access to the real time clock (or hardware clock) built
- into your computer.
-
- It reports status information via the file /proc/driver/rtc and its
- behaviour is set by various ioctls on /dev/rtc. If you enable the
- "extended RTC operation" below it will also provide an emulation
- for RTC_UIE which is required by some programs and may improve
- precision in some cases.
-
- To compile this driver as a module, choose M here: the
- module will be called genrtc.
-
-config GEN_RTC_X
- bool "Extended RTC operation"
- depends on GEN_RTC
- help
- Provides an emulation for RTC_UIE which is required by some programs
- and may improve precision of the generic RTC support in some cases.
-
config EFI_RTC
bool "EFI Real Time Clock Services"
depends on IA64
obj-$(CONFIG_SONYPI) += sonypi.o
obj-$(CONFIG_RTC) += rtc.o
obj-$(CONFIG_HPET) += hpet.o
-obj-$(CONFIG_GEN_RTC) += genrtc.o
obj-$(CONFIG_EFI_RTC) += efirtc.o
obj-$(CONFIG_DS1302) += ds1302.o
obj-$(CONFIG_XILINX_HWICAP) += xilinx_hwicap/
+++ /dev/null
-/*
- * Real Time Clock interface for
- * - q40 and other m68k machines,
- * - HP PARISC machines
- * - PowerPC machines
- * emulate some RTC irq capabilities in software
- *
- * Copyright (C) 1999 Richard Zidlicky
- *
- * based on Paul Gortmaker's rtc.c device and
- * Sam Creasey Generic rtc driver
- *
- * This driver allows use of the real time clock (built into
- * nearly all computers) from user space. It exports the /dev/rtc
- * interface supporting various ioctl() and also the /proc/driver/rtc
- * pseudo-file for status information.
- *
- * The ioctls can be used to set the interrupt behaviour where
- * supported.
- *
- * The /dev/rtc interface will block on reads until an interrupt
- * has been received. If a RTC interrupt has already happened,
- * it will output an unsigned long and then block. The output value
- * contains the interrupt status in the low byte and the number of
- * interrupts since the last read in the remaining high bytes. The
- * /dev/rtc interface can also be used with the select(2) call.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
-
- * 1.01 fix for 2.3.X rz@linux-m68k.org
- * 1.02 merged with code from genrtc.c rz@linux-m68k.org
- * 1.03 make it more portable zippel@linux-m68k.org
- * 1.04 removed useless timer code rz@linux-m68k.org
- * 1.05 portable RTC_UIE emulation rz@linux-m68k.org
- * 1.06 set_rtc_time can return an error trini@kernel.crashing.org
- * 1.07 ported to HP PARISC (hppa) Helge Deller <deller@gmx.de>
- */
-
-#define RTC_VERSION "1.07"
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/miscdevice.h>
-#include <linux/fcntl.h>
-
-#include <linux/rtc.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/mutex.h>
-#include <linux/workqueue.h>
-
-#include <asm/uaccess.h>
-#include <asm/rtc.h>
-
-/*
- * We sponge a minor off of the misc major. No need slurping
- * up another valuable major dev number for this. If you add
- * an ioctl, make sure you don't conflict with SPARC's RTC
- * ioctls.
- */
-
-static DEFINE_MUTEX(gen_rtc_mutex);
-static DECLARE_WAIT_QUEUE_HEAD(gen_rtc_wait);
-
-/*
- * Bits in gen_rtc_status.
- */
-
-#define RTC_IS_OPEN 0x01 /* means /dev/rtc is in use */
-
-static unsigned char gen_rtc_status; /* bitmapped status byte. */
-static unsigned long gen_rtc_irq_data; /* our output to the world */
-
-/* months start at 0 now */
-static unsigned char days_in_mo[] =
-{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
-
-static int irq_active;
-
-#ifdef CONFIG_GEN_RTC_X
-static struct work_struct genrtc_task;
-static struct timer_list timer_task;
-
-static unsigned int oldsecs;
-static int lostint;
-static unsigned long tt_exp;
-
-static void gen_rtc_timer(unsigned long data);
-
-static volatile int stask_active; /* schedule_work */
-static volatile int ttask_active; /* timer_task */
-static int stop_rtc_timers; /* don't requeue tasks */
-static DEFINE_SPINLOCK(gen_rtc_lock);
-
-static void gen_rtc_interrupt(unsigned long arg);
-
-/*
- * Routine to poll RTC seconds field for change as often as possible,
- * after first RTC_UIE use timer to reduce polling
- */
-static void genrtc_troutine(struct work_struct *work)
-{
- unsigned int tmp = get_rtc_ss();
-
- if (stop_rtc_timers) {
- stask_active = 0;
- return;
- }
-
- if (oldsecs != tmp){
- oldsecs = tmp;
-
- timer_task.function = gen_rtc_timer;
- timer_task.expires = jiffies + HZ - (HZ/10);
- tt_exp=timer_task.expires;
- ttask_active=1;
- stask_active=0;
- add_timer(&timer_task);
-
- gen_rtc_interrupt(0);
- } else if (schedule_work(&genrtc_task) == 0)
- stask_active = 0;
-}
-
-static void gen_rtc_timer(unsigned long data)
-{
- lostint = get_rtc_ss() - oldsecs ;
- if (lostint<0)
- lostint = 60 - lostint;
- if (time_after(jiffies, tt_exp))
- printk(KERN_INFO "genrtc: timer task delayed by %ld jiffies\n",
- jiffies-tt_exp);
- ttask_active=0;
- stask_active=1;
- if ((schedule_work(&genrtc_task) == 0))
- stask_active = 0;
-}
-
-/*
- * call gen_rtc_interrupt function to signal an RTC_UIE,
- * arg is unused.
- * Could be invoked either from a real interrupt handler or
- * from some routine that periodically (eg 100HZ) monitors
- * whether RTC_SECS changed
- */
-static void gen_rtc_interrupt(unsigned long arg)
-{
- /* We store the status in the low byte and the number of
- * interrupts received since the last read in the remainder
- * of rtc_irq_data. */
-
- gen_rtc_irq_data += 0x100;
- gen_rtc_irq_data &= ~0xff;
- gen_rtc_irq_data |= RTC_UIE;
-
- if (lostint){
- printk("genrtc: system delaying clock ticks?\n");
- /* increment count so that userspace knows something is wrong */
- gen_rtc_irq_data += ((lostint-1)<<8);
- lostint = 0;
- }
-
- wake_up_interruptible(&gen_rtc_wait);
-}
-
-/*
- * Now all the various file operations that we export.
- */
-static ssize_t gen_rtc_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- unsigned long data;
- ssize_t retval;
-
- if (count != sizeof (unsigned int) && count != sizeof (unsigned long))
- return -EINVAL;
-
- if (file->f_flags & O_NONBLOCK && !gen_rtc_irq_data)
- return -EAGAIN;
-
- retval = wait_event_interruptible(gen_rtc_wait,
- (data = xchg(&gen_rtc_irq_data, 0)));
- if (retval)
- goto out;
-
- /* first test allows optimizer to nuke this case for 32-bit machines */
- if (sizeof (int) != sizeof (long) && count == sizeof (unsigned int)) {
- unsigned int uidata = data;
- retval = put_user(uidata, (unsigned int __user *)buf) ?:
- sizeof(unsigned int);
- }
- else {
- retval = put_user(data, (unsigned long __user *)buf) ?:
- sizeof(unsigned long);
- }
-out:
- return retval;
-}
-
-static unsigned int gen_rtc_poll(struct file *file,
- struct poll_table_struct *wait)
-{
- poll_wait(file, &gen_rtc_wait, wait);
- if (gen_rtc_irq_data != 0)
- return POLLIN | POLLRDNORM;
- return 0;
-}
-
-#endif
-
-/*
- * Used to disable/enable interrupts, only RTC_UIE supported
- * We also clear out any old irq data after an ioctl() that
- * meddles with the interrupt enable/disable bits.
- */
-
-static inline void gen_clear_rtc_irq_bit(unsigned char bit)
-{
-#ifdef CONFIG_GEN_RTC_X
- stop_rtc_timers = 1;
- if (ttask_active){
- del_timer_sync(&timer_task);
- ttask_active = 0;
- }
- while (stask_active)
- schedule();
-
- spin_lock(&gen_rtc_lock);
- irq_active = 0;
- spin_unlock(&gen_rtc_lock);
-#endif
-}
-
-static inline int gen_set_rtc_irq_bit(unsigned char bit)
-{
-#ifdef CONFIG_GEN_RTC_X
- spin_lock(&gen_rtc_lock);
- if ( !irq_active ) {
- irq_active = 1;
- stop_rtc_timers = 0;
- lostint = 0;
- INIT_WORK(&genrtc_task, genrtc_troutine);
- oldsecs = get_rtc_ss();
- init_timer(&timer_task);
-
- stask_active = 1;
- if (schedule_work(&genrtc_task) == 0){
- stask_active = 0;
- }
- }
- spin_unlock(&gen_rtc_lock);
- gen_rtc_irq_data = 0;
- return 0;
-#else
- return -EINVAL;
-#endif
-}
-
-static int gen_rtc_ioctl(struct file *file,
- unsigned int cmd, unsigned long arg)
-{
- struct rtc_time wtime;
- struct rtc_pll_info pll;
- void __user *argp = (void __user *)arg;
-
- switch (cmd) {
-
- case RTC_PLL_GET:
- if (get_rtc_pll(&pll))
- return -EINVAL;
- else
- return copy_to_user(argp, &pll, sizeof pll) ? -EFAULT : 0;
-
- case RTC_PLL_SET:
- if (!capable(CAP_SYS_TIME))
- return -EACCES;
- if (copy_from_user(&pll, argp, sizeof(pll)))
- return -EFAULT;
- return set_rtc_pll(&pll);
-
- case RTC_UIE_OFF: /* disable ints from RTC updates. */
- gen_clear_rtc_irq_bit(RTC_UIE);
- return 0;
-
- case RTC_UIE_ON: /* enable ints for RTC updates. */
- return gen_set_rtc_irq_bit(RTC_UIE);
-
- case RTC_RD_TIME: /* Read the time/date from RTC */
- /* this doesn't get week-day, who cares */
- memset(&wtime, 0, sizeof(wtime));
- get_rtc_time(&wtime);
-
- return copy_to_user(argp, &wtime, sizeof(wtime)) ? -EFAULT : 0;
-
- case RTC_SET_TIME: /* Set the RTC */
- {
- int year;
- unsigned char leap_yr;
-
- if (!capable(CAP_SYS_TIME))
- return -EACCES;
-
- if (copy_from_user(&wtime, argp, sizeof(wtime)))
- return -EFAULT;
-
- year = wtime.tm_year + 1900;
- leap_yr = ((!(year % 4) && (year % 100)) ||
- !(year % 400));
-
- if ((wtime.tm_mon < 0 || wtime.tm_mon > 11) || (wtime.tm_mday < 1))
- return -EINVAL;
-
- if (wtime.tm_mday < 0 || wtime.tm_mday >
- (days_in_mo[wtime.tm_mon] + ((wtime.tm_mon == 1) && leap_yr)))
- return -EINVAL;
-
- if (wtime.tm_hour < 0 || wtime.tm_hour >= 24 ||
- wtime.tm_min < 0 || wtime.tm_min >= 60 ||
- wtime.tm_sec < 0 || wtime.tm_sec >= 60)
- return -EINVAL;
-
- return set_rtc_time(&wtime);
- }
- }
-
- return -EINVAL;
-}
-
-static long gen_rtc_unlocked_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- int ret;
-
- mutex_lock(&gen_rtc_mutex);
- ret = gen_rtc_ioctl(file, cmd, arg);
- mutex_unlock(&gen_rtc_mutex);
-
- return ret;
-}
-
-/*
- * We enforce only one user at a time here with the open/close.
- * Also clear the previous interrupt data on an open, and clean
- * up things on a close.
- */
-
-static int gen_rtc_open(struct inode *inode, struct file *file)
-{
- mutex_lock(&gen_rtc_mutex);
- if (gen_rtc_status & RTC_IS_OPEN) {
- mutex_unlock(&gen_rtc_mutex);
- return -EBUSY;
- }
-
- gen_rtc_status |= RTC_IS_OPEN;
- gen_rtc_irq_data = 0;
- irq_active = 0;
- mutex_unlock(&gen_rtc_mutex);
-
- return 0;
-}
-
-static int gen_rtc_release(struct inode *inode, struct file *file)
-{
- /*
- * Turn off all interrupts once the device is no longer
- * in use and clear the data.
- */
-
- gen_clear_rtc_irq_bit(RTC_PIE|RTC_AIE|RTC_UIE);
-
- gen_rtc_status &= ~RTC_IS_OPEN;
- return 0;
-}
-
-
-#ifdef CONFIG_PROC_FS
-
-/*
- * Info exported via "/proc/driver/rtc".
- */
-
-static int gen_rtc_proc_show(struct seq_file *m, void *v)
-{
- struct rtc_time tm;
- unsigned int flags;
- struct rtc_pll_info pll;
-
- flags = get_rtc_time(&tm);
-
- seq_printf(m,
- "rtc_time\t: %02d:%02d:%02d\n"
- "rtc_date\t: %04d-%02d-%02d\n"
- "rtc_epoch\t: %04u\n",
- tm.tm_hour, tm.tm_min, tm.tm_sec,
- tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, 1900);
-
- tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
-
- seq_puts(m, "alarm\t\t: ");
- if (tm.tm_hour <= 24)
- seq_printf(m, "%02d:", tm.tm_hour);
- else
- seq_puts(m, "**:");
-
- if (tm.tm_min <= 59)
- seq_printf(m, "%02d:", tm.tm_min);
- else
- seq_puts(m, "**:");
-
- if (tm.tm_sec <= 59)
- seq_printf(m, "%02d\n", tm.tm_sec);
- else
- seq_puts(m, "**\n");
-
- seq_printf(m,
- "DST_enable\t: %s\n"
- "BCD\t\t: %s\n"
- "24hr\t\t: %s\n"
- "square_wave\t: %s\n"
- "alarm_IRQ\t: %s\n"
- "update_IRQ\t: %s\n"
- "periodic_IRQ\t: %s\n"
- "periodic_freq\t: %ld\n"
- "batt_status\t: %s\n",
- (flags & RTC_DST_EN) ? "yes" : "no",
- (flags & RTC_DM_BINARY) ? "no" : "yes",
- (flags & RTC_24H) ? "yes" : "no",
- (flags & RTC_SQWE) ? "yes" : "no",
- (flags & RTC_AIE) ? "yes" : "no",
- irq_active ? "yes" : "no",
- (flags & RTC_PIE) ? "yes" : "no",
- 0L /* freq */,
- (flags & RTC_BATT_BAD) ? "bad" : "okay");
- if (!get_rtc_pll(&pll))
- seq_printf(m,
- "PLL adjustment\t: %d\n"
- "PLL max +ve adjustment\t: %d\n"
- "PLL max -ve adjustment\t: %d\n"
- "PLL +ve adjustment factor\t: %d\n"
- "PLL -ve adjustment factor\t: %d\n"
- "PLL frequency\t: %ld\n",
- pll.pll_value,
- pll.pll_max,
- pll.pll_min,
- pll.pll_posmult,
- pll.pll_negmult,
- pll.pll_clock);
- return 0;
-}
-
-static int gen_rtc_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, gen_rtc_proc_show, NULL);
-}
-
-static const struct file_operations gen_rtc_proc_fops = {
- .open = gen_rtc_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int __init gen_rtc_proc_init(void)
-{
- struct proc_dir_entry *r;
-
- r = proc_create("driver/rtc", 0, NULL, &gen_rtc_proc_fops);
- if (!r)
- return -ENOMEM;
- return 0;
-}
-#else
-static inline int gen_rtc_proc_init(void) { return 0; }
-#endif /* CONFIG_PROC_FS */
-
-
-/*
- * The various file operations we support.
- */
-
-static const struct file_operations gen_rtc_fops = {
- .owner = THIS_MODULE,
-#ifdef CONFIG_GEN_RTC_X
- .read = gen_rtc_read,
- .poll = gen_rtc_poll,
-#endif
- .unlocked_ioctl = gen_rtc_unlocked_ioctl,
- .open = gen_rtc_open,
- .release = gen_rtc_release,
- .llseek = noop_llseek,
-};
-
-static struct miscdevice rtc_gen_dev =
-{
- .minor = RTC_MINOR,
- .name = "rtc",
- .fops = &gen_rtc_fops,
-};
-
-static int __init rtc_generic_init(void)
-{
- int retval;
-
- printk(KERN_INFO "Generic RTC Driver v%s\n", RTC_VERSION);
-
- retval = misc_register(&rtc_gen_dev);
- if (retval < 0)
- return retval;
-
- retval = gen_rtc_proc_init();
- if (retval) {
- misc_deregister(&rtc_gen_dev);
- return retval;
- }
-
- return 0;
-}
-
-static void __exit rtc_generic_exit(void)
-{
- remove_proc_entry ("driver/rtc", NULL);
- misc_deregister(&rtc_gen_dev);
-}
-
-
-module_init(rtc_generic_init);
-module_exit(rtc_generic_exit);
-
-MODULE_AUTHOR("Richard Zidlicky");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_MISCDEV(RTC_MINOR);
config CPU_FREQ_STAT
bool "CPU frequency transition statistics"
- default y
help
Export CPU frequency statistics information through sysfs.
static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params),
+ ICPU(INTEL_FAM6_BROADWELL_X, core_params),
+ ICPU(INTEL_FAM6_SKYLAKE_X, core_params),
{}
};
source "drivers/infiniband/ulp/isert/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
+source "drivers/infiniband/sw/rxe/Kconfig"
source "drivers/infiniband/hw/hfi1/Kconfig"
MODULE_LICENSE("Dual BSD/GPL");
#define CMA_CM_RESPONSE_TIMEOUT 20
+#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 18
unsigned short port;
};
+struct class_port_info_context {
+ struct ib_class_port_info *class_port_info;
+ struct ib_device *device;
+ struct completion done;
+ struct ib_sa_query *sa_query;
+ u8 port_num;
+};
+
static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
struct rdma_bind_list *bind_list, int snum)
{
struct sockaddr_storage addr;
struct kref mcref;
bool igmp_joined;
+ u8 join_state;
};
struct cma_work {
}
}
+static void cma_query_sa_classport_info_cb(int status,
+ struct ib_class_port_info *rec,
+ void *context)
+{
+ struct class_port_info_context *cb_ctx = context;
+
+ WARN_ON(!context);
+
+ if (status || !rec) {
+ pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n",
+ cb_ctx->device->name, cb_ctx->port_num, status);
+ goto out;
+ }
+
+ memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info));
+
+out:
+ complete(&cb_ctx->done);
+}
+
+static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num,
+ struct ib_class_port_info *class_port_info)
+{
+ struct class_port_info_context *cb_ctx;
+ int ret;
+
+ cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL);
+ if (!cb_ctx)
+ return -ENOMEM;
+
+ cb_ctx->device = device;
+ cb_ctx->class_port_info = class_port_info;
+ cb_ctx->port_num = port_num;
+ init_completion(&cb_ctx->done);
+
+ ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num,
+ CMA_QUERY_CLASSPORT_INFO_TIMEOUT,
+ GFP_KERNEL, cma_query_sa_classport_info_cb,
+ cb_ctx, &cb_ctx->sa_query);
+ if (ret < 0) {
+ pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n",
+ device->name, port_num, ret);
+ goto out;
+ }
+
+ wait_for_completion(&cb_ctx->done);
+
+out:
+ kfree(cb_ctx);
+ return ret;
+}
+
static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
struct ib_sa_mcmember_rec rec;
+ struct ib_class_port_info class_port_info;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
ib_sa_comp_mask comp_mask;
int ret;
rec.qkey = cpu_to_be32(id_priv->qkey);
rdma_addr_get_sgid(dev_addr, &rec.port_gid);
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
- rec.join_state = 1;
+ rec.join_state = mc->join_state;
+
+ if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) {
+ ret = cma_query_sa_classport_info(id_priv->id.device,
+ id_priv->id.port_num,
+ &class_port_info);
+
+ if (ret)
+ return ret;
+
+ if (!(ib_get_cpi_capmask2(&class_port_info) &
+ IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) {
+ pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
+ "RDMA CM: SM doesn't support Send Only Full Member option\n",
+ id_priv->id.device->name, id_priv->id.port_num);
+ return -EOPNOTSUPP;
+ }
+ }
comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
enum ib_gid_type gid_type;
+ bool send_only;
+
+ send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
if (cma_zero_addr((struct sockaddr *)&mc->addr))
return -EINVAL;
if (addr->sa_family == AF_INET) {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
- err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
- true);
- if (!err)
- mc->igmp_joined = true;
+ if (!send_only) {
+ err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+ true);
+ if (!err)
+ mc->igmp_joined = true;
+ }
}
} else {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
}
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
- void *context)
+ u8 join_state, void *context)
{
struct rdma_id_private *id_priv;
struct cma_multicast *mc;
mc->context = context;
mc->id_priv = id_priv;
mc->igmp_joined = false;
+ mc->join_state = join_state;
spin_lock(&id_priv->lock);
list_add(&mc->list, &id_priv->mc_list);
spin_unlock(&id_priv->lock);
return 0;
}
+void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len)
+{
+ if (dev->get_dev_fw_str)
+ dev->get_dev_fw_str(dev, str, str_len);
+ else
+ str[0] = '\0';
+}
+EXPORT_SYMBOL(ib_get_device_fw_str);
+
/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register
/*
* Release a reference on cm_id. If the last reference is being
- * released, enable the waiting thread (in iw_destroy_cm_id) to
- * get woken up, and return 1 if a thread is already waiting.
+ * released, free the cm_id and return 1.
*/
static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
if (atomic_dec_and_test(&cm_id_priv->refcount)) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
- complete(&cm_id_priv->destroy_comp);
+ free_cm_id(cm_id_priv);
return 1;
}
static void rem_ref(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
- int cb_destroy;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
- /*
- * Test bit before deref in case the cm_id gets freed on another
- * thread.
- */
- cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
- if (iwcm_deref_id(cm_id_priv) && cb_destroy) {
- BUG_ON(!list_empty(&cm_id_priv->work_list));
- free_cm_id(cm_id_priv);
- }
+ (void)iwcm_deref_id(cm_id_priv);
}
static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
wait_event(cm_id_priv->connect_wait,
!test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+ /*
+ * Since we're deleting the cm_id, drop any events that
+ * might arrive before the last dereference.
+ */
+ set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);
+
spin_lock_irqsave(&cm_id_priv->lock, flags);
switch (cm_id_priv->state) {
case IW_CM_STATE_LISTEN:
struct iwcm_id_private *cm_id_priv;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
- BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));
-
destroy_cm_id(cm_id);
-
- wait_for_completion(&cm_id_priv->destroy_comp);
-
- free_cm_id(cm_id_priv);
}
EXPORT_SYMBOL(iw_destroy_cm_id);
ret = cm_id->cm_handler(cm_id, iw_event);
if (ret) {
iw_cm_reject(cm_id, NULL, 0);
- set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
- destroy_cm_id(cm_id);
- if (atomic_read(&cm_id_priv->refcount)==0)
- free_cm_id(cm_id_priv);
+ iw_destroy_cm_id(cm_id);
}
out:
unsigned long flags;
int empty;
int ret = 0;
- int destroy_id;
spin_lock_irqsave(&cm_id_priv->lock, flags);
empty = list_empty(&cm_id_priv->work_list);
put_work(work);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = process_event(cm_id_priv, &levent);
- if (ret) {
- set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
- destroy_cm_id(&cm_id_priv->id);
- }
- BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
- destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
- if (iwcm_deref_id(cm_id_priv)) {
- if (destroy_id) {
- BUG_ON(!list_empty(&cm_id_priv->work_list));
- free_cm_id(cm_id_priv);
- }
+ if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
+ ret = process_event(cm_id_priv, &levent);
+ if (ret)
+ destroy_cm_id(&cm_id_priv->id);
+ } else
+ pr_debug("dropping event %d\n", levent.event);
+ if (iwcm_deref_id(cm_id_priv))
return;
- }
if (empty)
return;
spin_lock_irqsave(&cm_id_priv->lock, flags);
struct list_head work_free_list;
};
-#define IWCM_F_CALLBACK_DESTROY 1
+#define IWCM_F_DROP_EVENTS 1
#define IWCM_F_CONNECT_WAIT 2
#endif /* IWCM_H */
#define IWPM_MAPINFO_HASH_MASK (IWPM_MAPINFO_HASH_SIZE - 1)
#define IWPM_REMINFO_HASH_SIZE 64
#define IWPM_REMINFO_HASH_MASK (IWPM_REMINFO_HASH_SIZE - 1)
+#define IWPM_MSG_SIZE 512
static LIST_HEAD(iwpm_nlmsg_req_list);
static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock);
{
struct sk_buff *skb = NULL;
- skb = dev_alloc_skb(NLMSG_GOODSIZE);
+ skb = dev_alloc_skb(IWPM_MSG_SIZE);
if (!skb) {
pr_err("%s Unable to allocate skb\n", __func__);
goto create_nlmsg_exit;
struct mcast_member;
-/*
-* There are 4 types of join states:
-* FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember.
-*/
-enum {
- FULLMEMBER_JOIN,
- NONMEMBER_JOIN,
- SENDONLY_NONMEBER_JOIN,
- SENDONLY_FULLMEMBER_JOIN,
- NUM_JOIN_MEMBERSHIP_TYPES,
-};
-
struct mcast_group {
struct ib_sa_mcmember_rec rec;
struct rb_node node;
int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
__u32 pid)
{
- return nlmsg_unicast(nls, skb, pid);
+ int err;
+
+ err = netlink_unicast(nls, skb, pid, 0);
+ return (err < 0) ? err : 0;
}
EXPORT_SYMBOL(ibnl_unicast);
return -ENOMEM;
}
+ nls->sk_sndtimeo = 10 * HZ;
return 0;
}
return false;
}
-static inline u32 rdma_rw_max_sge(struct ib_device *dev,
- enum dma_data_direction dir)
-{
- return dir == DMA_TO_DEVICE ?
- dev->attrs.max_sge : dev->attrs.max_sge_rd;
-}
-
static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev)
{
/* arbitrary limit to avoid allocating gigantic resources */
return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256);
}
+/* Caller must have zero-initialized *reg. */
static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
u32 sg_cnt, u32 offset)
u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
+ struct rdma_rw_reg_ctx *prev = NULL;
u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
int i, j, ret = 0, count = 0;
}
for (i = 0; i < ctx->nr_ops; i++) {
- struct rdma_rw_reg_ctx *prev = i ? &ctx->reg[i - 1] : NULL;
struct rdma_rw_reg_ctx *reg = &ctx->reg[i];
u32 nents = min(sg_cnt, pages_per_mr);
sg_cnt -= nents;
for (j = 0; j < nents; j++)
sg = sg_next(sg);
+ prev = reg;
offset = 0;
}
+ if (prev)
+ prev->wr.wr.next = NULL;
+
ctx->type = RDMA_RW_MR;
return count;
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
struct ib_device *dev = qp->pd->device;
- u32 max_sge = rdma_rw_max_sge(dev, dir);
+ u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge :
+ qp->max_read_sge;
struct ib_sge *sge;
u32 total_len = 0, i, j;
rdma_wr->wr.opcode = IB_WR_RDMA_READ;
rdma_wr->remote_addr = remote_addr + total_len;
rdma_wr->rkey = rkey;
+ rdma_wr->wr.num_sge = nr_sge;
rdma_wr->wr.sg_list = sge;
for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
- rdma_wr->wr.num_sge++;
-
sge->addr = ib_sg_dma_address(dev, sg) + offset;
sge->length = ib_sg_dma_len(dev, sg) - offset;
sge->lkey = qp->pd->local_dma_lkey;
offset = 0;
}
- if (i + 1 < ctx->nr_ops)
- rdma_wr->wr.next = &ctx->map.wrs[i + 1].wr;
+ rdma_wr->wr.next = i + 1 < ctx->nr_ops ?
+ &ctx->map.wrs[i + 1].wr : NULL;
}
ctx->type = RDMA_RW_MULTI_WR;
u8 src_path_mask;
};
+struct ib_sa_classport_cache {
+ bool valid;
+ struct ib_class_port_info data;
+};
+
struct ib_sa_port {
struct ib_mad_agent *agent;
struct ib_sa_sm_ah *sm_ah;
struct work_struct update_task;
+ struct ib_sa_classport_cache classport_info;
+ spinlock_t classport_lock; /* protects class port info set */
spinlock_t ah_lock;
u8 port_num;
};
port->sm_ah = NULL;
spin_unlock_irqrestore(&port->ah_lock, flags);
+ if (event->event == IB_EVENT_SM_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER ||
+ event->event == IB_EVENT_LID_CHANGE) {
+ spin_lock_irqsave(&port->classport_lock, flags);
+ port->classport_info.valid = false;
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ }
queue_work(ib_wq, &sa_dev->port[event->element.port_num -
sa_dev->start_port].update_task);
}
int status,
struct ib_sa_mad *mad)
{
+ unsigned long flags;
struct ib_sa_classport_info_query *query =
container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
ib_unpack(classport_info_rec_table,
ARRAY_SIZE(classport_info_rec_table),
mad->data, &rec);
+
+ spin_lock_irqsave(&sa_query->port->classport_lock, flags);
+ if (!status && !sa_query->port->classport_info.valid) {
+ memcpy(&sa_query->port->classport_info.data, &rec,
+ sizeof(sa_query->port->classport_info.data));
+
+ sa_query->port->classport_info.valid = true;
+ }
+ spin_unlock_irqrestore(&sa_query->port->classport_lock, flags);
+
query->callback(status, &rec, query->context);
} else {
query->callback(status, NULL, query->context);
struct ib_sa_port *port;
struct ib_mad_agent *agent;
struct ib_sa_mad *mad;
+ struct ib_class_port_info cached_class_port_info;
int ret;
+ unsigned long flags;
if (!sa_dev)
return -ENODEV;
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
+ /* Use cached ClassPortInfo attribute if valid instead of sending mad */
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if (port->classport_info.valid && callback) {
+ memcpy(&cached_class_port_info, &port->classport_info.data,
+ sizeof(cached_class_port_info));
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ callback(0, &cached_class_port_info, context);
+ return 0;
+ }
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
sa_dev->port[i].sm_ah = NULL;
sa_dev->port[i].port_num = i + s;
+ spin_lock_init(&sa_dev->port[i].classport_lock);
+ sa_dev->port[i].classport_info.valid = false;
+
sa_dev->port[i].agent =
ib_register_mad_agent(device, i + s, IB_QPT_GSI,
NULL, 0, send_handler,
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/netdevice.h>
+#include <linux/ethtool.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_pma.h>
return count;
}
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct ib_device *dev = container_of(device, struct ib_device, dev);
+
+ ib_get_device_fw_str(dev, buf, PAGE_SIZE);
+ strlcat(buf, "\n", PAGE_SIZE);
+ return strlen(buf);
+}
+
static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc);
+static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static struct device_attribute *ib_class_attributes[] = {
&dev_attr_node_type,
&dev_attr_sys_image_guid,
&dev_attr_node_guid,
- &dev_attr_node_desc
+ &dev_attr_node_desc,
+ &dev_attr_fw_ver,
};
static void free_port_list_attributes(struct ib_device *device)
int events_reported;
u64 uid;
+ u8 join_state;
struct list_head list;
struct sockaddr_storage addr;
};
struct ucma_multicast *mc;
struct sockaddr *addr;
int ret;
+ u8 join_state;
if (out_len < sizeof(resp))
return -ENOSPC;
addr = (struct sockaddr *) &cmd->addr;
- if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
+ if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
+ return -EINVAL;
+
+ if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
+ join_state = BIT(FULLMEMBER_JOIN);
+ else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
+ join_state = BIT(SENDONLY_FULLMEMBER_JOIN);
+ else
return -EINVAL;
ctx = ucma_get_ctx(file, cmd->id);
ret = -ENOMEM;
goto err1;
}
-
+ mc->join_state = join_state;
mc->uid = cmd->uid;
memcpy(&mc->addr, addr, cmd->addr_size);
- ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc);
+ ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
+ join_state, mc);
if (ret)
goto err2;
join_cmd.uid = cmd.uid;
join_cmd.id = cmd.id;
join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr);
- join_cmd.reserved = 0;
+ join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
return ucma_process_join(file, &join_cmd, out_len);
struct ib_uverbs_file {
struct kref ref;
struct mutex mutex;
+ struct mutex cleanup_mutex; /* protect cleanup */
struct ib_uverbs_device *device;
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
struct ib_uxrcd_object *uxrcd;
};
+struct ib_uwq_object {
+ struct ib_uevent_object uevent;
+};
+
struct ib_ucq_object {
struct ib_uobject uobject;
struct ib_uverbs_file *uverbs_file;
extern struct idr ib_uverbs_srq_idr;
extern struct idr ib_uverbs_xrcd_idr;
extern struct idr ib_uverbs_rule_idr;
+extern struct idr ib_uverbs_wq_idr;
+extern struct idr ib_uverbs_rwq_ind_tbl_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
+void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
struct ib_uverbs_flow_spec_eth eth;
struct ib_uverbs_flow_spec_ipv4 ipv4;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
+ struct ib_uverbs_flow_spec_ipv6 ipv6;
};
};
IB_UVERBS_DECLARE_EX_CMD(query_device);
IB_UVERBS_DECLARE_EX_CMD(create_cq);
IB_UVERBS_DECLARE_EX_CMD(create_qp);
+IB_UVERBS_DECLARE_EX_CMD(create_wq);
+IB_UVERBS_DECLARE_EX_CMD(modify_wq);
+IB_UVERBS_DECLARE_EX_CMD(destroy_wq);
+IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table);
+IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table);
#endif /* UVERBS_H */
static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
+static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" };
+static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" };
/*
* The ib_uobject locking scheme is as follows:
return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
}
+static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context)
+{
+ return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0);
+}
+
+static void put_wq_read(struct ib_wq *wq)
+{
+ put_uobj_read(wq->uobject);
+}
+
+static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle,
+ struct ib_ucontext *context)
+{
+ return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0);
+}
+
+static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table)
+{
+ put_uobj_read(ind_table->uobject);
+}
+
static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
{
struct ib_uobject *uobj;
INIT_LIST_HEAD(&ucontext->qp_list);
INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list);
+ INIT_LIST_HEAD(&ucontext->wq_list);
+ INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list);
INIT_LIST_HEAD(&ucontext->xrcd_list);
INIT_LIST_HEAD(&ucontext->rule_list);
rcu_read_lock();
struct ib_qp_init_attr attr = {};
struct ib_uverbs_ex_create_qp_resp resp;
int ret;
+ struct ib_rwq_ind_table *ind_tbl = NULL;
+ bool has_sq = true;
if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
&qp_lock_class);
down_write(&obj->uevent.uobject.mutex);
+ if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
+ sizeof(cmd->rwq_ind_tbl_handle) &&
+ (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
+ ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle,
+ file->ucontext);
+ if (!ind_tbl) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ attr.rwq_ind_tbl = ind_tbl;
+ }
+
+ if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) +
+ sizeof(cmd->reserved1)) && cmd->reserved1) {
+ ret = -EOPNOTSUPP;
+ goto err_put;
+ }
+
+ if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ if (ind_tbl && !cmd->max_send_wr)
+ has_sq = false;
if (cmd->qp_type == IB_QPT_XRC_TGT) {
xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
}
}
- if (cmd->recv_cq_handle != cmd->send_cq_handle) {
- rcq = idr_read_cq(cmd->recv_cq_handle,
- file->ucontext, 0);
- if (!rcq) {
- ret = -EINVAL;
- goto err_put;
+ if (!ind_tbl) {
+ if (cmd->recv_cq_handle != cmd->send_cq_handle) {
+ rcq = idr_read_cq(cmd->recv_cq_handle,
+ file->ucontext, 0);
+ if (!rcq) {
+ ret = -EINVAL;
+ goto err_put;
+ }
}
}
}
- scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
- rcq = rcq ?: scq;
+ if (has_sq)
+ scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
+ if (!ind_tbl)
+ rcq = rcq ?: scq;
pd = idr_read_pd(cmd->pd_handle, file->ucontext);
- if (!pd || !scq) {
+ if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
}
qp->send_cq = attr.send_cq;
qp->recv_cq = attr.recv_cq;
qp->srq = attr.srq;
+ qp->rwq_ind_tbl = ind_tbl;
qp->event_handler = attr.event_handler;
qp->qp_context = attr.qp_context;
qp->qp_type = attr.qp_type;
atomic_set(&qp->usecnt, 0);
atomic_inc(&pd->usecnt);
- atomic_inc(&attr.send_cq->usecnt);
+ if (attr.send_cq)
+ atomic_inc(&attr.send_cq->usecnt);
if (attr.recv_cq)
atomic_inc(&attr.recv_cq->usecnt);
if (attr.srq)
atomic_inc(&attr.srq->usecnt);
+ if (ind_tbl)
+ atomic_inc(&ind_tbl->usecnt);
}
qp->uobject = &obj->uevent.uobject;
put_cq_read(rcq);
if (srq)
put_srq_read(srq);
+ if (ind_tbl)
+ put_rwq_indirection_table_read(ind_tbl);
mutex_lock(&file->mutex);
list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
put_cq_read(rcq);
if (srq)
put_srq_read(srq);
+ if (ind_tbl)
+ put_rwq_indirection_table_read(ind_tbl);
put_uobj_write(&obj->uevent.uobject);
return ret;
if (err)
return err;
- if (cmd.comp_mask)
+ if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK)
return -EINVAL;
if (cmd.reserved)
memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask,
sizeof(struct ib_flow_ipv4_filter));
break;
+ case IB_FLOW_SPEC_IPV6:
+ ib_spec->ipv6.size = sizeof(struct ib_flow_spec_ipv6);
+ if (ib_spec->ipv6.size != kern_spec->ipv6.size)
+ return -EINVAL;
+ memcpy(&ib_spec->ipv6.val, &kern_spec->ipv6.val,
+ sizeof(struct ib_flow_ipv6_filter));
+ memcpy(&ib_spec->ipv6.mask, &kern_spec->ipv6.mask,
+ sizeof(struct ib_flow_ipv6_filter));
+ break;
case IB_FLOW_SPEC_TCP:
case IB_FLOW_SPEC_UDP:
ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp);
return 0;
}
+int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_ex_create_wq cmd = {};
+ struct ib_uverbs_ex_create_wq_resp resp = {};
+ struct ib_uwq_object *obj;
+ int err = 0;
+ struct ib_cq *cq;
+ struct ib_pd *pd;
+ struct ib_wq *wq;
+ struct ib_wq_init_attr wq_init_attr = {};
+ size_t required_cmd_sz;
+ size_t required_resp_len;
+
+ required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
+ required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
+
+ if (ucore->inlen < required_cmd_sz)
+ return -EINVAL;
+
+ if (ucore->outlen < required_resp_len)
+ return -ENOSPC;
+
+ if (ucore->inlen > sizeof(cmd) &&
+ !ib_is_udata_cleared(ucore, sizeof(cmd),
+ ucore->inlen - sizeof(cmd)))
+ return -EOPNOTSUPP;
+
+ err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ if (err)
+ return err;
+
+ if (cmd.comp_mask)
+ return -EOPNOTSUPP;
+
+ obj = kmalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext,
+ &wq_lock_class);
+ down_write(&obj->uevent.uobject.mutex);
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ err = -EINVAL;
+ goto err_uobj;
+ }
+
+ cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ if (!cq) {
+ err = -EINVAL;
+ goto err_put_pd;
+ }
+
+ wq_init_attr.cq = cq;
+ wq_init_attr.max_sge = cmd.max_sge;
+ wq_init_attr.max_wr = cmd.max_wr;
+ wq_init_attr.wq_context = file;
+ wq_init_attr.wq_type = cmd.wq_type;
+ wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
+ obj->uevent.events_reported = 0;
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
+ if (IS_ERR(wq)) {
+ err = PTR_ERR(wq);
+ goto err_put_cq;
+ }
+
+ wq->uobject = &obj->uevent.uobject;
+ obj->uevent.uobject.object = wq;
+ wq->wq_type = wq_init_attr.wq_type;
+ wq->cq = cq;
+ wq->pd = pd;
+ wq->device = pd->device;
+ wq->wq_context = wq_init_attr.wq_context;
+ atomic_set(&wq->usecnt, 0);
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&cq->usecnt);
+ wq->uobject = &obj->uevent.uobject;
+ obj->uevent.uobject.object = wq;
+ err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
+ if (err)
+ goto destroy_wq;
+
+ memset(&resp, 0, sizeof(resp));
+ resp.wq_handle = obj->uevent.uobject.id;
+ resp.max_sge = wq_init_attr.max_sge;
+ resp.max_wr = wq_init_attr.max_wr;
+ resp.wqn = wq->wq_num;
+ resp.response_length = required_resp_len;
+ err = ib_copy_to_udata(ucore,
+ &resp, resp.response_length);
+ if (err)
+ goto err_copy;
+
+ put_pd_read(pd);
+ put_cq_read(cq);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list);
+ mutex_unlock(&file->mutex);
+
+ obj->uevent.uobject.live = 1;
+ up_write(&obj->uevent.uobject.mutex);
+ return 0;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
+destroy_wq:
+ ib_destroy_wq(wq);
+err_put_cq:
+ put_cq_read(cq);
+err_put_pd:
+ put_pd_read(pd);
+err_uobj:
+ put_uobj_write(&obj->uevent.uobject);
+
+ return err;
+}
+
+int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_ex_destroy_wq cmd = {};
+ struct ib_uverbs_ex_destroy_wq_resp resp = {};
+ struct ib_wq *wq;
+ struct ib_uobject *uobj;
+ struct ib_uwq_object *obj;
+ size_t required_cmd_sz;
+ size_t required_resp_len;
+ int ret;
+
+ required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle);
+ required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+
+ if (ucore->inlen < required_cmd_sz)
+ return -EINVAL;
+
+ if (ucore->outlen < required_resp_len)
+ return -ENOSPC;
+
+ if (ucore->inlen > sizeof(cmd) &&
+ !ib_is_udata_cleared(ucore, sizeof(cmd),
+ ucore->inlen - sizeof(cmd)))
+ return -EOPNOTSUPP;
+
+ ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ if (ret)
+ return ret;
+
+ if (cmd.comp_mask)
+ return -EOPNOTSUPP;
+
+ resp.response_length = required_resp_len;
+ uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle,
+ file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+
+ wq = uobj->object;
+ obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
+ ret = ib_destroy_wq(wq);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+ if (ret)
+ return ret;
+
+ idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ ib_uverbs_release_uevent(file, &obj->uevent);
+ resp.events_reported = obj->uevent.events_reported;
+ put_uobj(uobj);
+
+ ret = ib_copy_to_udata(ucore, &resp, resp.response_length);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_ex_modify_wq cmd = {};
+ struct ib_wq *wq;
+ struct ib_wq_attr wq_attr = {};
+ size_t required_cmd_sz;
+ int ret;
+
+ required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state);
+ if (ucore->inlen < required_cmd_sz)
+ return -EINVAL;
+
+ if (ucore->inlen > sizeof(cmd) &&
+ !ib_is_udata_cleared(ucore, sizeof(cmd),
+ ucore->inlen - sizeof(cmd)))
+ return -EOPNOTSUPP;
+
+ ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ if (ret)
+ return ret;
+
+ if (!cmd.attr_mask)
+ return -EINVAL;
+
+ if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE))
+ return -EINVAL;
+
+ wq = idr_read_wq(cmd.wq_handle, file->ucontext);
+ if (!wq)
+ return -EINVAL;
+
+ wq_attr.curr_wq_state = cmd.curr_wq_state;
+ wq_attr.wq_state = cmd.wq_state;
+ ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
+ put_wq_read(wq);
+ return ret;
+}
+
+int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_ex_create_rwq_ind_table cmd = {};
+ struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {};
+ struct ib_uobject *uobj;
+ int err = 0;
+ struct ib_rwq_ind_table_init_attr init_attr = {};
+ struct ib_rwq_ind_table *rwq_ind_tbl;
+ struct ib_wq **wqs = NULL;
+ u32 *wqs_handles = NULL;
+ struct ib_wq *wq = NULL;
+ int i, j, num_read_wqs;
+ u32 num_wq_handles;
+ u32 expected_in_size;
+ size_t required_cmd_sz_header;
+ size_t required_resp_len;
+
+ required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size);
+ required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
+
+ if (ucore->inlen < required_cmd_sz_header)
+ return -EINVAL;
+
+ if (ucore->outlen < required_resp_len)
+ return -ENOSPC;
+
+ err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header);
+ if (err)
+ return err;
+
+ ucore->inbuf += required_cmd_sz_header;
+ ucore->inlen -= required_cmd_sz_header;
+
+ if (cmd.comp_mask)
+ return -EOPNOTSUPP;
+
+ if (cmd.log_ind_tbl_size > IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE)
+ return -EINVAL;
+
+ num_wq_handles = 1 << cmd.log_ind_tbl_size;
+ expected_in_size = num_wq_handles * sizeof(__u32);
+ if (num_wq_handles == 1)
+ /* input size for wq handles is u64 aligned */
+ expected_in_size += sizeof(__u32);
+
+ if (ucore->inlen < expected_in_size)
+ return -EINVAL;
+
+ if (ucore->inlen > expected_in_size &&
+ !ib_is_udata_cleared(ucore, expected_in_size,
+ ucore->inlen - expected_in_size))
+ return -EOPNOTSUPP;
+
+ wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles),
+ GFP_KERNEL);
+ if (!wqs_handles)
+ return -ENOMEM;
+
+ err = ib_copy_from_udata(wqs_handles, ucore,
+ num_wq_handles * sizeof(__u32));
+ if (err)
+ goto err_free;
+
+ wqs = kcalloc(num_wq_handles, sizeof(*wqs), GFP_KERNEL);
+ if (!wqs) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
+ num_read_wqs++) {
+ wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext);
+ if (!wq) {
+ err = -EINVAL;
+ goto put_wqs;
+ }
+
+ wqs[num_read_wqs] = wq;
+ }
+
+ uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
+ if (!uobj) {
+ err = -ENOMEM;
+ goto put_wqs;
+ }
+
+ init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class);
+ down_write(&uobj->mutex);
+ init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
+ init_attr.ind_tbl = wqs;
+ rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
+
+ if (IS_ERR(rwq_ind_tbl)) {
+ err = PTR_ERR(rwq_ind_tbl);
+ goto err_uobj;
+ }
+
+ rwq_ind_tbl->ind_tbl = wqs;
+ rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size;
+ rwq_ind_tbl->uobject = uobj;
+ uobj->object = rwq_ind_tbl;
+ rwq_ind_tbl->device = ib_dev;
+ atomic_set(&rwq_ind_tbl->usecnt, 0);
+
+ for (i = 0; i < num_wq_handles; i++)
+ atomic_inc(&wqs[i]->usecnt);
+
+ err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
+ if (err)
+ goto destroy_ind_tbl;
+
+ resp.ind_tbl_handle = uobj->id;
+ resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
+ resp.response_length = required_resp_len;
+
+ err = ib_copy_to_udata(ucore,
+ &resp, resp.response_length);
+ if (err)
+ goto err_copy;
+
+ kfree(wqs_handles);
+
+ for (j = 0; j < num_read_wqs; j++)
+ put_wq_read(wqs[j]);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+ return 0;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
+destroy_ind_tbl:
+ ib_destroy_rwq_ind_table(rwq_ind_tbl);
+err_uobj:
+ put_uobj_write(uobj);
+put_wqs:
+ for (j = 0; j < num_read_wqs; j++)
+ put_wq_read(wqs[j]);
+err_free:
+ kfree(wqs_handles);
+ kfree(wqs);
+ return err;
+}
+
+int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
+ struct ib_rwq_ind_table *rwq_ind_tbl;
+ struct ib_uobject *uobj;
+ int ret;
+ struct ib_wq **ind_tbl;
+ size_t required_cmd_sz;
+
+ required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle);
+
+ if (ucore->inlen < required_cmd_sz)
+ return -EINVAL;
+
+ if (ucore->inlen > sizeof(cmd) &&
+ !ib_is_udata_cleared(ucore, sizeof(cmd),
+ ucore->inlen - sizeof(cmd)))
+ return -EOPNOTSUPP;
+
+ ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ if (ret)
+ return ret;
+
+ if (cmd.comp_mask)
+ return -EOPNOTSUPP;
+
+ uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle,
+ file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+ rwq_ind_tbl = uobj->object;
+ ind_tbl = rwq_ind_tbl->ind_tbl;
+
+ ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+
+ if (ret)
+ return ret;
+
+ idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ put_uobj(uobj);
+ kfree(ind_tbl);
+ return ret;
+}
+
int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
DEFINE_IDR(ib_uverbs_srq_idr);
DEFINE_IDR(ib_uverbs_xrcd_idr);
DEFINE_IDR(ib_uverbs_rule_idr);
+DEFINE_IDR(ib_uverbs_wq_idr);
+DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr);
static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
[IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
[IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
[IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp,
+ [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq,
+ [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq,
+ [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq,
+ [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
+ [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
};
static void ib_uverbs_add_one(struct ib_device *device);
kfree(uqp);
}
+ list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) {
+ struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object;
+ struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
+
+ idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
+ ib_destroy_rwq_ind_table(rwq_ind_tbl);
+ kfree(ind_tbl);
+ kfree(uobj);
+ }
+
+ list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) {
+ struct ib_wq *wq = uobj->object;
+ struct ib_uwq_object *uwq =
+ container_of(uobj, struct ib_uwq_object, uevent.uobject);
+
+ idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
+ ib_destroy_wq(wq);
+ ib_uverbs_release_uevent(file, &uwq->uevent);
+ kfree(uwq);
+ }
+
list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
struct ib_srq *srq = uobj->object;
struct ib_uevent_object *uevent =
&uobj->events_reported);
}
+void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
+{
+ struct ib_uevent_object *uobj = container_of(event->element.wq->uobject,
+ struct ib_uevent_object, uobject);
+
+ ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
+ event->event, &uobj->event_list,
+ &uobj->events_reported);
+}
+
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
struct ib_uevent_object *uobj;
file->async_file = NULL;
kref_init(&file->ref);
mutex_init(&file->mutex);
+ mutex_init(&file->cleanup_mutex);
filp->private_data = file;
kobject_get(&dev->kobj);
{
struct ib_uverbs_file *file = filp->private_data;
struct ib_uverbs_device *dev = file->device;
- struct ib_ucontext *ucontext = NULL;
+
+ mutex_lock(&file->cleanup_mutex);
+ if (file->ucontext) {
+ ib_uverbs_cleanup_ucontext(file, file->ucontext);
+ file->ucontext = NULL;
+ }
+ mutex_unlock(&file->cleanup_mutex);
mutex_lock(&file->device->lists_mutex);
- ucontext = file->ucontext;
- file->ucontext = NULL;
if (!file->is_closed) {
list_del(&file->list);
file->is_closed = 1;
}
mutex_unlock(&file->device->lists_mutex);
- if (ucontext)
- ib_uverbs_cleanup_ucontext(file, ucontext);
if (file->async_file)
kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
mutex_lock(&uverbs_dev->lists_mutex);
while (!list_empty(&uverbs_dev->uverbs_file_list)) {
struct ib_ucontext *ucontext;
-
file = list_first_entry(&uverbs_dev->uverbs_file_list,
struct ib_uverbs_file, list);
file->is_closed = 1;
- ucontext = file->ucontext;
list_del(&file->list);
- file->ucontext = NULL;
kref_get(&file->ref);
mutex_unlock(&uverbs_dev->lists_mutex);
- /* We must release the mutex before going ahead and calling
- * disassociate_ucontext. disassociate_ucontext might end up
- * indirectly calling uverbs_close, for example due to freeing
- * the resources (e.g mmput).
- */
+
ib_uverbs_event_handler(&file->event_handler, &event);
+
+ mutex_lock(&file->cleanup_mutex);
+ ucontext = file->ucontext;
+ file->ucontext = NULL;
+ mutex_unlock(&file->cleanup_mutex);
+
+ /* At this point ib_uverbs_close cannot be running
+ * ib_uverbs_cleanup_ucontext
+ */
if (ucontext) {
+ /* We must release the mutex before going ahead and
+ * calling disassociate_ucontext. disassociate_ucontext
+ * might end up indirectly calling uverbs_close,
+ * for example due to freeing the resources
+ * (e.g mmput).
+ */
ib_dev->disassociate_ucontext(ucontext);
ib_uverbs_cleanup_ucontext(file, ucontext);
}
struct ib_qp *qp;
int ret;
+ if (qp_init_attr->rwq_ind_tbl &&
+ (qp_init_attr->recv_cq ||
+ qp_init_attr->srq || qp_init_attr->cap.max_recv_wr ||
+ qp_init_attr->cap.max_recv_sge))
+ return ERR_PTR(-EINVAL);
+
/*
* If the callers is using the RDMA API calculate the resources
* needed for the RDMA READ/WRITE operations.
qp->real_qp = qp;
qp->uobject = NULL;
qp->qp_type = qp_init_attr->qp_type;
+ qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl;
atomic_set(&qp->usecnt, 0);
qp->mrs_used = 0;
qp->srq = NULL;
} else {
qp->recv_cq = qp_init_attr->recv_cq;
- atomic_inc(&qp_init_attr->recv_cq->usecnt);
+ if (qp_init_attr->recv_cq)
+ atomic_inc(&qp_init_attr->recv_cq->usecnt);
qp->srq = qp_init_attr->srq;
if (qp->srq)
atomic_inc(&qp_init_attr->srq->usecnt);
qp->xrcd = NULL;
atomic_inc(&pd->usecnt);
- atomic_inc(&qp_init_attr->send_cq->usecnt);
+ if (qp_init_attr->send_cq)
+ atomic_inc(&qp_init_attr->send_cq->usecnt);
+ if (qp_init_attr->rwq_ind_tbl)
+ atomic_inc(&qp->rwq_ind_tbl->usecnt);
if (qp_init_attr->cap.max_rdma_ctxs) {
ret = rdma_rw_init_mrs(qp, qp_init_attr);
}
}
+ /*
+ * Note: all hw drivers guarantee that max_send_sge is lower than
+ * the device RDMA WRITE SGE limit but not all hw drivers ensure that
+ * max_send_sge <= max_sge_rd.
+ */
+ qp->max_write_sge = qp_init_attr->cap.max_send_sge;
+ qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge,
+ device->attrs.max_sge_rd);
+
return qp;
}
EXPORT_SYMBOL(ib_create_qp);
struct ib_pd *pd;
struct ib_cq *scq, *rcq;
struct ib_srq *srq;
+ struct ib_rwq_ind_table *ind_tbl;
int ret;
WARN_ON_ONCE(qp->mrs_used > 0);
scq = qp->send_cq;
rcq = qp->recv_cq;
srq = qp->srq;
+ ind_tbl = qp->rwq_ind_tbl;
if (!qp->uobject)
rdma_rw_cleanup_mrs(qp);
atomic_dec(&rcq->usecnt);
if (srq)
atomic_dec(&srq->usecnt);
+ if (ind_tbl)
+ atomic_dec(&ind_tbl->usecnt);
}
return ret;
}
EXPORT_SYMBOL(ib_dealloc_xrcd);
+/**
+ * ib_create_wq - Creates a WQ associated with the specified protection
+ * domain.
+ * @pd: The protection domain associated with the WQ.
+ * @wq_init_attr: A list of initial attributes required to create the
+ * WQ. If WQ creation succeeds, then the attributes are updated to
+ * the actual capabilities of the created WQ.
+ *
+ * wq_init_attr->max_wr and wq_init_attr->max_sge determine
+ * the requested size of the WQ, and set to the actual values allocated
+ * on return.
+ * If ib_create_wq() succeeds, then max_wr and max_sge will always be
+ * at least as large as the requested values.
+ */
+struct ib_wq *ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *wq_attr)
+{
+ struct ib_wq *wq;
+
+ if (!pd->device->create_wq)
+ return ERR_PTR(-ENOSYS);
+
+ wq = pd->device->create_wq(pd, wq_attr, NULL);
+ if (!IS_ERR(wq)) {
+ wq->event_handler = wq_attr->event_handler;
+ wq->wq_context = wq_attr->wq_context;
+ wq->wq_type = wq_attr->wq_type;
+ wq->cq = wq_attr->cq;
+ wq->device = pd->device;
+ wq->pd = pd;
+ wq->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&wq_attr->cq->usecnt);
+ atomic_set(&wq->usecnt, 0);
+ }
+ return wq;
+}
+EXPORT_SYMBOL(ib_create_wq);
+
+/**
+ * ib_destroy_wq - Destroys the specified WQ.
+ * @wq: The WQ to destroy.
+ */
+int ib_destroy_wq(struct ib_wq *wq)
+{
+ int err;
+ struct ib_cq *cq = wq->cq;
+ struct ib_pd *pd = wq->pd;
+
+ if (atomic_read(&wq->usecnt))
+ return -EBUSY;
+
+ err = wq->device->destroy_wq(wq);
+ if (!err) {
+ atomic_dec(&pd->usecnt);
+ atomic_dec(&cq->usecnt);
+ }
+ return err;
+}
+EXPORT_SYMBOL(ib_destroy_wq);
+
+/**
+ * ib_modify_wq - Modifies the specified WQ.
+ * @wq: The WQ to modify.
+ * @wq_attr: On input, specifies the WQ attributes to modify.
+ * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ
+ * are being modified.
+ * On output, the current values of selected WQ attributes are returned.
+ */
+int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask)
+{
+ int err;
+
+ if (!wq->device->modify_wq)
+ return -ENOSYS;
+
+ err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL);
+ return err;
+}
+EXPORT_SYMBOL(ib_modify_wq);
+
+/*
+ * ib_create_rwq_ind_table - Creates a RQ Indirection Table.
+ * @device: The device on which to create the rwq indirection table.
+ * @ib_rwq_ind_table_init_attr: A list of initial attributes required to
+ * create the Indirection Table.
+ *
+ * Note: The life time of ib_rwq_ind_table_init_attr->ind_tbl is not less
+ * than the created ib_rwq_ind_table object and the caller is responsible
+ * for its memory allocation/free.
+ */
+struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
+ struct ib_rwq_ind_table_init_attr *init_attr)
+{
+ struct ib_rwq_ind_table *rwq_ind_table;
+ int i;
+ u32 table_size;
+
+ if (!device->create_rwq_ind_table)
+ return ERR_PTR(-ENOSYS);
+
+ table_size = (1 << init_attr->log_ind_tbl_size);
+ rwq_ind_table = device->create_rwq_ind_table(device,
+ init_attr, NULL);
+ if (IS_ERR(rwq_ind_table))
+ return rwq_ind_table;
+
+ rwq_ind_table->ind_tbl = init_attr->ind_tbl;
+ rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size;
+ rwq_ind_table->device = device;
+ rwq_ind_table->uobject = NULL;
+ atomic_set(&rwq_ind_table->usecnt, 0);
+
+ for (i = 0; i < table_size; i++)
+ atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt);
+
+ return rwq_ind_table;
+}
+EXPORT_SYMBOL(ib_create_rwq_ind_table);
+
+/*
+ * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table.
+ * @wq_ind_table: The Indirection Table to destroy.
+*/
+int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
+{
+ int err, i;
+ u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size);
+ struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl;
+
+ if (atomic_read(&rwq_ind_table->usecnt))
+ return -EBUSY;
+
+ err = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table);
+ if (!err) {
+ for (i = 0; i < table_size; i++)
+ atomic_dec(&ind_tbl[i]->usecnt);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
+
struct ib_flow *ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
int domain)
state_set(&child_ep->com, CONNECTING);
child_ep->com.tdev = tdev;
child_ep->com.cm_id = NULL;
- child_ep->com.local_addr.sin_family = PF_INET;
+ child_ep->com.local_addr.sin_family = AF_INET;
child_ep->com.local_addr.sin_port = req->local_port;
child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
- child_ep->com.remote_addr.sin_family = PF_INET;
+ child_ep->com.remote_addr.sin_family = AF_INET;
child_ep->com.remote_addr.sin_port = req->peer_port;
child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
get_ep(&parent_ep->com);
return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
}
-static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
- ibdev.dev);
- struct ethtool_drvinfo info;
- struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
-
- PDBG("%s dev 0x%p\n", __func__, dev);
- lldev->ethtool_ops->get_drvinfo(lldev, &info);
- return sprintf(buf, "%s\n", info.fw_version);
-}
-
static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
char *buf)
{
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *iwch_class_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id,
};
return 0;
}
+static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str,
+ size_t str_len)
+{
+ struct iwch_dev *iwch_dev = to_iwch_dev(ibdev);
+ struct ethtool_drvinfo info;
+ struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
+
+ PDBG("%s dev 0x%p\n", __func__, iwch_dev);
+ lldev->ethtool_ops->get_drvinfo(lldev, &info);
+ snprintf(str, str_len, "%s", info.fw_version);
+}
+
int iwch_register_device(struct iwch_dev *dev)
{
int ret;
dev->ibdev.get_hw_stats = iwch_get_mib;
dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
dev->ibdev.get_port_immutable = iwch_port_immutable;
+ dev->ibdev.get_dev_fw_str = get_dev_fw_ver_str;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm)
return;
}
+static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size)
+{
+ struct sk_buff *skb;
+ unsigned int i;
+ size_t len;
+
+ len = roundup(sizeof(union cpl_wr_size), 16);
+ for (i = 0; i < size; i++) {
+ skb = alloc_skb(len, GFP_KERNEL);
+ if (!skb)
+ goto fail;
+ skb_queue_tail(ep_skb_list, skb);
+ }
+ return 0;
+fail:
+ skb_queue_purge(ep_skb_list);
+ return -ENOMEM;
+}
+
static void *alloc_ep(int size, gfp_t gfp)
{
struct c4iw_ep_common *epc;
if (ep->mpa_skb)
kfree_skb(ep->mpa_skb);
}
+ if (!skb_queue_empty(&ep->com.ep_skb_list))
+ skb_queue_purge(&ep->com.ep_skb_list);
kfree(ep);
}
}
}
-static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
+static int send_flowc(struct c4iw_ep *ep)
{
- unsigned int flowclen = 80;
struct fw_flowc_wr *flowc;
+ struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
int i;
u16 vlan = ep->l2t->vlan;
int nparams;
+ if (WARN_ON(!skb))
+ return -ENOMEM;
+
if (vlan == CPL_L2T_VLAN_NONE)
nparams = 8;
else
nparams = 9;
- skb = get_skb(skb, flowclen, GFP_KERNEL);
- flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen);
+ flowc = (struct fw_flowc_wr *)__skb_put(skb, FLOWC_LEN);
flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
FW_FLOWC_WR_NPARAMS_V(nparams));
- flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen,
+ flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(FLOWC_LEN,
16)) | FW_WR_FLOWID_V(ep->hwtid));
flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
return c4iw_ofld_send(&ep->com.dev->rdev, skb);
}
-static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp)
+static int send_halfclose(struct c4iw_ep *ep)
{
struct cpl_close_con_req *req;
- struct sk_buff *skb;
+ struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
int wrlen = roundup(sizeof *req, 16);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- skb = get_skb(NULL, wrlen, gfp);
- if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
+ if (WARN_ON(!skb))
return -ENOMEM;
- }
+
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
-static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
+static int send_abort(struct c4iw_ep *ep)
{
struct cpl_abort_req *req;
int wrlen = roundup(sizeof *req, 16);
+ struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- skb = get_skb(skb, wrlen, gfp);
- if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
- __func__);
+ if (WARN_ON(!req_skb))
return -ENOMEM;
- }
- set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- t4_set_arp_err_handler(skb, ep, abort_arp_failure);
- req = (struct cpl_abort_req *) skb_put(skb, wrlen);
+
+ set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx);
+ t4_set_arp_err_handler(req_skb, ep, abort_arp_failure);
+ req = (struct cpl_abort_req *)skb_put(req_skb, wrlen);
memset(req, 0, wrlen);
INIT_TP_WR(req, ep->hwtid);
OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
req->cmd = CPL_ABORT_SEND_RST;
- return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+ return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
}
static void best_mtu(const unsigned short *mtus, unsigned short mtu,
mpa = (struct mpa_message *)(req + 1);
memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
- mpa->flags = (crc_enabled ? MPA_CRC : 0) |
- (markers_enabled ? MPA_MARKERS : 0) |
- (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
+
+ mpa->flags = 0;
+ if (crc_enabled)
+ mpa->flags |= MPA_CRC;
+ if (markers_enabled) {
+ mpa->flags |= MPA_MARKERS;
+ ep->mpa_attr.recv_marker_enabled = 1;
+ } else {
+ ep->mpa_attr.recv_marker_enabled = 0;
+ }
+ if (mpa_rev_to_use == 2)
+ mpa->flags |= MPA_ENHANCED_RDMA_CONN;
+
mpa->private_data_size = htons(ep->plen);
mpa->revision = mpa_rev_to_use;
if (mpa_rev_to_use == 1) {
mpa = (struct mpa_message *)(req + 1);
memset(mpa, 0, sizeof(*mpa));
memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
- mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
- (markers_enabled ? MPA_MARKERS : 0);
+ mpa->flags = 0;
+ if (ep->mpa_attr.crc_enabled)
+ mpa->flags |= MPA_CRC;
+ if (ep->mpa_attr.recv_marker_enabled)
+ mpa->flags |= MPA_MARKERS;
mpa->revision = ep->mpa_attr.version;
mpa->private_data_size = htons(plen);
set_bit(ACT_ESTAB, &ep->com.history);
/* start MPA negotiation */
- ret = send_flowc(ep, NULL);
+ ret = send_flowc(ep);
if (ret)
goto err;
if (ep->retry_with_mpa_v1)
*/
__state_set(&ep->com, FPDU_MODE);
ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
- ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
ep->mpa_attr.version = mpa->revision;
ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
}
/*
- * Return whether a failed active open has allocated a TID
+ * Some of the error codes above implicitly indicate that there is no TID
+ * allocated with the result of an ACT_OPEN. We use this predicate to make
+ * that explicit.
*/
static inline int act_open_has_tid(int status)
{
- return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
- status != CPL_ERR_ARP_MISS;
+ return (status != CPL_ERR_TCAM_PARITY &&
+ status != CPL_ERR_TCAM_MISS &&
+ status != CPL_ERR_TCAM_FULL &&
+ status != CPL_ERR_CONN_EXIST_SYNRECV &&
+ status != CPL_ERR_CONN_EXIST);
}
/* Returns whether a CPL status conveys negative advice.
static int c4iw_reconnect(struct c4iw_ep *ep)
{
int err = 0;
+ int size = 0;
struct sockaddr_in *laddr = (struct sockaddr_in *)
&ep->com.cm_id->m_local_addr;
struct sockaddr_in *raddr = (struct sockaddr_in *)
init_timer(&ep->timer);
c4iw_init_wr_wait(&ep->com.wr_wait);
+ /* When MPA revision is different on nodes, the node with MPA_rev=2
+ * tries to reconnect with MPA_rev 1 for the same EP through
+ * c4iw_reconnect(), where the same EP is assigned with new tid for
+ * further connection establishment. As we are using the same EP pointer
+ * for reconnect, few skbs are used during the previous c4iw_connect(),
+ * which leaves the EP with inadequate skbs for further
+ * c4iw_reconnect(), Further causing an assert BUG_ON() due to empty
+ * skb_list() during peer_abort(). Allocate skbs which is already used.
+ */
+ size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list));
+ if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) {
+ err = -ENOMEM;
+ goto fail1;
+ }
+
/*
* Allocate an active TID to initiate a TCP connection.
*/
* response of 1st connect request.
*/
connect_reply_upcall(ep, -ECONNRESET);
+fail1:
c4iw_put_ep(&ep->com);
out:
return err;
if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
child_ep->mtu = peer_mss + hdrs;
+ skb_queue_head_init(&child_ep->com.ep_skb_list);
+ if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF))
+ goto fail;
+
state_set(&child_ep->com, CONNECTING);
child_ep->com.dev = dev;
child_ep->com.cm_id = NULL;
(const u32 *)&sin6->sin6_addr.s6_addr, 1);
}
goto out;
+fail:
+ c4iw_put_ep(&child_ep->com);
reject:
reject_cr(dev, hwtid, skb);
if (parent_ep)
ep->com.state = MPA_REQ_WAIT;
start_ep_timer(ep);
set_bit(PASS_ESTAB, &ep->com.history);
- ret = send_flowc(ep, skb);
+ ret = send_flowc(ep);
mutex_unlock(&ep->com.mutex);
if (ret)
c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
}
mutex_unlock(&ep->com.mutex);
- rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
- if (!rpl_skb) {
- printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
- __func__);
+ rpl_skb = skb_dequeue(&ep->com.ep_skb_list);
+ if (WARN_ON(!rpl_skb)) {
release = 1;
goto out;
}
PDBG("%s last streaming msg ack ep %p tid %u state %u "
"initiator %u freeing skb\n", __func__, ep, ep->hwtid,
state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
+ mutex_lock(&ep->com.mutex);
kfree_skb(ep->mpa_skb);
ep->mpa_skb = NULL;
- mutex_lock(&ep->com.mutex);
if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
stop_ep_timer(ep);
mutex_unlock(&ep->com.mutex);
int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
- int err = 0;
- int disconnect = 0;
+ int abort;
struct c4iw_ep *ep = to_ep(cm_id);
+
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
}
set_bit(ULP_REJECT, &ep->com.history);
if (mpa_rev == 0)
- disconnect = 2;
- else {
- err = send_mpa_reject(ep, pdata, pdata_len);
- disconnect = 1;
- }
+ abort = 1;
+ else
+ abort = send_mpa_reject(ep, pdata, pdata_len);
mutex_unlock(&ep->com.mutex);
- if (disconnect) {
- stop_ep_timer(ep);
- err = c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
- }
+
+ stop_ep_timer(ep);
+ c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
c4iw_put_ep(&ep->com);
return 0;
}
err = -ENOMEM;
goto out;
}
+
+ skb_queue_head_init(&ep->com.ep_skb_list);
+ if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) {
+ err = -ENOMEM;
+ goto fail1;
+ }
+
init_timer(&ep->timer);
ep->plen = conn_param->private_data_len;
if (ep->plen)
if (!ep->com.qp) {
PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
err = -EINVAL;
- goto fail1;
+ goto fail2;
}
ref_qp(ep);
PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
if (ep->atid == -1) {
printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
err = -ENOMEM;
- goto fail1;
+ goto fail2;
}
insert_handle(dev, &dev->atid_idr, ep, ep->atid);
if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
err = pick_local_ipaddrs(dev, cm_id);
if (err)
- goto fail1;
+ goto fail2;
}
/* find a route */
if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
err = pick_local_ip6addrs(dev, cm_id);
if (err)
- goto fail1;
+ goto fail2;
}
/* find a route */
if (!ep->dst) {
printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
err = -EHOSTUNREACH;
- goto fail2;
+ goto fail3;
}
err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
if (err) {
printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
- goto fail3;
+ goto fail4;
}
PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
goto out;
cxgb4_l2t_release(ep->l2t);
-fail3:
+fail4:
dst_release(ep->dst);
-fail2:
+fail3:
remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
-fail1:
+fail2:
+ skb_queue_purge(&ep->com.ep_skb_list);
deref_cm_id(&ep->com);
+fail1:
c4iw_put_ep(&ep->com);
out:
return err;
err = -ENOMEM;
goto fail1;
}
+ skb_queue_head_init(&ep->com.ep_skb_list);
PDBG("%s ep %p\n", __func__, ep);
ep->com.cm_id = cm_id;
ref_cm_id(&ep->com);
case MPA_REQ_RCVD:
case MPA_REP_SENT:
case FPDU_MODE:
+ case CONNECTING:
close = 1;
if (abrupt)
ep->com.state = ABORTING;
else {
ep->com.state = CLOSING;
+
+ /*
+ * if we close before we see the fw4_ack() then we fix
+ * up the timer state since we're reusing it.
+ */
+ if (ep->mpa_skb &&
+ test_bit(STOP_MPA_TIMER, &ep->com.flags)) {
+ clear_bit(STOP_MPA_TIMER, &ep->com.flags);
+ stop_ep_timer(ep);
+ }
start_ep_timer(ep);
}
set_bit(CLOSE_SENT, &ep->com.flags);
if (abrupt) {
set_bit(EP_DISC_ABORT, &ep->com.history);
close_complete_upcall(ep, -ECONNRESET);
- ret = send_abort(ep, NULL, gfp);
+ ret = send_abort(ep);
} else {
set_bit(EP_DISC_CLOSE, &ep->com.history);
- ret = send_halfclose(ep, gfp);
+ ret = send_halfclose(ep);
}
if (ret) {
set_bit(EP_DISC_FAIL, &ep->com.history);
#include "iw_cxgb4.h"
static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
- struct c4iw_dev_ucontext *uctx)
+ struct c4iw_dev_ucontext *uctx, struct sk_buff *skb)
{
struct fw_ri_res_wr *res_wr;
struct fw_ri_res *res;
int wr_len;
struct c4iw_wr_wait wr_wait;
- struct sk_buff *skb;
int ret;
wr_len = sizeof *res_wr + sizeof *res;
- skb = alloc_skb(wr_len, GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
: NULL;
destroy_cq(&chp->rhp->rdev, &chp->cq,
- ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx);
+ ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
+ chp->destroy_skb);
+ chp->destroy_skb = NULL;
kfree(chp);
return 0;
}
struct c4iw_cq *chp;
struct c4iw_create_cq_resp uresp;
struct c4iw_ucontext *ucontext = NULL;
- int ret;
+ int ret, wr_len;
size_t memsize, hwentries;
struct c4iw_mm_entry *mm, *mm2;
if (!chp)
return ERR_PTR(-ENOMEM);
+ wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
+ chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
+ if (!chp->destroy_skb) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
if (ib_context)
ucontext = to_c4iw_ucontext(ib_context);
ret = create_cq(&rhp->rdev, &chp->cq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
if (ret)
- goto err1;
+ goto err2;
chp->rhp = rhp;
chp->cq.size--; /* status page */
init_waitqueue_head(&chp->wait);
ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
if (ret)
- goto err2;
+ goto err3;
if (ucontext) {
mm = kmalloc(sizeof *mm, GFP_KERNEL);
if (!mm)
- goto err3;
+ goto err4;
mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
if (!mm2)
- goto err4;
+ goto err5;
uresp.qid_mask = rhp->rdev.cqmask;
uresp.cqid = chp->cq.cqid;
ret = ib_copy_to_udata(udata, &uresp,
sizeof(uresp) - sizeof(uresp.reserved));
if (ret)
- goto err5;
+ goto err6;
mm->key = uresp.key;
mm->addr = virt_to_phys(chp->cq.queue);
__func__, chp->cq.cqid, chp, chp->cq.size,
chp->cq.memsize, (unsigned long long) chp->cq.dma_addr);
return &chp->ibcq;
-err5:
+err6:
kfree(mm2);
-err4:
+err5:
kfree(mm);
-err3:
+err4:
remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
-err2:
+err3:
destroy_cq(&chp->rhp->rdev, &chp->cq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
+ chp->destroy_skb);
+err2:
+ kfree_skb(chp->destroy_skb);
err1:
kfree(chp);
return ERR_PTR(ret);
idr_for_each(&qpd->devp->qpidr, count_idrs, &count);
spin_unlock_irq(&qpd->devp->lock);
- qpd->bufsize = count * 128;
+ qpd->bufsize = count * 180;
qpd->buf = vmalloc(qpd->bufsize);
if (!qpd->buf) {
kfree(qpd);
struct ib_mr ibmr;
struct ib_umem *umem;
struct c4iw_dev *rhp;
+ struct sk_buff *dereg_skb;
u64 kva;
struct tpt_attributes attr;
u64 *mpl;
struct c4iw_mw {
struct ib_mw ibmw;
struct c4iw_dev *rhp;
+ struct sk_buff *dereg_skb;
u64 kva;
struct tpt_attributes attr;
};
struct c4iw_cq {
struct ib_cq ibcq;
struct c4iw_dev *rhp;
+ struct sk_buff *destroy_skb;
struct t4_cq cq;
spinlock_t lock;
spinlock_t comp_handler_lock;
struct t4_wq wq;
spinlock_t lock;
struct mutex mutex;
- atomic_t refcnt;
+ struct kref kref;
wait_queue_head_t wait;
struct timer_list timer;
int sq_sig_all;
CM_ID_DEREFED = 28,
};
+enum conn_pre_alloc_buffers {
+ CN_ABORT_REQ_BUF,
+ CN_ABORT_RPL_BUF,
+ CN_CLOSE_CON_REQ_BUF,
+ CN_DESTROY_BUF,
+ CN_FLOWC_BUF,
+ CN_MAX_CON_BUF
+};
+
+#define FLOWC_LEN 80
+union cpl_wr_size {
+ struct cpl_abort_req abrt_req;
+ struct cpl_abort_rpl abrt_rpl;
+ struct fw_ri_wr ri_req;
+ struct cpl_close_con_req close_req;
+ char flowc_buf[FLOWC_LEN];
+};
+
struct c4iw_ep_common {
struct iw_cm_id *cm_id;
struct c4iw_qp *qp;
struct c4iw_dev *dev;
+ struct sk_buff_head ep_skb_list;
enum c4iw_ep_state state;
struct kref kref;
struct mutex mutex;
}
static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
- u32 len, dma_addr_t data, int wait)
+ u32 len, dma_addr_t data,
+ int wait, struct sk_buff *skb)
{
- struct sk_buff *skb;
struct ulp_mem_io *req;
struct ulptx_sgl *sgl;
u8 wr_len;
c4iw_init_wr_wait(&wr_wait);
wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16);
- skb = alloc_skb(wr_len, GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
+ if (!skb) {
+ skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
+ if (!skb)
+ return -ENOMEM;
+ }
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
}
static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
- void *data)
+ void *data, struct sk_buff *skb)
{
- struct sk_buff *skb;
struct ulp_mem_io *req;
struct ulptx_idata *sc;
u8 wr_len, *to_dp, *from_dp;
wr_len = roundup(sizeof *req + sizeof *sc +
roundup(copy_len, T4_ULPTX_MIN_IO), 16);
- skb = alloc_skb(wr_len, GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
+ if (!skb) {
+ skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
+ if (!skb)
+ return -ENOMEM;
+ }
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
memset(to_dp + copy_len, 0, T4_ULPTX_MIN_IO -
(copy_len % T4_ULPTX_MIN_IO));
ret = c4iw_ofld_send(rdev, skb);
+ skb = NULL;
if (ret)
return ret;
len -= C4IW_MAX_INLINE_SIZE;
return ret;
}
-static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data)
+static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len,
+ void *data, struct sk_buff *skb)
{
u32 remain = len;
u32 dmalen;
dmalen = T4_ULPTX_MAX_DMA;
remain -= dmalen;
ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr,
- !remain);
+ !remain, skb);
if (ret)
goto out;
addr += dmalen >> 5;
daddr += dmalen;
}
if (remain)
- ret = _c4iw_write_mem_inline(rdev, addr, remain, data);
+ ret = _c4iw_write_mem_inline(rdev, addr, remain, data, skb);
out:
dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE);
return ret;
* If data is NULL, clear len byte of memory to zero.
*/
static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len,
- void *data)
+ void *data, struct sk_buff *skb)
{
if (is_t5(rdev->lldi.adapter_type) && use_dsgl) {
if (len > inline_threshold) {
- if (_c4iw_write_mem_dma(rdev, addr, len, data)) {
+ if (_c4iw_write_mem_dma(rdev, addr, len, data, skb)) {
printk_ratelimited(KERN_WARNING
"%s: dma map"
" failure (non fatal)\n",
pci_name(rdev->lldi.pdev));
return _c4iw_write_mem_inline(rdev, addr, len,
- data);
- } else
+ data, skb);
+ } else {
return 0;
+ }
} else
- return _c4iw_write_mem_inline(rdev, addr, len, data);
+ return _c4iw_write_mem_inline(rdev, addr,
+ len, data, skb);
} else
- return _c4iw_write_mem_inline(rdev, addr, len, data);
+ return _c4iw_write_mem_inline(rdev, addr, len, data, skb);
}
/*
u32 *stag, u8 stag_state, u32 pdid,
enum fw_ri_stag_type type, enum fw_ri_mem_perms perm,
int bind_enabled, u32 zbva, u64 to,
- u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr)
+ u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr,
+ struct sk_buff *skb)
{
int err;
struct fw_ri_tpte tpt;
}
err = write_adapter_mem(rdev, stag_idx +
(rdev->lldi.vr->stag.start >> 5),
- sizeof(tpt), &tpt);
+ sizeof(tpt), &tpt, skb);
if (reset_tpt_entry) {
c4iw_put_resource(&rdev->resource.tpt_table, stag_idx);
__func__, pbl_addr, rdev->lldi.vr->pbl.start,
pbl_size);
- err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl);
+ err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL);
return err;
}
static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size,
- u32 pbl_addr)
+ u32 pbl_addr, struct sk_buff *skb)
{
return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0,
- pbl_size, pbl_addr);
+ pbl_size, pbl_addr, skb);
}
static int allocate_window(struct c4iw_rdev *rdev, u32 * stag, u32 pdid)
{
*stag = T4_STAG_UNSET;
return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0,
- 0UL, 0, 0, 0, 0);
+ 0UL, 0, 0, 0, 0, NULL);
}
-static int deallocate_window(struct c4iw_rdev *rdev, u32 stag)
+static int deallocate_window(struct c4iw_rdev *rdev, u32 stag,
+ struct sk_buff *skb)
{
return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0,
- 0);
+ 0, skb);
}
static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid,
{
*stag = T4_STAG_UNSET;
return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_NSMR, 0, 0, 0,
- 0UL, 0, 0, pbl_size, pbl_addr);
+ 0UL, 0, 0, pbl_size, pbl_addr, NULL);
}
static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
mhp->attr.mw_bind_enable, mhp->attr.zbva,
mhp->attr.va_fbo, mhp->attr.len ?
mhp->attr.len : -1, shift - 12,
- mhp->attr.pbl_size, mhp->attr.pbl_addr);
+ mhp->attr.pbl_size, mhp->attr.pbl_addr, NULL);
if (ret)
return ret;
ret = finish_mem_reg(mhp, stag);
- if (ret)
+ if (ret) {
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
+ mhp->attr.pbl_addr, mhp->dereg_skb);
+ mhp->dereg_skb = NULL;
+ }
return ret;
}
if (!mhp)
return ERR_PTR(-ENOMEM);
+ mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
+ if (!mhp->dereg_skb) {
+ ret = -ENOMEM;
+ goto err0;
+ }
+
mhp->rhp = rhp;
mhp->attr.pdid = php->pdid;
mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid,
FW_RI_STAG_NSMR, mhp->attr.perms,
- mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0);
+ mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0,
+ NULL);
if (ret)
goto err1;
return &mhp->ibmr;
err2:
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
+ mhp->attr.pbl_addr, mhp->dereg_skb);
err1:
+ kfree_skb(mhp->dereg_skb);
+err0:
kfree(mhp);
return ERR_PTR(ret);
}
if (!mhp)
return ERR_PTR(-ENOMEM);
+ mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
+ if (!mhp->dereg_skb) {
+ kfree(mhp);
+ return ERR_PTR(-ENOMEM);
+ }
+
mhp->rhp = rhp;
mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(mhp->umem)) {
err = PTR_ERR(mhp->umem);
+ kfree_skb(mhp->dereg_skb);
kfree(mhp);
return ERR_PTR(err);
}
err:
ib_umem_release(mhp->umem);
+ kfree_skb(mhp->dereg_skb);
kfree(mhp);
return ERR_PTR(err);
}
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
if (!mhp)
return ERR_PTR(-ENOMEM);
- ret = allocate_window(&rhp->rdev, &stag, php->pdid);
- if (ret) {
- kfree(mhp);
- return ERR_PTR(ret);
+
+ mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
+ if (!mhp->dereg_skb) {
+ ret = -ENOMEM;
+ goto free_mhp;
}
+
+ ret = allocate_window(&rhp->rdev, &stag, php->pdid);
+ if (ret)
+ goto free_skb;
mhp->rhp = rhp;
mhp->attr.pdid = php->pdid;
mhp->attr.type = FW_RI_STAG_MW;
mmid = (stag) >> 8;
mhp->ibmw.rkey = stag;
if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
- deallocate_window(&rhp->rdev, mhp->attr.stag);
- kfree(mhp);
- return ERR_PTR(-ENOMEM);
+ ret = -ENOMEM;
+ goto dealloc_win;
}
PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmw);
+
+dealloc_win:
+ deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb);
+free_skb:
+ kfree_skb(mhp->dereg_skb);
+free_mhp:
+ kfree(mhp);
+ return ERR_PTR(ret);
}
int c4iw_dealloc_mw(struct ib_mw *mw)
rhp = mhp->rhp;
mmid = (mw->rkey) >> 8;
remove_handle(rhp, &rhp->mmidr, mmid);
- deallocate_window(&rhp->rdev, mhp->attr.stag);
+ deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb);
+ kfree_skb(mhp->dereg_skb);
kfree(mhp);
PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
return 0;
return &(mhp->ibmr);
err3:
dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
+ mhp->attr.pbl_addr, mhp->dereg_skb);
err2:
c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
mhp->attr.pbl_size << 3);
dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
+ mhp->attr.pbl_addr, mhp->dereg_skb);
if (mhp->attr.pbl_size)
c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
mhp->attr.pbl_size << 3);
CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
}
-static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
- ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
-
- return sprintf(buf, "%u.%u.%u.%u\n",
- FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers),
- FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers),
- FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers),
- FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers));
-}
-
static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
char *buf)
{
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *c4iw_class_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id,
};
return 0;
}
+static void get_dev_fw_str(struct ib_device *dev, char *str,
+ size_t str_len)
+{
+ struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
+ ibdev);
+ PDBG("%s dev 0x%p\n", __func__, dev);
+
+ snprintf(str, str_len, "%u.%u.%u.%u",
+ FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers),
+ FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers),
+ FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers),
+ FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers));
+}
+
int c4iw_register_device(struct c4iw_dev *dev)
{
int ret;
dev->ibdev.get_hw_stats = c4iw_get_mib;
dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
dev->ibdev.get_port_immutable = c4iw_port_immutable;
+ dev->ibdev.get_dev_fw_str = get_dev_fw_str;
dev->ibdev.drain_sq = c4iw_drain_sq;
dev->ibdev.drain_rq = c4iw_drain_rq;
return 0;
}
+void _free_qp(struct kref *kref)
+{
+ struct c4iw_qp *qhp;
+
+ qhp = container_of(kref, struct c4iw_qp, kref);
+ PDBG("%s qhp %p\n", __func__, qhp);
+ kfree(qhp);
+}
+
void c4iw_qp_add_ref(struct ib_qp *qp)
{
PDBG("%s ib_qp %p\n", __func__, qp);
- atomic_inc(&(to_c4iw_qp(qp)->refcnt));
+ kref_get(&to_c4iw_qp(qp)->kref);
}
void c4iw_qp_rem_ref(struct ib_qp *qp)
{
PDBG("%s ib_qp %p\n", __func__, qp);
- if (atomic_dec_and_test(&(to_c4iw_qp(qp)->refcnt)))
- wake_up(&(to_c4iw_qp(qp)->wait));
+ kref_put(&to_c4iw_qp(qp)->kref, _free_qp);
}
static void add_to_fc_list(struct list_head *head, struct list_head *entry)
PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
qhp->ep->hwtid);
- skb = alloc_skb(sizeof *wqe, gfp);
- if (!skb)
+ skb = skb_dequeue(&qhp->ep->com.ep_skb_list);
+ if (WARN_ON(!skb))
return;
+
set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
ep->hwtid);
- skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
- if (!skb)
+ skb = skb_dequeue(&ep->com.ep_skb_list);
+ if (WARN_ON(!skb))
return -ENOMEM;
+
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
wait_event(qhp->wait, !qhp->ep);
remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
- atomic_dec(&qhp->refcnt);
- wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
spin_lock_irq(&rhp->lock);
if (!list_empty(&qhp->db_fc_entry))
destroy_qp(&rhp->rdev, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+ c4iw_qp_rem_ref(ib_qp);
+
PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
- kfree(qhp);
return 0;
}
init_completion(&qhp->rq_drained);
mutex_init(&qhp->mutex);
init_waitqueue_head(&qhp->wait);
- atomic_set(&qhp->refcnt, 1);
+ kref_init(&qhp->kref);
ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
if (ret)
return 0;
}
+static void move_qp_to_err(struct c4iw_qp *qp)
+{
+ struct c4iw_qp_attributes attrs = { .next_state = C4IW_QP_STATE_ERROR };
+
+ (void)c4iw_modify_qp(qp->rhp, qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+}
+
void c4iw_drain_sq(struct ib_qp *ibqp)
{
struct c4iw_qp *qp = to_c4iw_qp(ibqp);
unsigned long flag;
bool need_to_wait;
+ move_qp_to_err(qp);
spin_lock_irqsave(&qp->lock, flag);
need_to_wait = !t4_sq_empty(&qp->wq);
spin_unlock_irqrestore(&qp->lock, flag);
unsigned long flag;
bool need_to_wait;
+ move_qp_to_err(qp);
spin_lock_irqsave(&qp->lock, flag);
need_to_wait = !t4_rq_empty(&qp->wq);
spin_unlock_irqrestore(&qp->lock, flag);
config INFINIBAND_HFI1
tristate "Intel OPA Gen1 support"
- depends on X86_64 && INFINIBAND_RDMAVT
+ depends on X86_64 && INFINIBAND_RDMAVT && I2C
select MMU_NOTIFIER
select CRC32
- default m
+ select I2C_ALGOBIT
---help---
This is a low-level driver for Intel OPA Gen1 adapter.
config HFI1_DEBUG_SDMA_ORDER
hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
eprom.o file_ops.o firmware.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
- qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \
+ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
verbs_txreq.o
hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/module.h>
+#include <linux/cpumask.h>
#include "hfi.h"
#include "affinity.h"
#include "sdma.h"
#include "trace.h"
+struct hfi1_affinity_node_list node_affinity = {
+ .list = LIST_HEAD_INIT(node_affinity.list),
+ .lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock),
+};
+
/* Name of IRQ types, indexed by enum irq_type */
static const char * const irq_type_names[] = {
"SDMA",
"OTHER",
};
+/* Per NUMA node count of HFI devices */
+static unsigned int *hfi1_per_node_cntr;
+
static inline void init_cpu_mask_set(struct cpu_mask_set *set)
{
cpumask_clear(&set->mask);
}
/* Initialize non-HT cpu cores mask */
-int init_real_cpu_mask(struct hfi1_devdata *dd)
+void init_real_cpu_mask(void)
{
- struct hfi1_affinity *info;
int possible, curr_cpu, i, ht;
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- return -ENOMEM;
-
- cpumask_clear(&info->real_cpu_mask);
+ cpumask_clear(&node_affinity.real_cpu_mask);
/* Start with cpu online mask as the real cpu mask */
- cpumask_copy(&info->real_cpu_mask, cpu_online_mask);
+ cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);
/*
* Remove HT cores from the real cpu mask. Do this in two steps below.
*/
- possible = cpumask_weight(&info->real_cpu_mask);
+ possible = cpumask_weight(&node_affinity.real_cpu_mask);
ht = cpumask_weight(topology_sibling_cpumask(
- cpumask_first(&info->real_cpu_mask)));
+ cpumask_first(&node_affinity.real_cpu_mask)));
/*
* Step 1. Skip over the first N HT siblings and use them as the
* "real" cores. Assumes that HT cores are not enumerated in
* succession (except in the single core case).
*/
- curr_cpu = cpumask_first(&info->real_cpu_mask);
+ curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
for (i = 0; i < possible / ht; i++)
- curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
+ curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
/*
* Step 2. Remove the remaining HT siblings. Use cpumask_next() to
* skip any gaps.
*/
for (; i < possible; i++) {
- cpumask_clear_cpu(curr_cpu, &info->real_cpu_mask);
- curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
+ cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
+ curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
+ }
+}
+
+int node_affinity_init(void)
+{
+ int node;
+ struct pci_dev *dev = NULL;
+ const struct pci_device_id *ids = hfi1_pci_tbl;
+
+ cpumask_clear(&node_affinity.proc.used);
+ cpumask_copy(&node_affinity.proc.mask, cpu_online_mask);
+
+ node_affinity.proc.gen = 0;
+ node_affinity.num_core_siblings =
+ cpumask_weight(topology_sibling_cpumask(
+ cpumask_first(&node_affinity.proc.mask)
+ ));
+ node_affinity.num_online_nodes = num_online_nodes();
+ node_affinity.num_online_cpus = num_online_cpus();
+
+ /*
+ * The real cpu mask is part of the affinity struct but it has to be
+ * initialized early. It is needed to calculate the number of user
+ * contexts in set_up_context_variables().
+ */
+ init_real_cpu_mask();
+
+ hfi1_per_node_cntr = kcalloc(num_possible_nodes(),
+ sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
+ if (!hfi1_per_node_cntr)
+ return -ENOMEM;
+
+ while (ids->vendor) {
+ dev = NULL;
+ while ((dev = pci_get_device(ids->vendor, ids->device, dev))) {
+ node = pcibus_to_node(dev->bus);
+ if (node < 0)
+ node = numa_node_id();
+
+ hfi1_per_node_cntr[node]++;
+ }
+ ids++;
}
- dd->affinity = info;
return 0;
}
+void node_affinity_destroy(void)
+{
+ struct list_head *pos, *q;
+ struct hfi1_affinity_node *entry;
+
+ spin_lock(&node_affinity.lock);
+ list_for_each_safe(pos, q, &node_affinity.list) {
+ entry = list_entry(pos, struct hfi1_affinity_node,
+ list);
+ list_del(pos);
+ kfree(entry);
+ }
+ spin_unlock(&node_affinity.lock);
+ kfree(hfi1_per_node_cntr);
+}
+
+static struct hfi1_affinity_node *node_affinity_allocate(int node)
+{
+ struct hfi1_affinity_node *entry;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+ entry->node = node;
+ INIT_LIST_HEAD(&entry->list);
+
+ return entry;
+}
+
+/*
+ * It appends an entry to the list.
+ * It *must* be called with node_affinity.lock held.
+ */
+static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
+{
+ list_add_tail(&entry->list, &node_affinity.list);
+}
+
+/* It must be called with node_affinity.lock held */
+static struct hfi1_affinity_node *node_affinity_lookup(int node)
+{
+ struct list_head *pos;
+ struct hfi1_affinity_node *entry;
+
+ list_for_each(pos, &node_affinity.list) {
+ entry = list_entry(pos, struct hfi1_affinity_node, list);
+ if (entry->node == node)
+ return entry;
+ }
+
+ return NULL;
+}
+
/*
* Interrupt affinity.
*
* to the node relative 1 as necessary.
*
*/
-void hfi1_dev_affinity_init(struct hfi1_devdata *dd)
+int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
int node = pcibus_to_node(dd->pcidev->bus);
- struct hfi1_affinity *info = dd->affinity;
+ struct hfi1_affinity_node *entry;
const struct cpumask *local_mask;
int curr_cpu, possible, i;
node = numa_node_id();
dd->node = node;
- spin_lock_init(&info->lock);
-
- init_cpu_mask_set(&info->def_intr);
- init_cpu_mask_set(&info->rcv_intr);
- init_cpu_mask_set(&info->proc);
-
local_mask = cpumask_of_node(dd->node);
if (cpumask_first(local_mask) >= nr_cpu_ids)
local_mask = topology_core_cpumask(0);
- /* Use the "real" cpu mask of this node as the default */
- cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask);
-
- /* fill in the receive list */
- possible = cpumask_weight(&info->def_intr.mask);
- curr_cpu = cpumask_first(&info->def_intr.mask);
- if (possible == 1) {
- /* only one CPU, everyone will use it */
- cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
- } else {
- /*
- * Retain the first CPU in the default list for the control
- * context.
- */
- curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
- /*
- * Remove the remaining kernel receive queues from
- * the default list and add them to the receive list.
- */
- for (i = 0; i < dd->n_krcv_queues - 1; i++) {
- cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
- cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
- curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
- if (curr_cpu >= nr_cpu_ids)
- break;
+
+ spin_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ spin_unlock(&node_affinity.lock);
+
+ /*
+ * If this is the first time this NUMA node's affinity is used,
+ * create an entry in the global affinity structure and initialize it.
+ */
+ if (!entry) {
+ entry = node_affinity_allocate(node);
+ if (!entry) {
+ dd_dev_err(dd,
+ "Unable to allocate global affinity node\n");
+ return -ENOMEM;
}
- }
+ init_cpu_mask_set(&entry->def_intr);
+ init_cpu_mask_set(&entry->rcv_intr);
+ cpumask_clear(&entry->general_intr_mask);
+ /* Use the "real" cpu mask of this node as the default */
+ cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
+ local_mask);
+
+ /* fill in the receive list */
+ possible = cpumask_weight(&entry->def_intr.mask);
+ curr_cpu = cpumask_first(&entry->def_intr.mask);
+
+ if (possible == 1) {
+ /* only one CPU, everyone will use it */
+ cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask);
+ cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
+ } else {
+ /*
+ * The general/control context will be the first CPU in
+ * the default list, so it is removed from the default
+ * list and added to the general interrupt list.
+ */
+ cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask);
+ cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
+ curr_cpu = cpumask_next(curr_cpu,
+ &entry->def_intr.mask);
- cpumask_copy(&info->proc.mask, cpu_online_mask);
-}
+ /*
+ * Remove the remaining kernel receive queues from
+ * the default list and add them to the receive list.
+ */
+ for (i = 0;
+ i < (dd->n_krcv_queues - 1) *
+ hfi1_per_node_cntr[dd->node];
+ i++) {
+ cpumask_clear_cpu(curr_cpu,
+ &entry->def_intr.mask);
+ cpumask_set_cpu(curr_cpu,
+ &entry->rcv_intr.mask);
+ curr_cpu = cpumask_next(curr_cpu,
+ &entry->def_intr.mask);
+ if (curr_cpu >= nr_cpu_ids)
+ break;
+ }
-void hfi1_dev_affinity_free(struct hfi1_devdata *dd)
-{
- kfree(dd->affinity);
+ /*
+ * If there ends up being 0 CPU cores leftover for SDMA
+ * engines, use the same CPU cores as general/control
+ * context.
+ */
+ if (cpumask_weight(&entry->def_intr.mask) == 0)
+ cpumask_copy(&entry->def_intr.mask,
+ &entry->general_intr_mask);
+ }
+
+ spin_lock(&node_affinity.lock);
+ node_affinity_add_tail(entry);
+ spin_unlock(&node_affinity.lock);
+ }
+
+ return 0;
}
int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
{
int ret;
cpumask_var_t diff;
- struct cpu_mask_set *set;
+ struct hfi1_affinity_node *entry;
+ struct cpu_mask_set *set = NULL;
struct sdma_engine *sde = NULL;
struct hfi1_ctxtdata *rcd = NULL;
char extra[64];
if (!ret)
return -ENOMEM;
+ spin_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ spin_unlock(&node_affinity.lock);
+
switch (msix->type) {
case IRQ_SDMA:
sde = (struct sdma_engine *)msix->arg;
scnprintf(extra, 64, "engine %u", sde->this_idx);
- /* fall through */
+ set = &entry->def_intr;
+ break;
case IRQ_GENERAL:
- set = &dd->affinity->def_intr;
+ cpu = cpumask_first(&entry->general_intr_mask);
break;
case IRQ_RCVCTXT:
rcd = (struct hfi1_ctxtdata *)msix->arg;
- if (rcd->ctxt == HFI1_CTRL_CTXT) {
- set = &dd->affinity->def_intr;
- cpu = cpumask_first(&set->mask);
- } else {
- set = &dd->affinity->rcv_intr;
- }
+ if (rcd->ctxt == HFI1_CTRL_CTXT)
+ cpu = cpumask_first(&entry->general_intr_mask);
+ else
+ set = &entry->rcv_intr;
scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
break;
default:
}
/*
- * The control receive context is placed on a particular CPU, which
- * is set above. Skip accounting for it. Everything else finds its
- * CPU here.
+ * The general and control contexts are placed on a particular
+ * CPU, which is set above. Skip accounting for it. Everything else
+ * finds its CPU here.
*/
- if (cpu == -1) {
- spin_lock(&dd->affinity->lock);
+ if (cpu == -1 && set) {
+ spin_lock(&node_affinity.lock);
if (cpumask_equal(&set->mask, &set->used)) {
/*
* We've used up all the CPUs, bump up the generation
cpumask_andnot(diff, &set->mask, &set->used);
cpu = cpumask_first(diff);
cpumask_set_cpu(cpu, &set->used);
- spin_unlock(&dd->affinity->lock);
+ spin_unlock(&node_affinity.lock);
}
switch (msix->type) {
{
struct cpu_mask_set *set = NULL;
struct hfi1_ctxtdata *rcd;
+ struct hfi1_affinity_node *entry;
+
+ spin_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ spin_unlock(&node_affinity.lock);
switch (msix->type) {
case IRQ_SDMA:
+ set = &entry->def_intr;
+ break;
case IRQ_GENERAL:
- set = &dd->affinity->def_intr;
+ /* Don't do accounting for general contexts */
break;
case IRQ_RCVCTXT:
rcd = (struct hfi1_ctxtdata *)msix->arg;
- /* only do accounting for non control contexts */
+ /* Don't do accounting for control contexts */
if (rcd->ctxt != HFI1_CTRL_CTXT)
- set = &dd->affinity->rcv_intr;
+ set = &entry->rcv_intr;
break;
default:
return;
}
if (set) {
- spin_lock(&dd->affinity->lock);
+ spin_lock(&node_affinity.lock);
cpumask_andnot(&set->used, &set->used, &msix->mask);
if (cpumask_empty(&set->used) && set->gen) {
set->gen--;
cpumask_copy(&set->used, &set->mask);
}
- spin_unlock(&dd->affinity->lock);
+ spin_unlock(&node_affinity.lock);
}
irq_set_affinity_hint(msix->msix.vector, NULL);
cpumask_clear(&msix->mask);
}
-int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
+/* This should be called with node_affinity.lock held */
+static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
+ struct hfi1_affinity_node_list *affinity)
{
- int cpu = -1, ret;
- cpumask_var_t diff, mask, intrs;
+ int possible, curr_cpu, i;
+ uint num_cores_per_socket = node_affinity.num_online_cpus /
+ affinity->num_core_siblings /
+ node_affinity.num_online_nodes;
+
+ cpumask_copy(hw_thread_mask, &affinity->proc.mask);
+ if (affinity->num_core_siblings > 0) {
+ /* Removing other siblings not needed for now */
+ possible = cpumask_weight(hw_thread_mask);
+ curr_cpu = cpumask_first(hw_thread_mask);
+ for (i = 0;
+ i < num_cores_per_socket * node_affinity.num_online_nodes;
+ i++)
+ curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
+
+ for (; i < possible; i++) {
+ cpumask_clear_cpu(curr_cpu, hw_thread_mask);
+ curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
+ }
+
+ /* Identifying correct HW threads within physical cores */
+ cpumask_shift_left(hw_thread_mask, hw_thread_mask,
+ num_cores_per_socket *
+ node_affinity.num_online_nodes *
+ hw_thread_no);
+ }
+}
+
+int hfi1_get_proc_affinity(int node)
+{
+ int cpu = -1, ret, i;
+ struct hfi1_affinity_node *entry;
+ cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
const struct cpumask *node_mask,
*proc_mask = tsk_cpus_allowed(current);
- struct cpu_mask_set *set = &dd->affinity->proc;
+ struct hfi1_affinity_node_list *affinity = &node_affinity;
+ struct cpu_mask_set *set = &affinity->proc;
/*
* check whether process/context affinity has already
/*
* The process does not have a preset CPU affinity so find one to
- * recommend. We prefer CPUs on the same NUMA as the device.
+ * recommend using the following algorithm:
+ *
+ * For each user process that is opening a context on HFI Y:
+ * a) If all cores are filled, reinitialize the bitmask
+ * b) Fill real cores first, then HT cores (First set of HT
+ * cores on all physical cores, then second set of HT core,
+ * and, so on) in the following order:
+ *
+ * 1. Same NUMA node as HFI Y and not running an IRQ
+ * handler
+ * 2. Same NUMA node as HFI Y and running an IRQ handler
+ * 3. Different NUMA node to HFI Y and not running an IRQ
+ * handler
+ * 4. Different NUMA node to HFI Y and running an IRQ
+ * handler
+ * c) Mark core as filled in the bitmask. As user processes are
+ * done, clear cores from the bitmask.
*/
ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
if (!ret)
goto done;
- ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+ ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL);
if (!ret)
goto free_diff;
- ret = zalloc_cpumask_var(&intrs, GFP_KERNEL);
+ ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL);
+ if (!ret)
+ goto free_hw_thread_mask;
+ ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL);
if (!ret)
- goto free_mask;
+ goto free_available_mask;
- spin_lock(&dd->affinity->lock);
+ spin_lock(&affinity->lock);
/*
- * If we've used all available CPUs, clear the mask and start
+ * If we've used all available HW threads, clear the mask and start
* overloading.
*/
if (cpumask_equal(&set->mask, &set->used)) {
cpumask_clear(&set->used);
}
- /* CPUs used by interrupt handlers */
- cpumask_copy(intrs, (dd->affinity->def_intr.gen ?
- &dd->affinity->def_intr.mask :
- &dd->affinity->def_intr.used));
- cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ?
- &dd->affinity->rcv_intr.mask :
- &dd->affinity->rcv_intr.used));
+ /*
+ * If NUMA node has CPUs used by interrupt handlers, include them in the
+ * interrupt handler mask.
+ */
+ entry = node_affinity_lookup(node);
+ if (entry) {
+ cpumask_copy(intrs_mask, (entry->def_intr.gen ?
+ &entry->def_intr.mask :
+ &entry->def_intr.used));
+ cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ?
+ &entry->rcv_intr.mask :
+ &entry->rcv_intr.used));
+ cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask);
+ }
hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
- cpumask_pr_args(intrs));
+ cpumask_pr_args(intrs_mask));
+
+ cpumask_copy(hw_thread_mask, &set->mask);
/*
- * If we don't have a NUMA node requested, preference is towards
- * device NUMA node
+ * If HT cores are enabled, identify which HW threads within the
+ * physical cores should be used.
*/
- if (node == -1)
- node = dd->node;
+ if (affinity->num_core_siblings > 0) {
+ for (i = 0; i < affinity->num_core_siblings; i++) {
+ find_hw_thread_mask(i, hw_thread_mask, affinity);
+
+ /*
+ * If there's at least one available core for this HW
+ * thread number, stop looking for a core.
+ *
+ * diff will always be not empty at least once in this
+ * loop as the used mask gets reset when
+ * (set->mask == set->used) before this loop.
+ */
+ cpumask_andnot(diff, hw_thread_mask, &set->used);
+ if (!cpumask_empty(diff))
+ break;
+ }
+ }
+ hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl",
+ cpumask_pr_args(hw_thread_mask));
+
node_mask = cpumask_of_node(node);
- hfi1_cdbg(PROC, "device on NUMA %u, CPUs %*pbl", node,
+ hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node,
cpumask_pr_args(node_mask));
- /* diff will hold all unused cpus */
- cpumask_andnot(diff, &set->mask, &set->used);
- hfi1_cdbg(PROC, "unused CPUs (all) %*pbl", cpumask_pr_args(diff));
-
- /* get cpumask of available CPUs on preferred NUMA */
- cpumask_and(mask, diff, node_mask);
- hfi1_cdbg(PROC, "available cpus on NUMA %*pbl", cpumask_pr_args(mask));
+ /* Get cpumask of available CPUs on preferred NUMA */
+ cpumask_and(available_mask, hw_thread_mask, node_mask);
+ cpumask_andnot(available_mask, available_mask, &set->used);
+ hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node,
+ cpumask_pr_args(available_mask));
/*
* At first, we don't want to place processes on the same
- * CPUs as interrupt handlers.
+ * CPUs as interrupt handlers. Then, CPUs running interrupt
+ * handlers are used.
+ *
+ * 1) If diff is not empty, then there are CPUs not running
+ * non-interrupt handlers available, so diff gets copied
+ * over to available_mask.
+ * 2) If diff is empty, then all CPUs not running interrupt
+ * handlers are taken, so available_mask contains all
+ * available CPUs running interrupt handlers.
+ * 3) If available_mask is empty, then all CPUs on the
+ * preferred NUMA node are taken, so other NUMA nodes are
+ * used for process assignments using the same method as
+ * the preferred NUMA node.
*/
- cpumask_andnot(diff, mask, intrs);
+ cpumask_andnot(diff, available_mask, intrs_mask);
if (!cpumask_empty(diff))
- cpumask_copy(mask, diff);
+ cpumask_copy(available_mask, diff);
- /*
- * if we don't have a cpu on the preferred NUMA, get
- * the list of the remaining available CPUs
- */
- if (cpumask_empty(mask)) {
- cpumask_andnot(diff, &set->mask, &set->used);
- cpumask_andnot(mask, diff, node_mask);
+ /* If we don't have CPUs on the preferred node, use other NUMA nodes */
+ if (cpumask_empty(available_mask)) {
+ cpumask_andnot(available_mask, hw_thread_mask, &set->used);
+ /* Excluding preferred NUMA cores */
+ cpumask_andnot(available_mask, available_mask, node_mask);
+ hfi1_cdbg(PROC,
+ "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl",
+ cpumask_pr_args(available_mask));
+
+ /*
+ * At first, we don't want to place processes on the same
+ * CPUs as interrupt handlers.
+ */
+ cpumask_andnot(diff, available_mask, intrs_mask);
+ if (!cpumask_empty(diff))
+ cpumask_copy(available_mask, diff);
}
- hfi1_cdbg(PROC, "possible CPUs for process %*pbl",
- cpumask_pr_args(mask));
+ hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl",
+ cpumask_pr_args(available_mask));
- cpu = cpumask_first(mask);
+ cpu = cpumask_first(available_mask);
if (cpu >= nr_cpu_ids) /* empty */
cpu = -1;
else
cpumask_set_cpu(cpu, &set->used);
- spin_unlock(&dd->affinity->lock);
-
- free_cpumask_var(intrs);
-free_mask:
- free_cpumask_var(mask);
+ spin_unlock(&affinity->lock);
+ hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);
+
+ free_cpumask_var(intrs_mask);
+free_available_mask:
+ free_cpumask_var(available_mask);
+free_hw_thread_mask:
+ free_cpumask_var(hw_thread_mask);
free_diff:
free_cpumask_var(diff);
done:
return cpu;
}
-void hfi1_put_proc_affinity(struct hfi1_devdata *dd, int cpu)
+void hfi1_put_proc_affinity(int cpu)
{
- struct cpu_mask_set *set = &dd->affinity->proc;
+ struct hfi1_affinity_node_list *affinity = &node_affinity;
+ struct cpu_mask_set *set = &affinity->proc;
if (cpu < 0)
return;
- spin_lock(&dd->affinity->lock);
+ spin_lock(&affinity->lock);
cpumask_clear_cpu(cpu, &set->used);
+ hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
if (cpumask_empty(&set->used) && set->gen) {
set->gen--;
cpumask_copy(&set->used, &set->mask);
}
- spin_unlock(&dd->affinity->lock);
+ spin_unlock(&affinity->lock);
}
+/* Prevents concurrent reads and writes of the sdma_affinity attrib */
+static DEFINE_MUTEX(sdma_affinity_mutex);
+
+int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
+ size_t count)
+{
+ struct hfi1_affinity_node *entry;
+ struct cpumask mask;
+ int ret, i;
+
+ spin_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ spin_unlock(&node_affinity.lock);
+
+ if (!entry)
+ return -EINVAL;
+
+ ret = cpulist_parse(buf, &mask);
+ if (ret)
+ return ret;
+
+ if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) {
+ dd_dev_warn(dd, "Invalid CPU mask\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&sdma_affinity_mutex);
+ /* reset the SDMA interrupt affinity details */
+ init_cpu_mask_set(&entry->def_intr);
+ cpumask_copy(&entry->def_intr.mask, &mask);
+ /*
+ * Reassign the affinity for each SDMA interrupt.
+ */
+ for (i = 0; i < dd->num_msix_entries; i++) {
+ struct hfi1_msix_entry *msix;
+
+ msix = &dd->msix_entries[i];
+ if (msix->type != IRQ_SDMA)
+ continue;
+
+ ret = hfi1_get_irq_affinity(dd, msix);
+
+ if (ret)
+ break;
+ }
+
+ mutex_unlock(&sdma_affinity_mutex);
+ return ret ? ret : strnlen(buf, PAGE_SIZE);
+}
+
+int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf)
+{
+ struct hfi1_affinity_node *entry;
+
+ spin_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ spin_unlock(&node_affinity.lock);
+
+ if (!entry)
+ return -EINVAL;
+
+ mutex_lock(&sdma_affinity_mutex);
+ cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask);
+ mutex_unlock(&sdma_affinity_mutex);
+ return strnlen(buf, PAGE_SIZE);
+}
struct hfi1_affinity {
struct cpu_mask_set def_intr;
struct cpu_mask_set rcv_intr;
- struct cpu_mask_set proc;
struct cpumask real_cpu_mask;
/* spin lock to protect affinity struct */
spinlock_t lock;
struct hfi1_msix_entry;
/* Initialize non-HT cpu cores mask */
-int init_real_cpu_mask(struct hfi1_devdata *);
+void init_real_cpu_mask(void);
/* Initialize driver affinity data */
-void hfi1_dev_affinity_init(struct hfi1_devdata *);
-/* Free driver affinity data */
-void hfi1_dev_affinity_free(struct hfi1_devdata *);
+int hfi1_dev_affinity_init(struct hfi1_devdata *);
/*
* Set IRQ affinity to a CPU. The function will determine the
* CPU and set the affinity to it.
* Determine a CPU affinity for a user process, if the process does not
* have an affinity set yet.
*/
-int hfi1_get_proc_affinity(struct hfi1_devdata *, int);
+int hfi1_get_proc_affinity(int);
/* Release a CPU used by a user process. */
-void hfi1_put_proc_affinity(struct hfi1_devdata *, int);
+void hfi1_put_proc_affinity(int);
+
+int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf);
+int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
+ size_t count);
+
+struct hfi1_affinity_node {
+ int node;
+ struct cpu_mask_set def_intr;
+ struct cpu_mask_set rcv_intr;
+ struct cpumask general_intr_mask;
+ struct list_head list;
+};
+
+struct hfi1_affinity_node_list {
+ struct list_head list;
+ struct cpumask real_cpu_mask;
+ struct cpu_mask_set proc;
+ int num_core_siblings;
+ int num_online_nodes;
+ int num_online_cpus;
+ /* protect affinity node list */
+ spinlock_t lock;
+};
+
+int node_affinity_init(void);
+void node_affinity_destroy(void);
+extern struct hfi1_affinity_node_list node_affinity;
#endif /* _HFI1_AFFINITY_H */
#include "efivar.h"
#include "platform.h"
#include "aspm.h"
+#include "affinity.h"
#define NUM_IB_PORTS 1
#define SEC_SC_HALTED 0x4 /* per-context only */
#define SEC_SPC_FREEZE 0x8 /* per-HFI only */
+#define DEFAULT_KRCVQS 2
#define MIN_KERNEL_KCTXTS 2
#define FIRST_KERNEL_KCTXT 1
/* sizes for both the QP and RSM map tables */
/* all CceStatus sub-block RXE pause bits */
#define ALL_RXE_PAUSE CCE_STATUS_RXE_PAUSED_SMASK
+#define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL
+#define CNTR_32BIT_MAX 0x00000000FFFFFFFF
+
/*
* CCE Error flags.
*/
return dd->sw_send_dma_eng_err_status_cnt[0];
}
+static u64 access_dc_rcv_err_cnt(const struct cntr_entry *entry,
+ void *context, int vl, int mode,
+ u64 data)
+{
+ struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+ u64 val = 0;
+ u64 csr = entry->csr;
+
+ val = read_write_csr(dd, csr, mode, data);
+ if (mode == CNTR_MODE_R) {
+ val = val > CNTR_MAX - dd->sw_rcv_bypass_packet_errors ?
+ CNTR_MAX : val + dd->sw_rcv_bypass_packet_errors;
+ } else if (mode == CNTR_MODE_W) {
+ dd->sw_rcv_bypass_packet_errors = 0;
+ } else {
+ dd_dev_err(dd, "Invalid cntr register access mode");
+ return 0;
+ }
+ return val;
+}
+
#define def_access_sw_cpu(cntr) \
static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry, \
void *context, int vl, int mode, u64 data) \
CCE_SEND_CREDIT_INT_CNT, CNTR_NORMAL),
[C_DC_UNC_ERR] = DC_PERF_CNTR(DcUnctblErr, DCC_ERR_UNCORRECTABLE_CNT,
CNTR_SYNTH),
-[C_DC_RCV_ERR] = DC_PERF_CNTR(DcRecvErr, DCC_ERR_PORTRCV_ERR_CNT, CNTR_SYNTH),
+[C_DC_RCV_ERR] = CNTR_ELEM("DcRecvErr", DCC_ERR_PORTRCV_ERR_CNT, 0, CNTR_SYNTH,
+ access_dc_rcv_err_cnt),
[C_DC_FM_CFG_ERR] = DC_PERF_CNTR(DcFmCfgErr, DCC_ERR_FMCONFIG_ERR_CNT,
CNTR_SYNTH),
[C_DC_RMT_PHY_ERR] = DC_PERF_CNTR(DcRmtPhyErr, DCC_ERR_RCVREMOTE_PHY_ERR_CNT,
return load_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, frame);
}
-static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
-{
- u32 frame, version, prod_id;
- int ret, lane;
-
- /* 4 lanes */
- for (lane = 0; lane < 4; lane++) {
- ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
- if (ret) {
- dd_dev_err(dd,
- "Unable to read lane %d firmware details\n",
- lane);
- continue;
- }
- version = (frame >> SPICO_ROM_VERSION_SHIFT)
- & SPICO_ROM_VERSION_MASK;
- prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
- & SPICO_ROM_PROD_ID_MASK;
- dd_dev_info(dd,
- "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
- lane, version, prod_id);
- }
-}
-
/*
* Read an idle LCB message.
*
unsigned long timeout;
/*
- * Check for QSFP interrupt for t_init (SFF 8679)
+ * Some QSFP cables have a quirk that asserts the IntN line as a side
+ * effect of power up on plug-in. We ignore this false positive
+ * interrupt until the module has finished powering up by waiting for
+ * a minimum timeout of the module inrush initialization time of
+ * 500 ms (SFF 8679 Table 5-6) to ensure the voltage rails in the
+ * module have stabilized.
+ */
+ msleep(500);
+
+ /*
+ * Check for QSFP interrupt for t_init (SFF 8679 Table 8-1)
*/
timeout = jiffies + msecs_to_jiffies(2000);
while (1) {
mask = read_csr(dd, dd->hfi1_id ?
ASIC_QSFP2_IN : ASIC_QSFP1_IN);
- if (!(mask & QSFP_HFI0_INT_N)) {
- write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR :
- ASIC_QSFP1_CLEAR, QSFP_HFI0_INT_N);
+ if (!(mask & QSFP_HFI0_INT_N))
break;
- }
if (time_after(jiffies, timeout)) {
dd_dev_info(dd, "%s: No IntN detected, reset complete\n",
__func__);
u64 mask;
mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK);
- if (enable)
+ if (enable) {
+ /*
+ * Clear the status register to avoid an immediate interrupt
+ * when we re-enable the IntN pin
+ */
+ write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR : ASIC_QSFP1_CLEAR,
+ QSFP_HFI0_INT_N);
mask |= (u64)QSFP_HFI0_INT_N;
- else
+ } else {
mask &= ~(u64)QSFP_HFI0_INT_N;
+ }
write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask);
}
hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
}
-int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd,
- struct hfi1_ctxt_info *kinfo)
-{
- kinfo->runtime_flags = (HFI1_MISC_GET() << HFI1_CAP_USER_SHIFT) |
- HFI1_CAP_UGET(MASK) | HFI1_CAP_KGET(K2U);
- return 0;
-}
-
struct hfi1_message_header *hfi1_get_msgheader(
struct hfi1_devdata *dd, __le32 *rhf_addr)
{
return 0;
}
+static const char *state_completed_string(u32 completed)
+{
+ static const char * const state_completed[] = {
+ "EstablishComm",
+ "OptimizeEQ",
+ "VerifyCap"
+ };
+
+ if (completed < ARRAY_SIZE(state_completed))
+ return state_completed[completed];
+
+ return "unknown";
+}
+
+static const char all_lanes_dead_timeout_expired[] =
+ "All lanes were inactive – was the interconnect media removed?";
+static const char tx_out_of_policy[] =
+ "Passing lanes on local port do not meet the local link width policy";
+static const char no_state_complete[] =
+ "State timeout occurred before link partner completed the state";
+static const char * const state_complete_reasons[] = {
+ [0x00] = "Reason unknown",
+ [0x01] = "Link was halted by driver, refer to LinkDownReason",
+ [0x02] = "Link partner reported failure",
+ [0x10] = "Unable to achieve frame sync on any lane",
+ [0x11] =
+ "Unable to find a common bit rate with the link partner",
+ [0x12] =
+ "Unable to achieve frame sync on sufficient lanes to meet the local link width policy",
+ [0x13] =
+ "Unable to identify preset equalization on sufficient lanes to meet the local link width policy",
+ [0x14] = no_state_complete,
+ [0x15] =
+ "State timeout occurred before link partner identified equalization presets",
+ [0x16] =
+ "Link partner completed the EstablishComm state, but the passing lanes do not meet the local link width policy",
+ [0x17] = tx_out_of_policy,
+ [0x20] = all_lanes_dead_timeout_expired,
+ [0x21] =
+ "Unable to achieve acceptable BER on sufficient lanes to meet the local link width policy",
+ [0x22] = no_state_complete,
+ [0x23] =
+ "Link partner completed the OptimizeEq state, but the passing lanes do not meet the local link width policy",
+ [0x24] = tx_out_of_policy,
+ [0x30] = all_lanes_dead_timeout_expired,
+ [0x31] =
+ "State timeout occurred waiting for host to process received frames",
+ [0x32] = no_state_complete,
+ [0x33] =
+ "Link partner completed the VerifyCap state, but the passing lanes do not meet the local link width policy",
+ [0x34] = tx_out_of_policy,
+};
+
+static const char *state_complete_reason_code_string(struct hfi1_pportdata *ppd,
+ u32 code)
+{
+ const char *str = NULL;
+
+ if (code < ARRAY_SIZE(state_complete_reasons))
+ str = state_complete_reasons[code];
+
+ if (str)
+ return str;
+ return "Reserved";
+}
+
+/* describe the given last state complete frame */
+static void decode_state_complete(struct hfi1_pportdata *ppd, u32 frame,
+ const char *prefix)
+{
+ struct hfi1_devdata *dd = ppd->dd;
+ u32 success;
+ u32 state;
+ u32 reason;
+ u32 lanes;
+
+ /*
+ * Decode frame:
+ * [ 0: 0] - success
+ * [ 3: 1] - state
+ * [ 7: 4] - next state timeout
+ * [15: 8] - reason code
+ * [31:16] - lanes
+ */
+ success = frame & 0x1;
+ state = (frame >> 1) & 0x7;
+ reason = (frame >> 8) & 0xff;
+ lanes = (frame >> 16) & 0xffff;
+
+ dd_dev_err(dd, "Last %s LNI state complete frame 0x%08x:\n",
+ prefix, frame);
+ dd_dev_err(dd, " last reported state state: %s (0x%x)\n",
+ state_completed_string(state), state);
+ dd_dev_err(dd, " state successfully completed: %s\n",
+ success ? "yes" : "no");
+ dd_dev_err(dd, " fail reason 0x%x: %s\n",
+ reason, state_complete_reason_code_string(ppd, reason));
+ dd_dev_err(dd, " passing lane mask: 0x%x", lanes);
+}
+
+/*
+ * Read the last state complete frames and explain them. This routine
+ * expects to be called if the link went down during link negotiation
+ * and initialization (LNI). That is, anywhere between polling and link up.
+ */
+static void check_lni_states(struct hfi1_pportdata *ppd)
+{
+ u32 last_local_state;
+ u32 last_remote_state;
+
+ read_last_local_state(ppd->dd, &last_local_state);
+ read_last_remote_state(ppd->dd, &last_remote_state);
+
+ /*
+ * Don't report anything if there is nothing to report. A value of
+ * 0 means the link was taken down while polling and there was no
+ * training in-process.
+ */
+ if (last_local_state == 0 && last_remote_state == 0)
+ return;
+
+ decode_state_complete(ppd, last_local_state, "transmitted");
+ decode_state_complete(ppd, last_remote_state, "received");
+}
+
/*
* Helper for set_link_state(). Do not call except from that routine.
* Expects ppd->hls_mutex to be held.
{
struct hfi1_devdata *dd = ppd->dd;
u32 pstate, previous_state;
- u32 last_local_state;
- u32 last_remote_state;
int ret;
int do_transition;
int do_wait;
} else if (previous_state
& (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
/* went down while attempting link up */
- /* byte 1 of last_*_state is the failure reason */
- read_last_local_state(dd, &last_local_state);
- read_last_remote_state(dd, &last_remote_state);
- dd_dev_err(dd,
- "LNI failure last states: local 0x%08x, remote 0x%08x\n",
- last_local_state, last_remote_state);
+ check_lni_states(ppd);
}
/* the active link width (downgrade) is 0 on link down */
dd->cntrnames = NULL;
}
-#define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL
-#define CNTR_32BIT_MAX 0x00000000FFFFFFFF
-
static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
u64 *psval, void *context, int vl)
{
return ib_pstate;
}
-/*
- * Read/modify/write ASIC_QSFP register bits as selected by mask
- * data: 0 or 1 in the positions depending on what needs to be written
- * dir: 0 for read, 1 for write
- * mask: select by setting
- * I2CCLK (bit 0)
- * I2CDATA (bit 1)
- */
-u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
- u32 mask)
-{
- u64 qsfp_oe, target_oe;
-
- target_oe = target ? ASIC_QSFP2_OE : ASIC_QSFP1_OE;
- if (mask) {
- /* We are writing register bits, so lock access */
- dir &= mask;
- data &= mask;
-
- qsfp_oe = read_csr(dd, target_oe);
- qsfp_oe = (qsfp_oe & ~(u64)mask) | (u64)dir;
- write_csr(dd, target_oe, qsfp_oe);
- }
- /* We are exclusively reading bits here, but it is unlikely
- * we'll get valid data when we set the direction of the pin
- * in the same call, so read should call this function again
- * to get valid data
- */
- return read_csr(dd, target ? ASIC_QSFP2_IN : ASIC_QSFP1_IN);
-}
-
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
/*
* Kernel receive contexts:
- * - min of 2 or 1 context/numa (excluding control context)
* - Context 0 - control context (VL15/multicast/error)
* - Context 1 - first kernel context
* - Context 2 - second kernel context
*/
num_kernel_contexts = n_krcvqs + 1;
else
- num_kernel_contexts = num_online_nodes() + 1;
- num_kernel_contexts =
- max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
+ num_kernel_contexts = DEFAULT_KRCVQS + 1;
/*
* Every kernel receive context needs an ACK send context.
* one send context is allocated for each VL{0-7} and VL15
*/
if (num_user_contexts < 0)
num_user_contexts =
- cpumask_weight(&dd->affinity->real_cpu_mask);
+ cpumask_weight(&node_affinity.real_cpu_mask);
total_contexts = num_kernel_contexts + num_user_contexts;
}
dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */
spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+
+ /* first one through - set up i2c devices */
+ if (!peer)
+ ret = set_up_i2c(dd, dd->asic_data);
+
return ret;
}
(dd->revision >> CCE_REVISION_SW_SHIFT)
& CCE_REVISION_SW_MASK);
- /*
- * The real cpu mask is part of the affinity struct but has to be
- * initialized earlier than the rest of the affinity struct because it
- * is needed to calculate the number of user contexts in
- * set_up_context_variables(). However, hfi1_dev_affinity_init(),
- * which initializes the rest of the affinity struct members,
- * depends on set_up_context_variables() for the number of kernel
- * contexts, so it cannot be called before set_up_context_variables().
- */
- ret = init_real_cpu_mask(dd);
- if (ret)
- goto bail_cleanup;
-
ret = set_up_context_variables(dd);
if (ret)
goto bail_cleanup;
/* set up KDETH QP prefix in both RX and TX CSRs */
init_kdeth_qp(dd);
- hfi1_dev_affinity_init(dd);
+ ret = hfi1_dev_affinity_init(dd);
+ if (ret)
+ goto bail_cleanup;
/* send contexts must be set up before receive contexts */
ret = init_send_contexts(dd);
/* set up LCB access - must be after set_up_interrupts() */
init_lcb_access(dd);
+ /*
+ * Serial number is created from the base guid:
+ * [27:24] = base guid [38:35]
+ * [23: 0] = base guid [23: 0]
+ */
snprintf(dd->serial, SERIAL_MAX, "0x%08llx\n",
- dd->base_guid & 0xFFFFFF);
+ (dd->base_guid & 0xFFFFFF) |
+ ((dd->base_guid >> 11) & 0xF000000));
dd->oui1 = dd->base_guid >> 56 & 0xFF;
dd->oui2 = dd->base_guid >> 48 & 0xFF;
ret = load_firmware(dd); /* asymmetric with dispose_firmware() */
if (ret)
goto bail_clear_intr;
- check_fabric_firmware_versions(dd);
thermal_init(dd);
/* SBus commands */
#define RESET_SBUS_RECEIVER 0x20
#define WRITE_SBUS_RECEIVER 0x21
+#define READ_SBUS_RECEIVER 0x22
void sbus_request(struct hfi1_devdata *dd,
u8 receiver_addr, u8 data_addr, u8 command, u32 data_in);
int sbus_request_slow(struct hfi1_devdata *dd,
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
struct hfi1_message_header *hfi1_get_msgheader(
struct hfi1_devdata *dd, __le32 *rhf_addr);
-int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd,
- struct hfi1_ctxt_info *kinfo);
-u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
- u32 mask);
int hfi1_init_ctxt(struct send_context *sc);
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
u32 type, unsigned long pa, u16 order);
#define ASIC_STS_SBUS_RESULT (ASIC + 0x000000000010)
#define ASIC_STS_SBUS_RESULT_DONE_SMASK 0x1ull
#define ASIC_STS_SBUS_RESULT_RCV_DATA_VALID_SMASK 0x2ull
+#define ASIC_STS_SBUS_RESULT_RESULT_CODE_SHIFT 2
+#define ASIC_STS_SBUS_RESULT_RESULT_CODE_MASK 0x7ull
+#define ASIC_STS_SBUS_RESULT_DATA_OUT_SHIFT 32
+#define ASIC_STS_SBUS_RESULT_DATA_OUT_MASK 0xFFFFFFFFull
#define ASIC_STS_THERM (ASIC + 0x000000000058)
#define ASIC_STS_THERM_CRIT_TEMP_MASK 0x7FFull
#define ASIC_STS_THERM_CRIT_TEMP_SHIFT 18
u16 rlid;
u8 svc_type, sl, sc5;
- sc5 = (be16_to_cpu(rhdr->lrh[0]) >> 12) & 0xf;
- if (rhf_dc_info(packet->rhf))
- sc5 |= 0x10;
+ sc5 = hdr2sc(rhdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
packet->rcv_flags = 0;
}
-static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr,
- struct hfi1_other_headers *ohdr,
- u64 rhf, u32 bth1, struct ib_grh *grh)
+void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
+ bool do_cnp)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- u32 rqpn = 0;
- u16 rlid;
- u8 sc5, svc_type;
+ struct hfi1_ib_header *hdr = pkt->hdr;
+ struct hfi1_other_headers *ohdr = pkt->ohdr;
+ struct ib_grh *grh = NULL;
+ u32 rqpn = 0, bth1;
+ u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]);
+ u8 sc, svc_type;
+ bool is_mcast = false;
+
+ if (pkt->rcv_flags & HFI1_HAS_GRH)
+ grh = &hdr->u.l.grh;
switch (qp->ibqp.qp_type) {
case IB_QPT_SMI:
rlid = be16_to_cpu(hdr->lrh[3]);
rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
svc_type = IB_CC_SVCTYPE_UD;
+ is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+ (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
break;
case IB_QPT_UC:
rlid = qp->remote_ah_attr.dlid;
return;
}
- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
- if (rhf_dc_info(rhf))
- sc5 |= 0x10;
+ sc = hdr2sc((struct hfi1_message_header *)hdr, pkt->rhf);
- if (bth1 & HFI1_FECN_SMASK) {
+ bth1 = be32_to_cpu(ohdr->bth[1]);
+ if (do_cnp && (bth1 & HFI1_FECN_SMASK)) {
u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
- u16 dlid = be16_to_cpu(hdr->lrh[1]);
- return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc5, grh);
+ return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh);
}
- if (bth1 & HFI1_BECN_SMASK) {
+ if (!is_mcast && (bth1 & HFI1_BECN_SMASK)) {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 lqpn = bth1 & RVT_QPN_MASK;
- u8 sl = ibp->sc_to_sl[sc5];
+ u8 sl = ibp->sc_to_sl[sc];
process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
}
+
}
struct ps_mdata {
struct rvt_qp *qp;
struct hfi1_ib_header *hdr;
struct hfi1_other_headers *ohdr;
- struct ib_grh *grh = NULL;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
u64 rhf = rhf_to_cpu(rhf_addr);
u32 etype = rhf_rcv_type(rhf), qpn, bth1;
hfi1_get_msgheader(dd, rhf_addr);
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
- if (lnh == HFI1_LRH_BTH) {
+ if (lnh == HFI1_LRH_BTH)
ohdr = &hdr->u.oth;
- } else if (lnh == HFI1_LRH_GRH) {
+ else if (lnh == HFI1_LRH_GRH)
ohdr = &hdr->u.l.oth;
- grh = &hdr->u.l.grh;
- } else {
+ else
goto next; /* just in case */
- }
+
bth1 = be32_to_cpu(ohdr->bth[1]);
is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
goto next;
}
- process_ecn(qp, hdr, ohdr, rhf, bth1, grh);
+ process_ecn(qp, packet, true);
rcu_read_unlock();
/* turn off BECN, FECN */
dd_dev_err(packet->rcd->dd,
"Bypass packets are not supported in normal operation. Dropping\n");
+ incr_cntr64(&packet->rcd->dd->sw_rcv_bypass_packet_errors);
return RHF_RCV_CONTINUE;
}
static int hfi1_file_open(struct inode *inode, struct file *fp)
{
+ struct hfi1_filedata *fd;
struct hfi1_devdata *dd = container_of(inode->i_cdev,
struct hfi1_devdata,
user_cdev);
kobject_get(&dd->kobj);
/* The real work is performed later in assign_ctxt() */
- fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL);
- if (fp->private_data) /* no cpu affinity by default */
- ((struct hfi1_filedata *)fp->private_data)->rec_cpu_num = -1;
- return fp->private_data ? 0 : -ENOMEM;
+
+ fd = kzalloc(sizeof(*fd), GFP_KERNEL);
+
+ if (fd) {
+ fd->rec_cpu_num = -1; /* no cpu affinity by default */
+ fd->mm = current->mm;
+ }
+
+ fp->private_data = fd;
+
+ return fd ? 0 : -ENOMEM;
}
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sizeof(struct hfi1_base_info));
break;
case HFI1_IOCTL_CREDIT_UPD:
- if (uctxt && uctxt->sc)
+ if (uctxt)
sc_return_credits(uctxt->sc);
break;
struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
struct hfi1_user_sdma_pkt_q *pq = fd->pq;
struct hfi1_user_sdma_comp_q *cq = fd->cq;
- int ret = 0, done = 0, reqs = 0;
+ int done = 0, reqs = 0;
unsigned long dim = from->nr_segs;
- if (!cq || !pq) {
- ret = -EIO;
- goto done;
- }
+ if (!cq || !pq)
+ return -EIO;
- if (!iter_is_iovec(from) || !dim) {
- ret = -EINVAL;
- goto done;
- }
+ if (!iter_is_iovec(from) || !dim)
+ return -EINVAL;
hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)",
fd->uctxt->ctxt, fd->subctxt, dim);
- if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) {
- ret = -ENOSPC;
- goto done;
- }
+ if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
+ return -ENOSPC;
while (dim) {
+ int ret;
unsigned long count = 0;
ret = hfi1_user_sdma_process_request(
kiocb->ki_filp, (struct iovec *)(from->iov + done),
dim, &count);
- if (ret)
- goto done;
+ if (ret) {
+ reqs = ret;
+ break;
+ }
dim -= count;
done += count;
reqs++;
}
-done:
- return ret ? ret : reqs;
+
+ return reqs;
}
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
hfi1_user_sdma_free_queues(fdata);
/* release the cpu */
- hfi1_put_proc_affinity(dd, fdata->rec_cpu_num);
+ hfi1_put_proc_affinity(fdata->rec_cpu_num);
/*
* Clear any left over, unhandled events so the next process that
if (--uctxt->cnt) {
uctxt->active_slaves &= ~(1 << fdata->subctxt);
- uctxt->subpid[fdata->subctxt] = 0;
mutex_unlock(&hfi1_mutex);
goto done;
}
write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type));
sc_disable(uctxt->sc);
- uctxt->pid = 0;
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
dd->rcd[uctxt->ctxt] = NULL;
ret = find_shared_ctxt(fp, uinfo);
if (ret < 0)
goto done_unlock;
- if (ret)
- fd->rec_cpu_num = hfi1_get_proc_affinity(
- fd->uctxt->dd, fd->uctxt->numa_id);
+ if (ret) {
+ fd->rec_cpu_num =
+ hfi1_get_proc_affinity(fd->uctxt->numa_id);
+ }
}
/*
}
fd->uctxt = uctxt;
fd->subctxt = uctxt->cnt++;
- uctxt->subpid[fd->subctxt] = current->pid;
uctxt->active_slaves |= 1 << fd->subctxt;
ret = 1;
goto done;
if (ctxt == dd->num_rcv_contexts)
return -EBUSY;
- fd->rec_cpu_num = hfi1_get_proc_affinity(dd, -1);
+ /*
+ * If we don't have a NUMA node requested, preference is towards
+ * device NUMA node.
+ */
+ fd->rec_cpu_num = hfi1_get_proc_affinity(dd->node);
if (fd->rec_cpu_num != -1)
numa = cpu_to_node(fd->rec_cpu_num);
else
return ret;
}
uctxt->userversion = uinfo->userversion;
- uctxt->pid = current->pid;
- uctxt->flags = HFI1_CAP_UGET(MASK);
+ uctxt->flags = hfi1_cap_mask; /* save current flag state */
init_waitqueue_head(&uctxt->wait);
strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey);
rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
- if (HFI1_CAP_KGET_MASK(uctxt->flags, HDRSUPP))
+ if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
/*
* Ignore the bit in the flags for now until proper
* support for multiple packet per rcv array entry is
* added.
*/
- if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
+ if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR))
rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
- if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
+ if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL))
rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
- if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
+ if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
/*
* The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
* uses of the chip or ctxt. Therefore, add the rcvctrl op
* for both cases.
*/
- if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
+ if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL))
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
else
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
int ret = 0;
memset(&cinfo, 0, sizeof(cinfo));
- ret = hfi1_get_base_kinfo(uctxt, &cinfo);
- if (ret < 0)
- goto done;
+ cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) &
+ HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) |
+ HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
+ HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
+ /* adjust flag if this fd is not able to cache */
+ if (!fd->handler)
+ cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */
+
cinfo.num_active = hfi1_count_active_units();
cinfo.unit = uctxt->dd->unit;
cinfo.ctxt = uctxt->ctxt;
trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
if (copy_to_user(ubase, &cinfo, sizeof(cinfo)))
ret = -EFAULT;
-done:
+
return ret;
}
/* the number of fabric SerDes on the SBus */
#define NUM_FABRIC_SERDES 4
+/* ASIC_STS_SBUS_RESULT.RESULT_CODE value */
+#define SBUS_READ_COMPLETE 0x4
+
/* SBus fabric SerDes addresses, one set per HFI */
static const u8 fabric_serdes_addrs[2][NUM_FABRIC_SERDES] = {
{ 0x01, 0x02, 0x03, 0x04 },
static void dispose_one_firmware(struct firmware_details *fdet);
static int load_fabric_serdes_firmware(struct hfi1_devdata *dd,
struct firmware_details *fdet);
+static void dump_fw_version(struct hfi1_devdata *dd);
/*
* Read a single 64-bit value from 8051 data memory.
ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT));
}
+/*
+ * Read a value from the SBus.
+ *
+ * Requires the caller to be in fast mode
+ */
+static u32 sbus_read(struct hfi1_devdata *dd, u8 receiver_addr, u8 data_addr,
+ u32 data_in)
+{
+ u64 reg;
+ int retries;
+ int success = 0;
+ u32 result = 0;
+ u32 result_code = 0;
+
+ sbus_request(dd, receiver_addr, data_addr, READ_SBUS_RECEIVER, data_in);
+
+ for (retries = 0; retries < 100; retries++) {
+ usleep_range(1000, 1200); /* arbitrary */
+ reg = read_csr(dd, ASIC_STS_SBUS_RESULT);
+ result_code = (reg >> ASIC_STS_SBUS_RESULT_RESULT_CODE_SHIFT)
+ & ASIC_STS_SBUS_RESULT_RESULT_CODE_MASK;
+ if (result_code != SBUS_READ_COMPLETE)
+ continue;
+
+ success = 1;
+ result = (reg >> ASIC_STS_SBUS_RESULT_DATA_OUT_SHIFT)
+ & ASIC_STS_SBUS_RESULT_DATA_OUT_MASK;
+ break;
+ }
+
+ if (!success) {
+ dd_dev_err(dd, "%s: read failed, result code 0x%x\n", __func__,
+ result_code);
+ }
+
+ return result;
+}
+
/*
* Turn off the SBus and fabric serdes spicos.
*
return ret;
}
+ dump_fw_version(dd);
return 0;
}
dd_dev_info(dd, "GUID %llx",
(unsigned long long)dd->base_guid);
}
+
+/* read and display firmware version info */
+static void dump_fw_version(struct hfi1_devdata *dd)
+{
+ u32 pcie_vers[NUM_PCIE_SERDES];
+ u32 fabric_vers[NUM_FABRIC_SERDES];
+ u32 sbus_vers;
+ int i;
+ int all_same;
+ int ret;
+ u8 rcv_addr;
+
+ ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+ if (ret) {
+ dd_dev_err(dd, "Unable to acquire SBus to read firmware versions\n");
+ return;
+ }
+
+ /* set fast mode */
+ set_sbus_fast_mode(dd);
+
+ /* read version for SBus Master */
+ sbus_request(dd, SBUS_MASTER_BROADCAST, 0x02, WRITE_SBUS_RECEIVER, 0);
+ sbus_request(dd, SBUS_MASTER_BROADCAST, 0x07, WRITE_SBUS_RECEIVER, 0x1);
+ /* wait for interrupt to be processed */
+ usleep_range(10000, 11000);
+ sbus_vers = sbus_read(dd, SBUS_MASTER_BROADCAST, 0x08, 0x1);
+ dd_dev_info(dd, "SBus Master firmware version 0x%08x\n", sbus_vers);
+
+ /* read version for PCIe SerDes */
+ all_same = 1;
+ pcie_vers[0] = 0;
+ for (i = 0; i < NUM_PCIE_SERDES; i++) {
+ rcv_addr = pcie_serdes_addrs[dd->hfi1_id][i];
+ sbus_request(dd, rcv_addr, 0x03, WRITE_SBUS_RECEIVER, 0);
+ /* wait for interrupt to be processed */
+ usleep_range(10000, 11000);
+ pcie_vers[i] = sbus_read(dd, rcv_addr, 0x04, 0x0);
+ if (i > 0 && pcie_vers[0] != pcie_vers[i])
+ all_same = 0;
+ }
+
+ if (all_same) {
+ dd_dev_info(dd, "PCIe SerDes firmware version 0x%x\n",
+ pcie_vers[0]);
+ } else {
+ dd_dev_warn(dd, "PCIe SerDes do not have the same firmware version\n");
+ for (i = 0; i < NUM_PCIE_SERDES; i++) {
+ dd_dev_info(dd,
+ "PCIe SerDes lane %d firmware version 0x%x\n",
+ i, pcie_vers[i]);
+ }
+ }
+
+ /* read version for fabric SerDes */
+ all_same = 1;
+ fabric_vers[0] = 0;
+ for (i = 0; i < NUM_FABRIC_SERDES; i++) {
+ rcv_addr = fabric_serdes_addrs[dd->hfi1_id][i];
+ sbus_request(dd, rcv_addr, 0x03, WRITE_SBUS_RECEIVER, 0);
+ /* wait for interrupt to be processed */
+ usleep_range(10000, 11000);
+ fabric_vers[i] = sbus_read(dd, rcv_addr, 0x04, 0x0);
+ if (i > 0 && fabric_vers[0] != fabric_vers[i])
+ all_same = 0;
+ }
+
+ if (all_same) {
+ dd_dev_info(dd, "Fabric SerDes firmware version 0x%x\n",
+ fabric_vers[0]);
+ } else {
+ dd_dev_warn(dd, "Fabric SerDes do not have the same firmware version\n");
+ for (i = 0; i < NUM_FABRIC_SERDES; i++) {
+ dd_dev_info(dd,
+ "Fabric SerDes lane %d firmware version 0x%x\n",
+ i, fabric_vers[i]);
+ }
+ }
+
+ clear_sbus_fast_mode(dd);
+ release_chip_resource(dd, CR_SBUS);
+}
#include <linux/cdev.h>
#include <linux/delay.h>
#include <linux/kthread.h>
+#include <linux/i2c.h>
+#include <linux/i2c-algo-bit.h>
#include <rdma/rdma_vt.h>
#include "chip_registers.h"
/* chip offset of PIO buffers for this ctxt */
u32 piobufs;
/* per-context configuration flags */
- u32 flags;
+ unsigned long flags;
/* per-context event flags for fileops/intr communication */
unsigned long event_flags;
/* WAIT_RCV that timed out, no interrupt */
u32 urgent;
/* saved total number of polled urgent packets for poll edge trigger */
u32 urgent_poll;
- /* pid of process using this ctxt */
- pid_t pid;
- pid_t subpid[HFI1_MAX_SHARED_CTXTS];
/* same size as task_struct .comm[], command that opened context */
char comm[TASK_COMM_LEN];
/* so file ops can get at unit */
u8 etype;
};
-static inline bool has_sc4_bit(struct hfi1_packet *p)
-{
- return !!rhf_dc_info(p->rhf);
-}
-
/*
* Private data for snoop/capture support.
*/
u8 triggers; /* temperature triggers */
};
+struct hfi1_i2c_bus {
+ struct hfi1_devdata *controlling_dd; /* current controlling device */
+ struct i2c_adapter adapter; /* bus details */
+ struct i2c_algo_bit_data algo; /* bus algorithm details */
+ int num; /* bus number, 0 or 1 */
+};
+
/* common data between shared ASIC HFIs */
struct hfi1_asic_data {
struct hfi1_devdata *dds[2]; /* back pointers */
struct mutex asic_resource_mutex;
+ struct hfi1_i2c_bus *i2c_bus0;
+ struct hfi1_i2c_bus *i2c_bus1;
};
/* device data struct now contains only "general per-device" info.
NUM_SEND_DMA_ENG_ERR_STATUS_COUNTERS];
/* Software counter that aggregates all cce_err_status errors */
u64 sw_cce_err_status_aggregate;
-
+ /* Software counter that aggregates all bypass packet rcv errors */
+ u64 sw_rcv_bypass_packet_errors;
/* receive interrupt functions */
rhf_rcv_function_ptr *rhf_rcv_function_map;
rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
/* 8051 firmware version helper */
#define dc8051_ver(a, b) ((a) << 8 | (b))
+#define dc8051_ver_maj(a) ((a & 0xff00) >> 8)
+#define dc8051_ver_min(a) (a & 0x00ff)
/* f_put_tid types */
#define PT_EXPECTED 0
struct tid_rb_node;
struct mmu_rb_node;
+struct mmu_rb_handler;
/* Private data for file operations */
struct hfi1_filedata {
/* for cpu affinity; -1 if none */
int rec_cpu_num;
u32 tid_n_pinned;
- struct rb_root tid_rb_root;
+ struct mmu_rb_handler *handler;
struct tid_rb_node **entry_to_rb;
spinlock_t tid_lock; /* protect tid_[limit,used] counters */
u32 tid_limit;
u32 invalid_tid_idx;
/* protect invalid_tids array and invalid_tid_idx */
spinlock_t invalid_lock;
+ struct mm_struct *mm;
};
extern struct list_head hfi1_dev_list;
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
void set_all_slowpath(struct hfi1_devdata *dd);
+extern const struct pci_device_id hfi1_pci_tbl[];
+
/* receive packet handler dispositions */
#define RCV_PKT_OK 0x0 /* keep going */
#define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */
static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
{
return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
- ((!!(rhf & RHF_DC_INFO_SMASK)) << 4);
+ ((!!(rhf_dc_info(rhf))) << 4);
}
static inline u16 generate_jkey(kuid_t uid)
return &dd->pport[pidx].ibport_data;
}
+void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
+ bool do_cnp);
+static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
+ bool do_cnp)
+{
+ struct hfi1_other_headers *ohdr = pkt->ohdr;
+ u32 bth1;
+
+ bth1 = be32_to_cpu(ohdr->bth[1]);
+ if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) {
+ hfi1_process_ecn_slowpath(qp, pkt, do_cnp);
+ return bth1 & HFI1_FECN_SMASK;
+ }
+ return false;
+}
+
/*
* Return the indexed PKEY from the port PKEY table.
*/
}
/*
- * Readers of cc_state must call get_cc_state() under rcu_read_lock().
- * Writers of cc_state must call get_cc_state() under cc_state_lock.
+ * Called by readers of cc_state only, must call under rcu_read_lock().
*/
static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd)
{
return rcu_dereference(ppd->cc_state);
}
+/*
+ * Called by writers of cc_state only, must call under cc_state_lock.
+ */
+static inline
+struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
+{
+ return rcu_dereference_protected(ppd->cc_state,
+ lockdep_is_held(&ppd->cc_state_lock));
+}
+
/*
* values for dd->flags (_device_ related flags)
*/
*/
#define DEFAULT_RCVHDR_ENTSIZE 32
-bool hfi1_can_pin_pages(struct hfi1_devdata *, u32, u32);
-int hfi1_acquire_user_pages(unsigned long, size_t, bool, struct page **);
-void hfi1_release_user_pages(struct mm_struct *, struct page **, size_t, bool);
+bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
+ u32 nlocked, u32 npages);
+int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr,
+ size_t npages, bool writable, struct page **pages);
+void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
+ size_t npages, bool dirty);
static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
{
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
+#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
+#define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev))
+
+#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype }
+#define show_packettype(etype) \
+__print_symbolic(etype, \
+ packettype_name(EXPECTED), \
+ packettype_name(EAGER), \
+ packettype_name(IB), \
+ packettype_name(ERROR), \
+ packettype_name(BYPASS))
+
+#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode }
+#define show_ib_opcode(opcode) \
+__print_symbolic(opcode, \
+ ib_opcode_name(RC_SEND_FIRST), \
+ ib_opcode_name(RC_SEND_MIDDLE), \
+ ib_opcode_name(RC_SEND_LAST), \
+ ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_SEND_ONLY), \
+ ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_RDMA_WRITE_FIRST), \
+ ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \
+ ib_opcode_name(RC_RDMA_WRITE_LAST), \
+ ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_RDMA_WRITE_ONLY), \
+ ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_RDMA_READ_REQUEST), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \
+ ib_opcode_name(RC_ACKNOWLEDGE), \
+ ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
+ ib_opcode_name(RC_COMPARE_SWAP), \
+ ib_opcode_name(RC_FETCH_ADD), \
+ ib_opcode_name(UC_SEND_FIRST), \
+ ib_opcode_name(UC_SEND_MIDDLE), \
+ ib_opcode_name(UC_SEND_LAST), \
+ ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(UC_SEND_ONLY), \
+ ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(UC_RDMA_WRITE_FIRST), \
+ ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \
+ ib_opcode_name(UC_RDMA_WRITE_LAST), \
+ ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(UC_RDMA_WRITE_ONLY), \
+ ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(UD_SEND_ONLY), \
+ ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(CNP))
#endif /* _HFI1_KERNEL_H */
#include "debugfs.h"
#include "verbs.h"
#include "aspm.h"
+#include "affinity.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
struct hfi1_devdata *dd, u8 hw_pidx, u8 port)
{
- int i, size;
+ int i;
uint default_pkey_idx;
+ struct cc_state *cc_state;
ppd->dd = dd;
ppd->hw_pidx = hw_pidx;
spin_lock_init(&ppd->cc_state_lock);
spin_lock_init(&ppd->cc_log_lock);
- size = sizeof(struct cc_state);
- RCU_INIT_POINTER(ppd->cc_state, kzalloc(size, GFP_KERNEL));
- if (!rcu_dereference(ppd->cc_state))
+ cc_state = kzalloc(sizeof(*cc_state), GFP_KERNEL);
+ RCU_INIT_POINTER(ppd->cc_state, cc_state);
+ if (!cc_state)
goto bail;
return;
/*
* Release our hold on the shared asic data. If we are the last one,
- * free the structure. Must be holding hfi1_devs_lock.
+ * return the structure to be finalized outside the lock. Must be
+ * holding hfi1_devs_lock.
*/
-static void release_asic_data(struct hfi1_devdata *dd)
+static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd)
{
+ struct hfi1_asic_data *ad;
int other;
if (!dd->asic_data)
- return;
+ return NULL;
dd->asic_data->dds[dd->hfi1_id] = NULL;
other = dd->hfi1_id ? 0 : 1;
- if (!dd->asic_data->dds[other]) {
- /* we are the last holder, free it */
- kfree(dd->asic_data);
- }
+ ad = dd->asic_data;
dd->asic_data = NULL;
+ /* return NULL if the other dd still has a link */
+ return ad->dds[other] ? NULL : ad;
+}
+
+static void finalize_asic_data(struct hfi1_devdata *dd,
+ struct hfi1_asic_data *ad)
+{
+ clean_up_i2c(dd, ad);
+ kfree(ad);
}
static void __hfi1_free_devdata(struct kobject *kobj)
{
struct hfi1_devdata *dd =
container_of(kobj, struct hfi1_devdata, kobj);
+ struct hfi1_asic_data *ad;
unsigned long flags;
spin_lock_irqsave(&hfi1_devs_lock, flags);
idr_remove(&hfi1_unit_table, dd->unit);
list_del(&dd->list);
- release_asic_data(dd);
+ ad = release_asic_data(dd);
spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+ if (ad)
+ finalize_asic_data(dd, ad);
free_platform_config(dd);
rcu_barrier(); /* wait for rcu callbacks to complete */
free_percpu(dd->int_counter);
free_percpu(dd->rcv_limit);
- hfi1_dev_affinity_free(dd);
free_percpu(dd->send_schedule);
rvt_dealloc_device(&dd->verbs_dev.rdi);
}
#define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: "
#define PFX DRIVER_NAME ": "
-static const struct pci_device_id hfi1_pci_tbl[] = {
+const struct pci_device_id hfi1_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL0) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL1) },
{ 0, }
if (ret)
goto bail;
+ ret = node_affinity_init();
+ if (ret)
+ goto bail;
+
/* validate max MTU before any devices start */
if (!valid_opa_max_mtu(hfi1_max_mtu)) {
pr_err("Invalid max_mtu 0x%x, using 0x%x instead\n",
static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
+ node_affinity_destroy();
hfi1_wss_exit();
hfi1_dbg_exit();
hfi1_cpulist_count = 0;
hrtimer_cancel(&ppd->cca_timer[i].hrtimer);
spin_lock(&ppd->cc_state_lock);
- cc_state = get_cc_state(ppd);
+ cc_state = get_cc_state_protected(ppd);
RCU_INIT_POINTER(ppd->cc_state, NULL);
spin_unlock(&ppd->cc_state_lock);
hfi1_cdbg(PROC,
"ctxt%u: Alloced %u rcv tid entries @ %uKB, total %zuKB\n",
- rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size,
- rcd->egrbufs.size);
+ rcd->ctxt, rcd->egrbufs.alloced,
+ rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024);
/*
* Set the contexts rcv array head update threshold to the closest
pi->port_phys_conf = (ppd->port_type & 0xf);
-#if PI_LED_ENABLE_SUP
pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
pi->port_states.ledenable_offlinereason |=
ppd->is_sm_config_started << 5;
pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
pi->port_states.ledenable_offlinereason |=
ppd->offline_disabled_reason;
-#else
- pi->port_states.offline_reason = ppd->neighbor_normal << 4;
- pi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
- pi->port_states.offline_reason |= ppd->offline_disabled_reason;
-#endif /* PI_LED_ENABLE_SUP */
pi->port_states.portphysstate_portstate =
(hfi1_ibphys_portstate(ppd) << 4) | state;
if (start_of_sm_config && (lstate == IB_PORT_INIT))
ppd->is_sm_config_started = 1;
-#if PI_LED_ENABLE_SUP
psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
psi->port_states.ledenable_offlinereason |=
ppd->is_sm_config_started << 5;
psi->port_states.ledenable_offlinereason |=
ppd->offline_disabled_reason;
-#else
- psi->port_states.offline_reason = ppd->neighbor_normal << 4;
- psi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
- psi->port_states.offline_reason |= ppd->offline_disabled_reason;
-#endif /* PI_LED_ENABLE_SUP */
psi->port_states.portphysstate_portstate =
(hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf);
rsp->port_rcv_remote_physical_errors =
cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
CNTR_INVALID_VL));
- tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
- tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
- if (tmp2 < tmp) {
- /* overflow/wrapped */
- rsp->local_link_integrity_errors = cpu_to_be64(~0);
- } else {
- rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
- }
+ rsp->local_link_integrity_errors =
+ cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
+ CNTR_INVALID_VL));
tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
CNTR_INVALID_VL);
cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
idx_from_vl(vl)));
+ rsp->vls[vfi].port_vl_xmit_discards =
+ cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
+ idx_from_vl(vl)));
vlinfo++;
vfi++;
}
error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
CNTR_INVALID_VL);
/* local link integrity must be right-shifted by the lli resolution */
- tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
- tmp += read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
- error_counter_summary += (tmp >> res_lli);
+ error_counter_summary += (read_dev_cntr(dd, C_DC_RX_REPLAY,
+ CNTR_INVALID_VL) >> res_lli);
/* link error recovery must b right-shifted by the ler resolution */
tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
rsp->port_rcv_constraint_errors =
cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
CNTR_INVALID_VL));
- tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
- tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
- if (tmp2 < tmp) {
- /* overflow/wrapped */
- rsp->local_link_integrity_errors = cpu_to_be64(~0);
- } else {
- rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
- }
+ rsp->local_link_integrity_errors =
+ cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
+ CNTR_INVALID_VL));
rsp->excessive_buffer_overruns =
cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
}
tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
-
+ rsp->port_rcv_errors =
+ cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
vlinfo = &rsp->vls[0];
vfi = 0;
vl_select_mask = be32_to_cpu(req->vl_select_mask);
for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
8 * sizeof(req->vl_select_mask)) {
memset(vlinfo, 0, sizeof(*vlinfo));
- /* vlinfo->vls[vfi].port_vl_xmit_discards ??? */
+ rsp->vls[vfi].port_vl_xmit_discards =
+ cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
+ idx_from_vl(vl)));
vlinfo += 1;
vfi++;
}
if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS)
write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0);
- if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS) {
- write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0);
+ if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS)
write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
- }
if (counter_select & CS_LINK_ERROR_RECOVERY) {
write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
/* if (counter_select & CS_PORT_MARK_FECN)
* write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
*/
- /* port_vl_xmit_discards ??? */
+ if (counter_select & C_SW_XMIT_DSCD_VL)
+ write_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
+ idx_from_vl(vl), 0);
}
if (resp_len)
*/
spin_lock(&ppd->cc_state_lock);
- old_cc_state = get_cc_state(ppd);
+ old_cc_state = get_cc_state_protected(ppd);
if (!old_cc_state) {
/* never active, or shutting down */
spin_unlock(&ppd->cc_state_lock);
write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0);
/* LocalLinkIntegrityErrors */
- write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0);
write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
/* ExcessiveBufferOverruns */
write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
#define _HFI1_MAD_H
#include <rdma/ib_pma.h>
-#define USE_PI_LED_ENABLE 1 /*
- * use led enabled bit in struct
- * opa_port_states, if available
- */
#include <rdma/opa_smi.h>
#include <rdma/opa_port_info.h>
-#ifndef PI_LED_ENABLE_SUP
-#define PI_LED_ENABLE_SUP 0
-#endif
#include "opa_compat.h"
/*
#include "trace.h"
struct mmu_rb_handler {
- struct list_head list;
struct mmu_notifier mn;
- struct rb_root *root;
+ struct rb_root root;
+ void *ops_arg;
spinlock_t lock; /* protect the RB tree */
struct mmu_rb_ops *ops;
+ struct mm_struct *mm;
+ struct list_head lru_list;
+ struct work_struct del_work;
+ struct list_head del_list;
+ struct workqueue_struct *wq;
};
-static LIST_HEAD(mmu_rb_handlers);
-static DEFINE_SPINLOCK(mmu_rb_lock); /* protect mmu_rb_handlers list */
-
static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
-static struct mmu_rb_handler *find_mmu_handler(struct rb_root *);
static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *,
unsigned long);
static inline void mmu_notifier_range_start(struct mmu_notifier *,
unsigned long, unsigned long);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
unsigned long, unsigned long);
+static void do_remove(struct mmu_rb_handler *handler,
+ struct list_head *del_list);
+static void handle_remove(struct work_struct *work);
static struct mmu_notifier_ops mn_opts = {
.invalidate_page = mmu_notifier_page,
return PAGE_ALIGN(node->addr + node->len) - 1;
}
-int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
+int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
+ struct mmu_rb_ops *ops,
+ struct workqueue_struct *wq,
+ struct mmu_rb_handler **handler)
{
struct mmu_rb_handler *handlr;
-
- if (!ops->invalidate)
- return -EINVAL;
+ int ret;
handlr = kmalloc(sizeof(*handlr), GFP_KERNEL);
if (!handlr)
return -ENOMEM;
- handlr->root = root;
+ handlr->root = RB_ROOT;
handlr->ops = ops;
+ handlr->ops_arg = ops_arg;
INIT_HLIST_NODE(&handlr->mn.hlist);
spin_lock_init(&handlr->lock);
handlr->mn.ops = &mn_opts;
- spin_lock(&mmu_rb_lock);
- list_add_tail_rcu(&handlr->list, &mmu_rb_handlers);
- spin_unlock(&mmu_rb_lock);
+ handlr->mm = mm;
+ INIT_WORK(&handlr->del_work, handle_remove);
+ INIT_LIST_HEAD(&handlr->del_list);
+ INIT_LIST_HEAD(&handlr->lru_list);
+ handlr->wq = wq;
+
+ ret = mmu_notifier_register(&handlr->mn, handlr->mm);
+ if (ret) {
+ kfree(handlr);
+ return ret;
+ }
- return mmu_notifier_register(&handlr->mn, current->mm);
+ *handler = handlr;
+ return 0;
}
-void hfi1_mmu_rb_unregister(struct rb_root *root)
+void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
{
- struct mmu_rb_handler *handler = find_mmu_handler(root);
+ struct mmu_rb_node *rbnode;
+ struct rb_node *node;
unsigned long flags;
-
- if (!handler)
- return;
+ struct list_head del_list;
/* Unregister first so we don't get any more notifications. */
- if (current->mm)
- mmu_notifier_unregister(&handler->mn, current->mm);
+ mmu_notifier_unregister(&handler->mn, handler->mm);
- spin_lock(&mmu_rb_lock);
- list_del_rcu(&handler->list);
- spin_unlock(&mmu_rb_lock);
- synchronize_rcu();
+ /*
+ * Make sure the wq delete handler is finished running. It will not
+ * be triggered once the mmu notifiers are unregistered above.
+ */
+ flush_work(&handler->del_work);
+
+ INIT_LIST_HEAD(&del_list);
spin_lock_irqsave(&handler->lock, flags);
- if (!RB_EMPTY_ROOT(root)) {
- struct rb_node *node;
- struct mmu_rb_node *rbnode;
-
- while ((node = rb_first(root))) {
- rbnode = rb_entry(node, struct mmu_rb_node, node);
- rb_erase(node, root);
- if (handler->ops->remove)
- handler->ops->remove(root, rbnode, NULL);
- }
+ while ((node = rb_first(&handler->root))) {
+ rbnode = rb_entry(node, struct mmu_rb_node, node);
+ rb_erase(node, &handler->root);
+ /* move from LRU list to delete list */
+ list_move(&rbnode->list, &del_list);
}
spin_unlock_irqrestore(&handler->lock, flags);
+ do_remove(handler, &del_list);
+
kfree(handler);
}
-int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
+int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
+ struct mmu_rb_node *mnode)
{
- struct mmu_rb_handler *handler = find_mmu_handler(root);
struct mmu_rb_node *node;
unsigned long flags;
int ret = 0;
- if (!handler)
- return -EINVAL;
-
spin_lock_irqsave(&handler->lock, flags);
hfi1_cdbg(MMU, "Inserting node addr 0x%llx, len %u", mnode->addr,
mnode->len);
ret = -EINVAL;
goto unlock;
}
- __mmu_int_rb_insert(mnode, root);
+ __mmu_int_rb_insert(mnode, &handler->root);
+ list_add(&mnode->list, &handler->lru_list);
- if (handler->ops->insert) {
- ret = handler->ops->insert(root, mnode);
- if (ret)
- __mmu_int_rb_remove(mnode, root);
+ ret = handler->ops->insert(handler->ops_arg, mnode);
+ if (ret) {
+ __mmu_int_rb_remove(mnode, &handler->root);
+ list_del(&mnode->list); /* remove from LRU list */
}
unlock:
spin_unlock_irqrestore(&handler->lock, flags);
hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len);
if (!handler->ops->filter) {
- node = __mmu_int_rb_iter_first(handler->root, addr,
+ node = __mmu_int_rb_iter_first(&handler->root, addr,
(addr + len) - 1);
} else {
- for (node = __mmu_int_rb_iter_first(handler->root, addr,
+ for (node = __mmu_int_rb_iter_first(&handler->root, addr,
(addr + len) - 1);
node;
node = __mmu_int_rb_iter_next(node, addr,
return node;
}
-/* Caller must *not* hold handler lock. */
-static void __mmu_rb_remove(struct mmu_rb_handler *handler,
- struct mmu_rb_node *node, struct mm_struct *mm)
-{
- unsigned long flags;
-
- /* Validity of handler and node pointers has been checked by caller. */
- hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
- node->len);
- spin_lock_irqsave(&handler->lock, flags);
- __mmu_int_rb_remove(node, handler->root);
- spin_unlock_irqrestore(&handler->lock, flags);
-
- if (handler->ops->remove)
- handler->ops->remove(handler->root, node, mm);
-}
-
-struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
- unsigned long len)
+struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler,
+ unsigned long addr, unsigned long len)
{
- struct mmu_rb_handler *handler = find_mmu_handler(root);
struct mmu_rb_node *node;
unsigned long flags;
- if (!handler)
- return ERR_PTR(-EINVAL);
-
spin_lock_irqsave(&handler->lock, flags);
node = __mmu_rb_search(handler, addr, len);
+ if (node) {
+ __mmu_int_rb_remove(node, &handler->root);
+ list_del(&node->list); /* remove from LRU list */
+ }
spin_unlock_irqrestore(&handler->lock, flags);
return node;
}
-struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *root,
- unsigned long addr, unsigned long len)
+void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
{
- struct mmu_rb_handler *handler = find_mmu_handler(root);
- struct mmu_rb_node *node;
+ struct mmu_rb_node *rbnode, *ptr;
+ struct list_head del_list;
unsigned long flags;
+ bool stop = false;
- if (!handler)
- return ERR_PTR(-EINVAL);
+ INIT_LIST_HEAD(&del_list);
spin_lock_irqsave(&handler->lock, flags);
- node = __mmu_rb_search(handler, addr, len);
- if (node)
- __mmu_int_rb_remove(node, handler->root);
+ list_for_each_entry_safe_reverse(rbnode, ptr, &handler->lru_list,
+ list) {
+ if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
+ &stop)) {
+ __mmu_int_rb_remove(rbnode, &handler->root);
+ /* move from LRU list to delete list */
+ list_move(&rbnode->list, &del_list);
+ }
+ if (stop)
+ break;
+ }
spin_unlock_irqrestore(&handler->lock, flags);
- return node;
+ while (!list_empty(&del_list)) {
+ rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
+ list_del(&rbnode->list);
+ handler->ops->remove(handler->ops_arg, rbnode);
+ }
}
-void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
+/*
+ * It is up to the caller to ensure that this function does not race with the
+ * mmu invalidate notifier which may be calling the users remove callback on
+ * 'node'.
+ */
+void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
+ struct mmu_rb_node *node)
{
- struct mmu_rb_handler *handler = find_mmu_handler(root);
-
- if (!handler || !node)
- return;
-
- __mmu_rb_remove(handler, node, NULL);
-}
+ unsigned long flags;
-static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
-{
- struct mmu_rb_handler *handler;
+ /* Validity of handler and node pointers has been checked by caller. */
+ hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
+ node->len);
+ spin_lock_irqsave(&handler->lock, flags);
+ __mmu_int_rb_remove(node, &handler->root);
+ list_del(&node->list); /* remove from LRU list */
+ spin_unlock_irqrestore(&handler->lock, flags);
- rcu_read_lock();
- list_for_each_entry_rcu(handler, &mmu_rb_handlers, list) {
- if (handler->root == root)
- goto unlock;
- }
- handler = NULL;
-unlock:
- rcu_read_unlock();
- return handler;
+ handler->ops->remove(handler->ops_arg, node);
}
static inline void mmu_notifier_page(struct mmu_notifier *mn,
{
struct mmu_rb_handler *handler =
container_of(mn, struct mmu_rb_handler, mn);
- struct rb_root *root = handler->root;
+ struct rb_root *root = &handler->root;
struct mmu_rb_node *node, *ptr = NULL;
unsigned long flags;
+ bool added = false;
spin_lock_irqsave(&handler->lock, flags);
for (node = __mmu_int_rb_iter_first(root, start, end - 1);
ptr = __mmu_int_rb_iter_next(node, start, end - 1);
hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
node->addr, node->len);
- if (handler->ops->invalidate(root, node)) {
+ if (handler->ops->invalidate(handler->ops_arg, node)) {
__mmu_int_rb_remove(node, root);
- if (handler->ops->remove)
- handler->ops->remove(root, node, mm);
+ /* move from LRU list to delete list */
+ list_move(&node->list, &handler->del_list);
+ added = true;
}
}
spin_unlock_irqrestore(&handler->lock, flags);
+
+ if (added)
+ queue_work(handler->wq, &handler->del_work);
+}
+
+/*
+ * Call the remove function for the given handler and the list. This
+ * is expected to be called with a delete list extracted from handler.
+ * The caller should not be holding the handler lock.
+ */
+static void do_remove(struct mmu_rb_handler *handler,
+ struct list_head *del_list)
+{
+ struct mmu_rb_node *node;
+
+ while (!list_empty(del_list)) {
+ node = list_first_entry(del_list, struct mmu_rb_node, list);
+ list_del(&node->list);
+ handler->ops->remove(handler->ops_arg, node);
+ }
+}
+
+/*
+ * Work queue function to remove all nodes that have been queued up to
+ * be removed. The key feature is that mm->mmap_sem is not being held
+ * and the remove callback can sleep while taking it, if needed.
+ */
+static void handle_remove(struct work_struct *work)
+{
+ struct mmu_rb_handler *handler = container_of(work,
+ struct mmu_rb_handler,
+ del_work);
+ struct list_head del_list;
+ unsigned long flags;
+
+ /* remove anything that is queued to get removed */
+ spin_lock_irqsave(&handler->lock, flags);
+ list_replace_init(&handler->del_list, &del_list);
+ spin_unlock_irqrestore(&handler->lock, flags);
+
+ do_remove(handler, &del_list);
}
unsigned long len;
unsigned long __last;
struct rb_node node;
+ struct list_head list;
};
+/*
+ * NOTE: filter, insert, invalidate, and evict must not sleep. Only remove is
+ * allowed to sleep.
+ */
struct mmu_rb_ops {
- bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long);
- int (*insert)(struct rb_root *, struct mmu_rb_node *);
- void (*remove)(struct rb_root *, struct mmu_rb_node *,
- struct mm_struct *);
- int (*invalidate)(struct rb_root *, struct mmu_rb_node *);
+ bool (*filter)(struct mmu_rb_node *node, unsigned long addr,
+ unsigned long len);
+ int (*insert)(void *ops_arg, struct mmu_rb_node *mnode);
+ void (*remove)(void *ops_arg, struct mmu_rb_node *mnode);
+ int (*invalidate)(void *ops_arg, struct mmu_rb_node *node);
+ int (*evict)(void *ops_arg, struct mmu_rb_node *mnode,
+ void *evict_arg, bool *stop);
};
-int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops);
-void hfi1_mmu_rb_unregister(struct rb_root *);
-int hfi1_mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
-void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *);
-struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long,
- unsigned long);
-struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *, unsigned long,
- unsigned long);
+int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
+ struct mmu_rb_ops *ops,
+ struct workqueue_struct *wq,
+ struct mmu_rb_handler **handler);
+void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler);
+int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
+ struct mmu_rb_node *mnode);
+void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg);
+void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
+ struct mmu_rb_node *mnode);
+struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler,
+ unsigned long addr, unsigned long len);
#endif /* _HFI1_MMU_RB_H */
module_param(pcie_pset, uint, S_IRUGO);
MODULE_PARM_DESC(pcie_pset, "PCIe Eq Pset value to use, range is 0-10");
+static uint pcie_ctle = 1; /* discrete on, integrated off */
+module_param(pcie_ctle, uint, S_IRUGO);
+MODULE_PARM_DESC(pcie_ctle, "PCIe static CTLE mode, bit 0 - discrete on/off, bit 1 - integrated on/off");
+
/* equalization columns */
#define PREC 0
#define ATTN 1
{ 0x00, 0x1e, 0x0a }, /* p10 */
};
+static const u8 discrete_ctle_tunings[11][4] = {
+ /* DC LF HF BW */
+ { 0x48, 0x0b, 0x04, 0x04 }, /* p0 */
+ { 0x60, 0x05, 0x0f, 0x0a }, /* p1 */
+ { 0x50, 0x09, 0x06, 0x06 }, /* p2 */
+ { 0x68, 0x05, 0x0f, 0x0a }, /* p3 */
+ { 0x80, 0x05, 0x0f, 0x0a }, /* p4 */
+ { 0x70, 0x05, 0x0f, 0x0a }, /* p5 */
+ { 0x68, 0x05, 0x0f, 0x0a }, /* p6 */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p7 */
+ { 0x48, 0x09, 0x06, 0x06 }, /* p8 */
+ { 0x60, 0x05, 0x0f, 0x0a }, /* p9 */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p10 */
+};
+
+static const u8 integrated_ctle_tunings[11][4] = {
+ /* DC LF HF BW */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p0 */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p1 */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p2 */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p3 */
+ { 0x58, 0x0a, 0x05, 0x05 }, /* p4 */
+ { 0x48, 0x0a, 0x05, 0x05 }, /* p5 */
+ { 0x40, 0x0a, 0x05, 0x05 }, /* p6 */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p7 */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p8 */
+ { 0x38, 0x09, 0x06, 0x06 }, /* p9 */
+ { 0x38, 0x0e, 0x01, 0x01 }, /* p10 */
+};
+
/* helper to format the value to write to hardware */
#define eq_value(pre, curr, post) \
((((u32)(pre)) << \
u32 status, err;
int ret;
int do_retry, retry_count = 0;
+ int intnum = 0;
uint default_pset;
u16 target_vector, target_speed;
u16 lnkctl2, vendor;
u8 div;
const u8 (*eq)[3];
+ const u8 (*ctle_tunings)[4];
+ uint static_ctle_mode;
int return_error = 0;
/* PCIe Gen3 is for the ASIC only */
div = 3;
eq = discrete_preliminary_eq;
default_pset = DEFAULT_DISCRETE_PSET;
+ ctle_tunings = discrete_ctle_tunings;
+ /* bit 0 - discrete on/off */
+ static_ctle_mode = pcie_ctle & 0x1;
} else {
/* 400mV, FS=29, LF = 9 */
fs = 29;
div = 1;
eq = integrated_preliminary_eq;
default_pset = DEFAULT_MCP_PSET;
+ ctle_tunings = integrated_ctle_tunings;
+ /* bit 1 - integrated on/off */
+ static_ctle_mode = (pcie_ctle >> 1) & 0x1;
}
pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL101,
(fs <<
* step 5c: Program gasket interrupts
*/
/* set the Rx Bit Rate to REFCLK ratio */
- write_gasket_interrupt(dd, 0, 0x0006, 0x0050);
+ write_gasket_interrupt(dd, intnum++, 0x0006, 0x0050);
/* disable pCal for PCIe Gen3 RX equalization */
- write_gasket_interrupt(dd, 1, 0x0026, 0x5b01);
+ /* select adaptive or static CTLE */
+ write_gasket_interrupt(dd, intnum++, 0x0026,
+ 0x5b01 | (static_ctle_mode << 3));
/*
* Enable iCal for PCIe Gen3 RX equalization, and set which
* evaluation of RX_EQ_EVAL will launch the iCal procedure.
*/
- write_gasket_interrupt(dd, 2, 0x0026, 0x5202);
+ write_gasket_interrupt(dd, intnum++, 0x0026, 0x5202);
+
+ if (static_ctle_mode) {
+ /* apply static CTLE tunings */
+ u8 pcie_dc, pcie_lf, pcie_hf, pcie_bw;
+
+ pcie_dc = ctle_tunings[pcie_pset][0];
+ pcie_lf = ctle_tunings[pcie_pset][1];
+ pcie_hf = ctle_tunings[pcie_pset][2];
+ pcie_bw = ctle_tunings[pcie_pset][3];
+ write_gasket_interrupt(dd, intnum++, 0x0026, 0x0200 | pcie_dc);
+ write_gasket_interrupt(dd, intnum++, 0x0026, 0x0100 | pcie_lf);
+ write_gasket_interrupt(dd, intnum++, 0x0026, 0x0000 | pcie_hf);
+ write_gasket_interrupt(dd, intnum++, 0x0026, 0x5500 | pcie_bw);
+ }
+
/* terminate list */
- write_gasket_interrupt(dd, 3, 0x0000, 0x0000);
+ write_gasket_interrupt(dd, intnum++, 0x0000, 0x0000);
/*
* step 5d: program XMT margin
dd->vld[15].sc = sc_alloc(dd, SC_VL15,
dd->rcd[0]->rcvhdrqentsize, dd->node);
if (!dd->vld[15].sc)
- goto nomem;
+ return -ENOMEM;
+
hfi1_init_ctxt(dd->vld[15].sc);
dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048);
- dd->kernel_send_context = kmalloc_node(dd->num_send_contexts *
+ dd->kernel_send_context = kzalloc_node(dd->num_send_contexts *
sizeof(struct send_context *),
GFP_KERNEL, dd->node);
+ if (!dd->kernel_send_context)
+ goto freesc15;
+
dd->kernel_send_context[0] = dd->vld[15].sc;
for (i = 0; i < num_vls; i++) {
if (pio_map_init(dd, ppd->port - 1, num_vls, NULL))
goto nomem;
return 0;
+
nomem:
- sc_free(dd->vld[15].sc);
- for (i = 0; i < num_vls; i++)
+ for (i = 0; i < num_vls; i++) {
sc_free(dd->vld[i].sc);
+ dd->vld[i].sc = NULL;
+ }
+
for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++)
sc_free(dd->kernel_send_context[i + 1]);
+
+ kfree(dd->kernel_send_context);
+ dd->kernel_send_context = NULL;
+
+freesc15:
+ sc_free(dd->vld[15].sc);
return -ENOMEM;
}
u8 precur = 0, attn = 0, postcur = 0, external_device_config = 0;
u8 *cache = ppd->qsfp_info.cache;
- /* Enable external device config if channel is limiting active */
- read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
- GENERAL_CONFIG, &config_data);
- config_data &= ~(0xff << ENABLE_EXT_DEV_CONFIG_SHIFT);
- config_data |= ((u32)limiting_active << ENABLE_EXT_DEV_CONFIG_SHIFT);
- ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
- GENERAL_CONFIG, config_data);
- if (ret != HCMD_SUCCESS)
- dd_dev_err(
- ppd->dd,
- "%s: Failed to set enable external device config\n",
- __func__);
-
- config_data = 0; /* re-init */
/* Pass tuning method to 8051 */
read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
&config_data);
if (ret)
return ret;
+ /*
+ * We'll change the QSFP memory contents from here on out, thus we set a
+ * flag here to remind ourselves to reset the QSFP module. This prevents
+ * reuse of stale settings established in our previous pass through.
+ */
if (ppd->qsfp_info.reset_needed) {
reset_qsfp(ppd);
- ppd->qsfp_info.reset_needed = 0;
refresh_qsfp_cache(ppd, &ppd->qsfp_info);
} else {
ppd->qsfp_info.reset_needed = 1;
#include <linux/seq_file.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
+#include <rdma/ib_verbs.h>
#include "hfi.h"
#include "qp.h"
32768 /* 1E */
};
+const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
+[IB_WR_RDMA_WRITE] = {
+ .length = sizeof(struct ib_rdma_wr),
+ .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+[IB_WR_RDMA_READ] = {
+ .length = sizeof(struct ib_rdma_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_ATOMIC,
+},
+
+[IB_WR_ATOMIC_CMP_AND_SWP] = {
+ .length = sizeof(struct ib_atomic_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
+},
+
+[IB_WR_ATOMIC_FETCH_AND_ADD] = {
+ .length = sizeof(struct ib_atomic_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
+},
+
+[IB_WR_RDMA_WRITE_WITH_IMM] = {
+ .length = sizeof(struct ib_rdma_wr),
+ .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+[IB_WR_SEND] = {
+ .length = sizeof(struct ib_send_wr),
+ .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
+ BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+[IB_WR_SEND_WITH_IMM] = {
+ .length = sizeof(struct ib_send_wr),
+ .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
+ BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+[IB_WR_REG_MR] = {
+ .length = sizeof(struct ib_reg_wr),
+ .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_LOCAL,
+},
+
+[IB_WR_LOCAL_INV] = {
+ .length = sizeof(struct ib_send_wr),
+ .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_LOCAL,
+},
+
+[IB_WR_SEND_WITH_INV] = {
+ .length = sizeof(struct ib_send_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+},
+
+};
+
static void flush_tx_list(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
priv->owner = qp;
- priv->s_hdr = kzalloc_node(sizeof(*priv->s_hdr), gfp, rdi->dparms.node);
- if (!priv->s_hdr) {
+ priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), gfp,
+ rdi->dparms.node);
+ if (!priv->s_ahg) {
kfree(priv);
return ERR_PTR(-ENOMEM);
}
{
struct hfi1_qp_priv *priv = qp->priv;
- kfree(priv->s_hdr);
+ kfree(priv->s_ahg);
kfree(priv);
}
extern unsigned int hfi1_qp_table_size;
+extern const struct rvt_operation_params hfi1_post_parms[];
+
/*
* free_ahg - clear ahg from QP
*/
{
struct hfi1_qp_priv *priv = qp->priv;
- priv->s_hdr->ahgcount = 0;
+ priv->s_ahg->ahgcount = 0;
qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR);
if (priv->s_sde && qp->s_ahgidx >= 0)
sdma_ahg_free(priv->s_sde, qp->s_ahgidx);
#include <linux/vmalloc.h>
#include "hfi.h"
-#include "twsi.h"
+
+/* for the given bus number, return the CSR for reading an i2c line */
+static inline u32 i2c_in_csr(u32 bus_num)
+{
+ return bus_num ? ASIC_QSFP2_IN : ASIC_QSFP1_IN;
+}
+
+/* for the given bus number, return the CSR for writing an i2c line */
+static inline u32 i2c_oe_csr(u32 bus_num)
+{
+ return bus_num ? ASIC_QSFP2_OE : ASIC_QSFP1_OE;
+}
+
+static void hfi1_setsda(void *data, int state)
+{
+ struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data;
+ struct hfi1_devdata *dd = bus->controlling_dd;
+ u64 reg;
+ u32 target_oe;
+
+ target_oe = i2c_oe_csr(bus->num);
+ reg = read_csr(dd, target_oe);
+ /*
+ * The OE bit value is inverted and connected to the pin. When
+ * OE is 0 the pin is left to be pulled up, when the OE is 1
+ * the pin is driven low. This matches the "open drain" or "open
+ * collector" convention.
+ */
+ if (state)
+ reg &= ~QSFP_HFI0_I2CDAT;
+ else
+ reg |= QSFP_HFI0_I2CDAT;
+ write_csr(dd, target_oe, reg);
+ /* do a read to force the write into the chip */
+ (void)read_csr(dd, target_oe);
+}
+
+static void hfi1_setscl(void *data, int state)
+{
+ struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data;
+ struct hfi1_devdata *dd = bus->controlling_dd;
+ u64 reg;
+ u32 target_oe;
+
+ target_oe = i2c_oe_csr(bus->num);
+ reg = read_csr(dd, target_oe);
+ /*
+ * The OE bit value is inverted and connected to the pin. When
+ * OE is 0 the pin is left to be pulled up, when the OE is 1
+ * the pin is driven low. This matches the "open drain" or "open
+ * collector" convention.
+ */
+ if (state)
+ reg &= ~QSFP_HFI0_I2CCLK;
+ else
+ reg |= QSFP_HFI0_I2CCLK;
+ write_csr(dd, target_oe, reg);
+ /* do a read to force the write into the chip */
+ (void)read_csr(dd, target_oe);
+}
+
+static int hfi1_getsda(void *data)
+{
+ struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data;
+ u64 reg;
+ u32 target_in;
+
+ hfi1_setsda(data, 1); /* clear OE so we do not pull line down */
+ udelay(2); /* 1us pull up + 250ns hold */
+
+ target_in = i2c_in_csr(bus->num);
+ reg = read_csr(bus->controlling_dd, target_in);
+ return !!(reg & QSFP_HFI0_I2CDAT);
+}
+
+static int hfi1_getscl(void *data)
+{
+ struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data;
+ u64 reg;
+ u32 target_in;
+
+ hfi1_setscl(data, 1); /* clear OE so we do not pull line down */
+ udelay(2); /* 1us pull up + 250ns hold */
+
+ target_in = i2c_in_csr(bus->num);
+ reg = read_csr(bus->controlling_dd, target_in);
+ return !!(reg & QSFP_HFI0_I2CCLK);
+}
/*
- * QSFP support for hfi driver, using "Two Wire Serial Interface" driver
- * in twsi.c
+ * Allocate and initialize the given i2c bus number.
+ * Returns NULL on failure.
*/
-#define I2C_MAX_RETRY 4
+static struct hfi1_i2c_bus *init_i2c_bus(struct hfi1_devdata *dd,
+ struct hfi1_asic_data *ad, int num)
+{
+ struct hfi1_i2c_bus *bus;
+ int ret;
+
+ bus = kzalloc(sizeof(*bus), GFP_KERNEL);
+ if (!bus)
+ return NULL;
+
+ bus->controlling_dd = dd;
+ bus->num = num; /* our bus number */
+
+ bus->algo.setsda = hfi1_setsda;
+ bus->algo.setscl = hfi1_setscl;
+ bus->algo.getsda = hfi1_getsda;
+ bus->algo.getscl = hfi1_getscl;
+ bus->algo.udelay = 5;
+ bus->algo.timeout = usecs_to_jiffies(50);
+ bus->algo.data = bus;
+
+ bus->adapter.owner = THIS_MODULE;
+ bus->adapter.algo_data = &bus->algo;
+ bus->adapter.dev.parent = &dd->pcidev->dev;
+ snprintf(bus->adapter.name, sizeof(bus->adapter.name),
+ "hfi1_i2c%d", num);
+
+ ret = i2c_bit_add_bus(&bus->adapter);
+ if (ret) {
+ dd_dev_info(dd, "%s: unable to add i2c bus %d, err %d\n",
+ __func__, num, ret);
+ kfree(bus);
+ return NULL;
+ }
+
+ return bus;
+}
/*
- * Raw i2c write. No set-up or lock checking.
+ * Initialize i2c buses.
+ * Return 0 on success, -errno on error.
*/
-static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
- int offset, void *bp, int len)
+int set_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad)
{
- struct hfi1_devdata *dd = ppd->dd;
- int ret, cnt;
- u8 *buff = bp;
+ ad->i2c_bus0 = init_i2c_bus(dd, ad, 0);
+ ad->i2c_bus1 = init_i2c_bus(dd, ad, 1);
+ if (!ad->i2c_bus0 || !ad->i2c_bus1)
+ return -ENOMEM;
+ return 0;
+};
- cnt = 0;
- while (cnt < len) {
- int wlen = len - cnt;
+static void clean_i2c_bus(struct hfi1_i2c_bus *bus)
+{
+ if (bus) {
+ i2c_del_adapter(&bus->adapter);
+ kfree(bus);
+ }
+}
- ret = hfi1_twsi_blk_wr(dd, target, i2c_addr, offset,
- buff + cnt, wlen);
- if (ret) {
- /* hfi1_twsi_blk_wr() 1 for error, else 0 */
- return -EIO;
- }
- offset += wlen;
- cnt += wlen;
+void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad)
+{
+ clean_i2c_bus(ad->i2c_bus0);
+ ad->i2c_bus0 = NULL;
+ clean_i2c_bus(ad->i2c_bus1);
+ ad->i2c_bus1 = NULL;
+}
+
+static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c,
+ u8 slave_addr, int offset, int offset_size,
+ u8 *data, u16 len)
+{
+ int ret;
+ int num_msgs;
+ u8 offset_bytes[2];
+ struct i2c_msg msgs[2];
+
+ switch (offset_size) {
+ case 0:
+ num_msgs = 1;
+ msgs[0].addr = slave_addr;
+ msgs[0].flags = 0;
+ msgs[0].len = len;
+ msgs[0].buf = data;
+ break;
+ case 2:
+ offset_bytes[1] = (offset >> 8) & 0xff;
+ /* fall through */
+ case 1:
+ num_msgs = 2;
+ offset_bytes[0] = offset & 0xff;
+
+ msgs[0].addr = slave_addr;
+ msgs[0].flags = 0;
+ msgs[0].len = offset_size;
+ msgs[0].buf = offset_bytes;
+
+ msgs[1].addr = slave_addr;
+ msgs[1].flags = I2C_M_NOSTART,
+ msgs[1].len = len;
+ msgs[1].buf = data;
+ break;
+ default:
+ return -EINVAL;
}
- /* Must wait min 20us between qsfp i2c transactions */
- udelay(20);
+ i2c->controlling_dd = dd;
+ ret = i2c_transfer(&i2c->adapter, msgs, num_msgs);
+ if (ret != num_msgs) {
+ dd_dev_err(dd, "%s: bus %d, i2c slave 0x%x, offset 0x%x, len 0x%x; write failed, ret %d\n",
+ __func__, i2c->num, slave_addr, offset, len, ret);
+ return ret < 0 ? ret : -EIO;
+ }
+ return 0;
+}
+
+static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus,
+ u8 slave_addr, int offset, int offset_size,
+ u8 *data, u16 len)
+{
+ int ret;
+ int num_msgs;
+ u8 offset_bytes[2];
+ struct i2c_msg msgs[2];
+
+ switch (offset_size) {
+ case 0:
+ num_msgs = 1;
+ msgs[0].addr = slave_addr;
+ msgs[0].flags = I2C_M_RD;
+ msgs[0].len = len;
+ msgs[0].buf = data;
+ break;
+ case 2:
+ offset_bytes[1] = (offset >> 8) & 0xff;
+ /* fall through */
+ case 1:
+ num_msgs = 2;
+ offset_bytes[0] = offset & 0xff;
+
+ msgs[0].addr = slave_addr;
+ msgs[0].flags = 0;
+ msgs[0].len = offset_size;
+ msgs[0].buf = offset_bytes;
+
+ msgs[1].addr = slave_addr;
+ msgs[1].flags = I2C_M_RD,
+ msgs[1].len = len;
+ msgs[1].buf = data;
+ break;
+ default:
+ return -EINVAL;
+ }
- return cnt;
+ bus->controlling_dd = dd;
+ ret = i2c_transfer(&bus->adapter, msgs, num_msgs);
+ if (ret != num_msgs) {
+ dd_dev_err(dd, "%s: bus %d, i2c slave 0x%x, offset 0x%x, len 0x%x; read failed, ret %d\n",
+ __func__, bus->num, slave_addr, offset, len, ret);
+ return ret < 0 ? ret : -EIO;
+ }
+ return 0;
+}
+
+/*
+ * Raw i2c write. No set-up or lock checking.
+ *
+ * Return 0 on success, -errno on error.
+ */
+static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
+ int offset, void *bp, int len)
+{
+ struct hfi1_devdata *dd = ppd->dd;
+ struct hfi1_i2c_bus *bus;
+ u8 slave_addr;
+ int offset_size;
+
+ bus = target ? dd->asic_data->i2c_bus1 : dd->asic_data->i2c_bus0;
+ slave_addr = (i2c_addr & 0xff) >> 1; /* convert to 7-bit addr */
+ offset_size = (i2c_addr >> 8) & 0x3;
+ return i2c_bus_write(dd, bus, slave_addr, offset, offset_size, bp, len);
}
/*
* Caller must hold the i2c chain resource.
+ *
+ * Return number of bytes written, or -errno.
*/
int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
void *bp, int len)
if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES;
- /* make sure the TWSI bus is in a sane state */
- ret = hfi1_twsi_reset(ppd->dd, target);
- if (ret) {
- hfi1_dev_porterr(ppd->dd, ppd->port,
- "I2C chain %d write interface reset failed\n",
- target);
+ ret = __i2c_write(ppd, target, i2c_addr, offset, bp, len);
+ if (ret)
return ret;
- }
- return __i2c_write(ppd, target, i2c_addr, offset, bp, len);
+ return len;
}
/*
* Raw i2c read. No set-up or lock checking.
+ *
+ * Return 0 on success, -errno on error.
*/
static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
int offset, void *bp, int len)
{
struct hfi1_devdata *dd = ppd->dd;
- int ret, cnt, pass = 0;
- int orig_offset = offset;
-
- cnt = 0;
- while (cnt < len) {
- int rlen = len - cnt;
-
- ret = hfi1_twsi_blk_rd(dd, target, i2c_addr, offset,
- bp + cnt, rlen);
- /* Some QSFP's fail first try. Retry as experiment */
- if (ret && cnt == 0 && ++pass < I2C_MAX_RETRY)
- continue;
- if (ret) {
- /* hfi1_twsi_blk_rd() 1 for error, else 0 */
- ret = -EIO;
- goto exit;
- }
- offset += rlen;
- cnt += rlen;
- }
-
- ret = cnt;
-
-exit:
- if (ret < 0) {
- hfi1_dev_porterr(dd, ppd->port,
- "I2C chain %d read failed, addr 0x%x, offset 0x%x, len %d\n",
- target, i2c_addr, orig_offset, len);
- }
-
- /* Must wait min 20us between qsfp i2c transactions */
- udelay(20);
-
- return ret;
+ struct hfi1_i2c_bus *bus;
+ u8 slave_addr;
+ int offset_size;
+
+ bus = target ? dd->asic_data->i2c_bus1 : dd->asic_data->i2c_bus0;
+ slave_addr = (i2c_addr & 0xff) >> 1; /* convert to 7-bit addr */
+ offset_size = (i2c_addr >> 8) & 0x3;
+ return i2c_bus_read(dd, bus, slave_addr, offset, offset_size, bp, len);
}
/*
* Caller must hold the i2c chain resource.
+ *
+ * Return number of bytes read, or -errno.
*/
int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
void *bp, int len)
if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES;
- /* make sure the TWSI bus is in a sane state */
- ret = hfi1_twsi_reset(ppd->dd, target);
- if (ret) {
- hfi1_dev_porterr(ppd->dd, ppd->port,
- "I2C chain %d read interface reset failed\n",
- target);
+ ret = __i2c_read(ppd, target, i2c_addr, offset, bp, len);
+ if (ret)
return ret;
- }
- return __i2c_read(ppd, target, i2c_addr, offset, bp, len);
+ return len;
}
/*
* by writing @addr = ((256 * n) + m)
*
* Caller must hold the i2c chain resource.
+ *
+ * Return number of bytes written or -errno.
*/
int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len)
int count = 0;
int offset;
int nwrite;
- int ret;
+ int ret = 0;
u8 page;
if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES;
- /* make sure the TWSI bus is in a sane state */
- ret = hfi1_twsi_reset(ppd->dd, target);
- if (ret) {
- hfi1_dev_porterr(ppd->dd, ppd->port,
- "QSFP chain %d write interface reset failed\n",
- target);
- return ret;
- }
-
while (count < len) {
/*
* Set the qsfp page based on a zero-based address
ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
- if (ret != 1) {
+ /* QSFPs require a 5-10msec delay after write operations */
+ mdelay(5);
+ if (ret) {
hfi1_dev_porterr(ppd->dd, ppd->port,
"QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n",
target, ret);
- ret = -EIO;
break;
}
ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
offset, bp + count, nwrite);
- if (ret <= 0) /* stop on error or nothing written */
+ /* QSFPs require a 5-10msec delay after write operations */
+ mdelay(5);
+ if (ret) /* stop on error */
break;
- count += ret;
- addr += ret;
+ count += nwrite;
+ addr += nwrite;
}
if (ret < 0)
/*
* Perform a stand-alone single QSFP write. Acquire the resource, do the
- * read, then release the resource.
+ * write, then release the resource.
*/
int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len)
* by reading @addr = ((256 * n) + m)
*
* Caller must hold the i2c chain resource.
+ *
+ * Return the number of bytes read or -errno.
*/
int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len)
int count = 0;
int offset;
int nread;
- int ret;
+ int ret = 0;
u8 page;
if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES;
- /* make sure the TWSI bus is in a sane state */
- ret = hfi1_twsi_reset(ppd->dd, target);
- if (ret) {
- hfi1_dev_porterr(ppd->dd, ppd->port,
- "QSFP chain %d read interface reset failed\n",
- target);
- return ret;
- }
-
while (count < len) {
/*
* Set the qsfp page based on a zero-based address
page = (u8)(addr / QSFP_PAGESIZE);
ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
- if (ret != 1) {
+ /* QSFPs require a 5-10msec delay after write operations */
+ mdelay(5);
+ if (ret) {
hfi1_dev_porterr(ppd->dd, ppd->port,
"QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n",
target, ret);
- ret = -EIO;
break;
}
if (((addr % QSFP_RW_BOUNDARY) + nread) > QSFP_RW_BOUNDARY)
nread = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY);
- /* QSFPs require a 5-10msec delay after write operations */
- mdelay(5);
ret = __i2c_read(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
offset, bp + count, nread);
- if (ret <= 0) /* stop on error or nothing read */
+ if (ret) /* stop on error */
break;
- count += ret;
- addr += ret;
+ count += nread;
+ addr += nread;
}
if (ret < 0)
int len);
int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
+struct hfi1_asic_data;
+int set_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad);
+void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad);
qp->s_flags |= RVT_S_WAIT_FENCE;
goto bail;
}
+ /*
+ * Local operations are processed immediately
+ * after all prior requests have completed
+ */
+ if (wqe->wr.opcode == IB_WR_REG_MR ||
+ wqe->wr.opcode == IB_WR_LOCAL_INV) {
+ int local_ops = 0;
+ int err = 0;
+
+ if (qp->s_last != qp->s_cur)
+ goto bail;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ if (++qp->s_tail == qp->s_size)
+ qp->s_tail = 0;
+ if (!(wqe->wr.send_flags &
+ RVT_SEND_COMPLETION_ONLY)) {
+ err = rvt_invalidate_rkey(
+ qp,
+ wqe->wr.ex.invalidate_rkey);
+ local_ops = 1;
+ }
+ hfi1_send_complete(qp, wqe,
+ err ? IB_WC_LOC_PROT_ERR
+ : IB_WC_SUCCESS);
+ if (local_ops)
+ atomic_dec(&qp->local_ops_pending);
+ qp->s_hdrwords = 0;
+ goto done_free_tx;
+ }
+
newreq = 1;
qp->s_psn = wqe->psn;
}
switch (wqe->wr.opcode) {
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
+ case IB_WR_SEND_WITH_INV:
/* If no credit, return. */
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
}
if (wqe->wr.opcode == IB_WR_SEND) {
qp->s_state = OP(SEND_ONLY);
- } else {
+ } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
+ } else {
+ qp->s_state = OP(SEND_ONLY_WITH_INVALIDATE);
+ /* Invalidate rkey comes after the BTH */
+ ohdr->u.ieth = cpu_to_be32(
+ wqe->wr.ex.invalidate_rkey);
+ hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
}
if (wqe->wr.opcode == IB_WR_SEND) {
qp->s_state = OP(SEND_LAST);
- } else {
+ } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
+ } else {
+ qp->s_state = OP(SEND_LAST_WITH_INVALIDATE);
+ /* invalidate data comes after the BTH */
+ ohdr->u.ieth = cpu_to_be32(wqe->wr.ex.invalidate_rkey);
+ hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
ibp->rvp.n_rc_timeouts++;
qp->s_flags &= ~RVT_S_TIMER;
del_timer(&qp->s_timer);
- trace_hfi1_rc_timeout(qp, qp->s_last_psn + 1);
+ trace_hfi1_timeout(qp, qp->s_last_psn + 1);
restart_rc(qp, qp->s_last_psn + 1, 1);
hfi1_schedule_send(qp);
}
* If we were waiting for sends to complete before re-sending,
* and they are now complete, restart sending.
*/
- trace_hfi1_rc_sendcomplete(qp, psn);
+ trace_hfi1_sendcomplete(qp, psn);
if (qp->s_flags & RVT_S_WAIT_PSN &&
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
qp->s_flags &= ~RVT_S_WAIT_PSN;
spin_lock_irqsave(&qp->s_lock, flags);
- trace_hfi1_rc_ack(qp, psn);
+ trace_hfi1_ack(qp, psn);
/* Ignore invalid responses. */
smp_read_barrier_depends(); /* see post_one_send */
u8 i, prev;
int old_req;
- trace_hfi1_rc_rcv_error(qp, psn);
+ trace_hfi1_rcv_error(qp, psn);
if (diff > 0) {
/*
* Packet sequence error.
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_other_headers *ohdr = packet->ohdr;
u32 bth0, opcode;
u32 hdrsize = packet->hlen;
int diff;
struct ib_reth *reth;
unsigned long flags;
- u32 bth1;
int ret, is_fecn = 0;
int copy_last = 0;
+ u32 rkey;
bth0 = be32_to_cpu(ohdr->bth[0]);
if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
return;
- bth1 = be32_to_cpu(ohdr->bth[1]);
- if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) {
- if (bth1 & HFI1_BECN_SMASK) {
- u16 rlid = qp->remote_ah_attr.dlid;
- u32 lqpn, rqpn;
-
- lqpn = qp->ibqp.qp_num;
- rqpn = qp->remote_qpn;
- process_becn(
- ppd,
- qp->remote_ah_attr.sl,
- rlid, lqpn, rqpn,
- IB_CC_SVCTYPE_RC);
- }
- is_fecn = bth1 & HFI1_FECN_SMASK;
- }
+ is_fecn = process_ecn(qp, packet, false);
psn = be32_to_cpu(ohdr->bth[2]);
opcode = (bth0 >> 24) & 0xff;
case OP(SEND_MIDDLE):
if (opcode == OP(SEND_MIDDLE) ||
opcode == OP(SEND_LAST) ||
- opcode == OP(SEND_LAST_WITH_IMMEDIATE))
+ opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
+ opcode == OP(SEND_LAST_WITH_INVALIDATE))
break;
goto nack_inv;
if (opcode == OP(SEND_MIDDLE) ||
opcode == OP(SEND_LAST) ||
opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
+ opcode == OP(SEND_LAST_WITH_INVALIDATE) ||
opcode == OP(RDMA_WRITE_MIDDLE) ||
opcode == OP(RDMA_WRITE_LAST) ||
opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
+ case OP(SEND_ONLY_WITH_INVALIDATE):
ret = hfi1_rvt_get_rwqe(qp, 0);
if (ret < 0)
goto nack_op_err;
qp->r_rcv_len = 0;
if (opcode == OP(SEND_ONLY))
goto no_immediate_data;
+ if (opcode == OP(SEND_ONLY_WITH_INVALIDATE))
+ goto send_last_inv;
/* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
wc.ex.imm_data = ohdr->u.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
goto send_last;
+ case OP(SEND_LAST_WITH_INVALIDATE):
+send_last_inv:
+ rkey = be32_to_cpu(ohdr->u.ieth);
+ if (rvt_invalidate_rkey(qp, rkey))
+ goto no_immediate_data;
+ wc.ex.invalidate_rkey = rkey;
+ wc.wc_flags = IB_WC_WITH_INVALIDATE;
+ goto send_last;
case OP(RDMA_WRITE_LAST):
copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
/* fall through */
int ret;
int copy_last = 0;
u32 to;
+ int local_ops = 0;
rcu_read_lock();
sqp->s_sge.num_sge = wqe->wr.num_sge;
sqp->s_len = wqe->length;
switch (wqe->wr.opcode) {
+ case IB_WR_REG_MR:
+ goto send_comp;
+
+ case IB_WR_LOCAL_INV:
+ if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
+ if (rvt_invalidate_rkey(sqp,
+ wqe->wr.ex.invalidate_rkey))
+ send_status = IB_WC_LOC_PROT_ERR;
+ local_ops = 1;
+ }
+ goto send_comp;
+
+ case IB_WR_SEND_WITH_INV:
+ if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
+ wc.wc_flags = IB_WC_WITH_INVALIDATE;
+ wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
+ }
+ goto send;
+
case IB_WR_SEND_WITH_IMM:
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = wqe->wr.ex.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
+send:
ret = hfi1_rvt_get_rwqe(qp, 0);
if (ret < 0)
goto op_err;
flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
hfi1_send_complete(sqp, wqe, send_status);
+ if (local_ops) {
+ atomic_dec(&sqp->local_ops_pending);
+ local_ops = 0;
+ }
goto again;
rnr_nak:
return sizeof(struct ib_grh) / sizeof(u32);
}
-#define BTH2_OFFSET (offsetof(struct hfi1_pio_header, hdr.u.oth.bth[2]) / 4)
+#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, hdr.u.oth.bth[2]) / 4)
/**
- * build_ahg - create ahg in s_hdr
+ * build_ahg - create ahg in s_ahg
* @qp: a pointer to QP
* @npsn: the next PSN for the request/response
*
qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
if (qp->s_ahgidx >= 0) {
qp->s_ahgpsn = npsn;
- priv->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
+ priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
/* save to protect a change in another thread */
- priv->s_hdr->sde = priv->s_sde;
- priv->s_hdr->ahgidx = qp->s_ahgidx;
+ priv->s_ahg->ahgidx = qp->s_ahgidx;
qp->s_flags |= RVT_S_AHG_VALID;
}
} else {
/* subsequent middle after valid */
if (qp->s_ahgidx >= 0) {
- priv->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG;
- priv->s_hdr->ahgidx = qp->s_ahgidx;
- priv->s_hdr->ahgcount++;
- priv->s_hdr->ahgdesc[0] =
+ priv->s_ahg->tx_flags |= SDMA_TXREQ_F_USE_AHG;
+ priv->s_ahg->ahgidx = qp->s_ahgidx;
+ priv->s_ahg->ahgcount++;
+ priv->s_ahg->ahgdesc[0] =
sdma_build_ahg_descriptor(
(__force u16)cpu_to_be16((u16)npsn),
BTH2_OFFSET,
16);
if ((npsn & 0xffff0000) !=
(qp->s_ahgpsn & 0xffff0000)) {
- priv->s_hdr->ahgcount++;
- priv->s_hdr->ahgdesc[1] =
+ priv->s_ahg->ahgcount++;
+ priv->s_ahg->ahgdesc[1] =
sdma_build_ahg_descriptor(
(__force u16)cpu_to_be16(
(u16)(npsn >> 16)),
}
lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
/*
- * reset s_hdr/AHG fields
+ * reset s_ahg/AHG fields
*
* This insures that the ahgentry/ahgcount
* are at a non-AHG default to protect
* build_ahg() will modify as appropriate
* to use the AHG feature.
*/
- priv->s_hdr->tx_flags = 0;
- priv->s_hdr->ahgcount = 0;
- priv->s_hdr->ahgidx = 0;
- priv->s_hdr->sde = NULL;
+ priv->s_ahg->tx_flags = 0;
+ priv->s_ahg->ahgcount = 0;
+ priv->s_ahg->ahgidx = 0;
if (qp->s_mig_state == IB_MIG_MIGRATED)
bth0 |= IB_BTH_MIG_REQ;
else
*/
if (hfi1_verbs_send(qp, &ps))
return;
- /* Record that s_hdr is empty. */
+ /* Record that s_ahg is empty. */
qp->s_hdrwords = 0;
/* allow other tasks to run */
if (unlikely(time_after(jiffies, timeout))) {
#include "hfi.h"
#include "mad.h"
#include "trace.h"
+#include "affinity.h"
/*
* Start of per-port congestion control structures and support code
return ret;
}
+static ssize_t show_sdma_affinity(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct hfi1_ibdev *dev =
+ container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+ struct hfi1_devdata *dd = dd_from_dev(dev);
+
+ return hfi1_get_sdma_affinity(dd, buf);
+}
+
+static ssize_t store_sdma_affinity(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hfi1_ibdev *dev =
+ container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+ struct hfi1_devdata *dd = dd_from_dev(dev);
+
+ return hfi1_set_sdma_affinity(dd, buf, count);
+}
+
/*
* end of per-unit (or driver, in some cases, but replicated
* per unit) functions
static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
+static DEVICE_ATTR(sdma_affinity, S_IWUSR | S_IRUGO, show_sdma_affinity,
+ store_sdma_affinity);
static struct device_attribute *hfi1_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_boardversion,
&dev_attr_tempsense,
&dev_attr_chip_reset,
+ &dev_attr_sdma_affinity,
};
int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
-#undef TRACE_SYSTEM_VAR
-#define TRACE_SYSTEM_VAR hfi1
-
-#if !defined(__HFI1_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define __HFI1_TRACE_H
-
-#include <linux/tracepoint.h>
-#include <linux/trace_seq.h>
-
-#include "hfi.h"
-#include "mad.h"
-#include "sdma.h"
-
-#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
-#define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev))
-
-#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype }
-#define show_packettype(etype) \
-__print_symbolic(etype, \
- packettype_name(EXPECTED), \
- packettype_name(EAGER), \
- packettype_name(IB), \
- packettype_name(ERROR), \
- packettype_name(BYPASS))
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_rx
-
-TRACE_EVENT(hfi1_rcvhdr,
- TP_PROTO(struct hfi1_devdata *dd,
- u32 ctxt,
- u64 eflags,
- u32 etype,
- u32 hlen,
- u32 tlen,
- u32 updegr,
- u32 etail
- ),
- TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
- __field(u64, eflags)
- __field(u32, ctxt)
- __field(u32, etype)
- __field(u32, hlen)
- __field(u32, tlen)
- __field(u32, updegr)
- __field(u32, etail)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(dd);
- __entry->eflags = eflags;
- __entry->ctxt = ctxt;
- __entry->etype = etype;
- __entry->hlen = hlen;
- __entry->tlen = tlen;
- __entry->updegr = updegr;
- __entry->etail = etail;
- ),
- TP_printk(
- "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d",
- __get_str(dev),
- __entry->ctxt,
- __entry->eflags,
- __entry->etype, show_packettype(__entry->etype),
- __entry->hlen,
- __entry->tlen,
- __entry->updegr,
- __entry->etail
- )
-);
-
-TRACE_EVENT(hfi1_receive_interrupt,
- TP_PROTO(struct hfi1_devdata *dd, u32 ctxt),
- TP_ARGS(dd, ctxt),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
- __field(u32, ctxt)
- __field(u8, slow_path)
- __field(u8, dma_rtail)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(dd);
- __entry->ctxt = ctxt;
- if (dd->rcd[ctxt]->do_interrupt ==
- &handle_receive_interrupt) {
- __entry->slow_path = 1;
- __entry->dma_rtail = 0xFF;
- } else if (dd->rcd[ctxt]->do_interrupt ==
- &handle_receive_interrupt_dma_rtail){
- __entry->dma_rtail = 1;
- __entry->slow_path = 0;
- } else if (dd->rcd[ctxt]->do_interrupt ==
- &handle_receive_interrupt_nodma_rtail) {
- __entry->dma_rtail = 0;
- __entry->slow_path = 0;
- }
- ),
- TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d",
- __get_str(dev),
- __entry->ctxt,
- __entry->slow_path,
- __entry->dma_rtail
- )
-);
-
-TRACE_EVENT(hfi1_exp_tid_reg,
- TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr,
- u32 npages, unsigned long va, unsigned long pa,
- dma_addr_t dma),
- TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
- TP_STRUCT__entry(
- __field(unsigned, ctxt)
- __field(u16, subctxt)
- __field(u32, rarr)
- __field(u32, npages)
- __field(unsigned long, va)
- __field(unsigned long, pa)
- __field(dma_addr_t, dma)
- ),
- TP_fast_assign(
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->rarr = rarr;
- __entry->npages = npages;
- __entry->va = va;
- __entry->pa = pa;
- __entry->dma = dma;
- ),
- TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
- __entry->ctxt,
- __entry->subctxt,
- __entry->rarr,
- __entry->npages,
- __entry->pa,
- __entry->va,
- __entry->dma
- )
- );
-
-TRACE_EVENT(hfi1_exp_tid_unreg,
- TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, u32 npages,
- unsigned long va, unsigned long pa, dma_addr_t dma),
- TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
- TP_STRUCT__entry(
- __field(unsigned, ctxt)
- __field(u16, subctxt)
- __field(u32, rarr)
- __field(u32, npages)
- __field(unsigned long, va)
- __field(unsigned long, pa)
- __field(dma_addr_t, dma)
- ),
- TP_fast_assign(
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->rarr = rarr;
- __entry->npages = npages;
- __entry->va = va;
- __entry->pa = pa;
- __entry->dma = dma;
- ),
- TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
- __entry->ctxt,
- __entry->subctxt,
- __entry->rarr,
- __entry->npages,
- __entry->pa,
- __entry->va,
- __entry->dma
- )
- );
-
-TRACE_EVENT(hfi1_exp_tid_inval,
- TP_PROTO(unsigned ctxt, u16 subctxt, unsigned long va, u32 rarr,
- u32 npages, dma_addr_t dma),
- TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
- TP_STRUCT__entry(
- __field(unsigned, ctxt)
- __field(u16, subctxt)
- __field(unsigned long, va)
- __field(u32, rarr)
- __field(u32, npages)
- __field(dma_addr_t, dma)
- ),
- TP_fast_assign(
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->va = va;
- __entry->rarr = rarr;
- __entry->npages = npages;
- __entry->dma = dma;
- ),
- TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
- __entry->ctxt,
- __entry->subctxt,
- __entry->rarr,
- __entry->npages,
- __entry->va,
- __entry->dma
- )
- );
-
-TRACE_EVENT(hfi1_mmu_invalidate,
- TP_PROTO(unsigned ctxt, u16 subctxt, const char *type,
- unsigned long start, unsigned long end),
- TP_ARGS(ctxt, subctxt, type, start, end),
- TP_STRUCT__entry(
- __field(unsigned, ctxt)
- __field(u16, subctxt)
- __string(type, type)
- __field(unsigned long, start)
- __field(unsigned long, end)
- ),
- TP_fast_assign(
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __assign_str(type, type);
- __entry->start = start;
- __entry->end = end;
- ),
- TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx",
- __entry->ctxt,
- __entry->subctxt,
- __get_str(type),
- __entry->start,
- __entry->end
- )
- );
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_tx
-
-TRACE_EVENT(hfi1_piofree,
- TP_PROTO(struct send_context *sc, int extra),
- TP_ARGS(sc, extra),
- TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
- __field(u32, sw_index)
- __field(u32, hw_context)
- __field(int, extra)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
- __entry->sw_index = sc->sw_index;
- __entry->hw_context = sc->hw_context;
- __entry->extra = extra;
- ),
- TP_printk("[%s] ctxt %u(%u) extra %d",
- __get_str(dev),
- __entry->sw_index,
- __entry->hw_context,
- __entry->extra
- )
-);
-
-TRACE_EVENT(hfi1_wantpiointr,
- TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl),
- TP_ARGS(sc, needint, credit_ctrl),
- TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
- __field(u32, sw_index)
- __field(u32, hw_context)
- __field(u32, needint)
- __field(u64, credit_ctrl)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
- __entry->sw_index = sc->sw_index;
- __entry->hw_context = sc->hw_context;
- __entry->needint = needint;
- __entry->credit_ctrl = credit_ctrl;
- ),
- TP_printk("[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx",
- __get_str(dev),
- __entry->sw_index,
- __entry->hw_context,
- __entry->needint,
- (unsigned long long)__entry->credit_ctrl
- )
-);
-
-DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template,
- TP_PROTO(struct rvt_qp *qp, u32 flags),
- TP_ARGS(qp, flags),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
- __field(u32, qpn)
- __field(u32, flags)
- __field(u32, s_flags)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
- __entry->flags = flags;
- __entry->qpn = qp->ibqp.qp_num;
- __entry->s_flags = qp->s_flags;
- ),
- TP_printk(
- "[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
- __get_str(dev),
- __entry->qpn,
- __entry->flags,
- __entry->s_flags
- )
-);
-
-DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup,
- TP_PROTO(struct rvt_qp *qp, u32 flags),
- TP_ARGS(qp, flags));
-
-DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep,
- TP_PROTO(struct rvt_qp *qp, u32 flags),
- TP_ARGS(qp, flags));
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_ibhdrs
-
-u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr);
-const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
-
-#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
-
-const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1);
-
-#define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1)
-
-#define lrh_name(lrh) { HFI1_##lrh, #lrh }
-#define show_lnh(lrh) \
-__print_symbolic(lrh, \
- lrh_name(LRH_BTH), \
- lrh_name(LRH_GRH))
-
-#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode }
-#define show_ib_opcode(opcode) \
-__print_symbolic(opcode, \
- ib_opcode_name(RC_SEND_FIRST), \
- ib_opcode_name(RC_SEND_MIDDLE), \
- ib_opcode_name(RC_SEND_LAST), \
- ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(RC_SEND_ONLY), \
- ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(RC_RDMA_WRITE_FIRST), \
- ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \
- ib_opcode_name(RC_RDMA_WRITE_LAST), \
- ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(RC_RDMA_WRITE_ONLY), \
- ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(RC_RDMA_READ_REQUEST), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \
- ib_opcode_name(RC_ACKNOWLEDGE), \
- ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
- ib_opcode_name(RC_COMPARE_SWAP), \
- ib_opcode_name(RC_FETCH_ADD), \
- ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \
- ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \
- ib_opcode_name(UC_SEND_FIRST), \
- ib_opcode_name(UC_SEND_MIDDLE), \
- ib_opcode_name(UC_SEND_LAST), \
- ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(UC_SEND_ONLY), \
- ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(UC_RDMA_WRITE_FIRST), \
- ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \
- ib_opcode_name(UC_RDMA_WRITE_LAST), \
- ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(UC_RDMA_WRITE_ONLY), \
- ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(UD_SEND_ONLY), \
- ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(CNP))
-
-#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x"
-#define BTH_PRN \
- "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \
- "f %d b %d qpn 0x%.6x a %d psn 0x%.8x"
-#define EHDR_PRN "%s"
-
-DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
- TP_PROTO(struct hfi1_devdata *dd,
- struct hfi1_ib_header *hdr),
- TP_ARGS(dd, hdr),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- /* LRH */
- __field(u8, vl)
- __field(u8, lver)
- __field(u8, sl)
- __field(u8, lnh)
- __field(u16, dlid)
- __field(u16, len)
- __field(u16, slid)
- /* BTH */
- __field(u8, opcode)
- __field(u8, se)
- __field(u8, m)
- __field(u8, pad)
- __field(u8, tver)
- __field(u16, pkey)
- __field(u8, f)
- __field(u8, b)
- __field(u32, qpn)
- __field(u8, a)
- __field(u32, psn)
- /* extended headers */
- __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
- ),
- TP_fast_assign(
- struct hfi1_other_headers *ohdr;
-
- DD_DEV_ASSIGN(dd);
- /* LRH */
- __entry->vl =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
- __entry->lver =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
- __entry->sl =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
- __entry->lnh =
- (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
- __entry->dlid =
- be16_to_cpu(hdr->lrh[1]);
- /* allow for larger len */
- __entry->len =
- be16_to_cpu(hdr->lrh[2]);
- __entry->slid =
- be16_to_cpu(hdr->lrh[3]);
- /* BTH */
- if (__entry->lnh == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth;
- else
- ohdr = &hdr->u.l.oth;
- __entry->opcode =
- (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
- __entry->se =
- (be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
- __entry->m =
- (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
- __entry->pad =
- (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
- __entry->tver =
- (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
- __entry->pkey =
- be32_to_cpu(ohdr->bth[0]) & 0xffff;
- __entry->f =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) &
- HFI1_FECN_MASK;
- __entry->b =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) &
- HFI1_BECN_MASK;
- __entry->qpn =
- be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
- __entry->a =
- (be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
- /* allow for larger PSN */
- __entry->psn =
- be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
- /* extended headers */
- memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
- ibhdr_exhdr_len(hdr));
- ),
- TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
- __get_str(dev),
- /* LRH */
- __entry->vl,
- __entry->lver,
- __entry->sl,
- __entry->lnh, show_lnh(__entry->lnh),
- __entry->dlid,
- __entry->len,
- __entry->slid,
- /* BTH */
- __entry->opcode, show_ib_opcode(__entry->opcode),
- __entry->se,
- __entry->m,
- __entry->pad,
- __entry->tver,
- __entry->pkey,
- __entry->f,
- __entry->b,
- __entry->qpn,
- __entry->a,
- __entry->psn,
- /* extended headers */
- __parse_ib_ehdrs(
- __entry->opcode,
- (void *)__get_dynamic_array(ehdrs))
- )
-);
-
-DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
- TP_ARGS(dd, hdr));
-
-DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
- TP_ARGS(dd, hdr));
-
-DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
- TP_ARGS(dd, hdr));
-
-DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
- TP_ARGS(dd, hdr));
-
-#define SNOOP_PRN \
- "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \
- "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]"
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_snoop
-
-TRACE_EVENT(snoop_capture,
- TP_PROTO(struct hfi1_devdata *dd,
- int hdr_len,
- struct hfi1_ib_header *hdr,
- int data_len,
- void *data),
- TP_ARGS(dd, hdr_len, hdr, data_len, data),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __field(u16, slid)
- __field(u16, dlid)
- __field(u32, qpn)
- __field(u8, opcode)
- __field(u8, sl)
- __field(u16, pkey)
- __field(u32, hdr_len)
- __field(u32, data_len)
- __field(u8, lnh)
- __dynamic_array(u8, raw_hdr, hdr_len)
- __dynamic_array(u8, raw_pkt, data_len)
- ),
- TP_fast_assign(
- struct hfi1_other_headers *ohdr;
-
- __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
- if (__entry->lnh == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth;
- else
- ohdr = &hdr->u.l.oth;
- DD_DEV_ASSIGN(dd);
- __entry->slid = be16_to_cpu(hdr->lrh[3]);
- __entry->dlid = be16_to_cpu(hdr->lrh[1]);
- __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
- __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
- __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
- __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff;
- __entry->hdr_len = hdr_len;
- __entry->data_len = data_len;
- memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len);
- memcpy(__get_dynamic_array(raw_pkt), data, data_len);
- ),
- TP_printk(
- "[%s] " SNOOP_PRN,
- __get_str(dev),
- __entry->slid,
- __entry->dlid,
- __entry->qpn,
- __entry->opcode,
- show_ib_opcode(__entry->opcode),
- __entry->sl,
- __entry->pkey,
- __entry->hdr_len,
- __entry->data_len
- )
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_ctxts
-
-#define UCTXT_FMT \
- "cred:%u, credaddr:0x%llx, piobase:0x%llx, rcvhdr_cnt:%u, " \
- "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx"
-TRACE_EVENT(hfi1_uctxtdata,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt),
- TP_ARGS(dd, uctxt),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
- __field(unsigned, ctxt)
- __field(u32, credits)
- __field(u64, hw_free)
- __field(u64, piobase)
- __field(u16, rcvhdrq_cnt)
- __field(u64, rcvhdrq_phys)
- __field(u32, eager_cnt)
- __field(u64, rcvegr_phys)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(dd);
- __entry->ctxt = uctxt->ctxt;
- __entry->credits = uctxt->sc->credits;
- __entry->hw_free = (u64)uctxt->sc->hw_free;
- __entry->piobase = (u64)uctxt->sc->base_addr;
- __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
- __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
- __entry->eager_cnt = uctxt->egrbufs.alloced;
- __entry->rcvegr_phys =
- uctxt->egrbufs.rcvtids[0].phys;
- ),
- TP_printk("[%s] ctxt %u " UCTXT_FMT,
- __get_str(dev),
- __entry->ctxt,
- __entry->credits,
- __entry->hw_free,
- __entry->piobase,
- __entry->rcvhdrq_cnt,
- __entry->rcvhdrq_phys,
- __entry->eager_cnt,
- __entry->rcvegr_phys
- )
-);
-
-#define CINFO_FMT \
- "egrtids:%u, egr_size:%u, hdrq_cnt:%u, hdrq_size:%u, sdma_ring_size:%u"
-TRACE_EVENT(hfi1_ctxt_info,
- TP_PROTO(struct hfi1_devdata *dd, unsigned ctxt, unsigned subctxt,
- struct hfi1_ctxt_info cinfo),
- TP_ARGS(dd, ctxt, subctxt, cinfo),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
- __field(unsigned, ctxt)
- __field(unsigned, subctxt)
- __field(u16, egrtids)
- __field(u16, rcvhdrq_cnt)
- __field(u16, rcvhdrq_size)
- __field(u16, sdma_ring_size)
- __field(u32, rcvegr_size)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(dd);
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->egrtids = cinfo.egrtids;
- __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
- __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
- __entry->sdma_ring_size = cinfo.sdma_ring_size;
- __entry->rcvegr_size = cinfo.rcvegr_size;
- ),
- TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
- __get_str(dev),
- __entry->ctxt,
- __entry->subctxt,
- __entry->egrtids,
- __entry->rcvegr_size,
- __entry->rcvhdrq_cnt,
- __entry->rcvhdrq_size,
- __entry->sdma_ring_size
- )
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_sma
-
-#define BCT_FORMAT \
- "shared_limit %x vls 0-7 [%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x] 15 [%x,%x]"
-
-#define BCT(field) \
- be16_to_cpu( \
- ((struct buffer_control *)__get_dynamic_array(bct))->field \
- )
-
-DECLARE_EVENT_CLASS(hfi1_bct_template,
- TP_PROTO(struct hfi1_devdata *dd,
- struct buffer_control *bc),
- TP_ARGS(dd, bc),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
- __dynamic_array(u8, bct, sizeof(*bc))
- ),
- TP_fast_assign(DD_DEV_ASSIGN(dd);
- memcpy(__get_dynamic_array(bct), bc,
- sizeof(*bc));
- ),
- TP_printk(BCT_FORMAT,
- BCT(overall_shared_limit),
-
- BCT(vl[0].dedicated),
- BCT(vl[0].shared),
-
- BCT(vl[1].dedicated),
- BCT(vl[1].shared),
-
- BCT(vl[2].dedicated),
- BCT(vl[2].shared),
-
- BCT(vl[3].dedicated),
- BCT(vl[3].shared),
-
- BCT(vl[4].dedicated),
- BCT(vl[4].shared),
-
- BCT(vl[5].dedicated),
- BCT(vl[5].shared),
-
- BCT(vl[6].dedicated),
- BCT(vl[6].shared),
-
- BCT(vl[7].dedicated),
- BCT(vl[7].shared),
-
- BCT(vl[15].dedicated),
- BCT(vl[15].shared)
- )
-);
-
-DEFINE_EVENT(hfi1_bct_template, bct_set,
- TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
- TP_ARGS(dd, bc));
-
-DEFINE_EVENT(hfi1_bct_template, bct_get,
- TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
- TP_ARGS(dd, bc));
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_sdma
-
-TRACE_EVENT(hfi1_sdma_descriptor,
- TP_PROTO(struct sdma_engine *sde,
- u64 desc0,
- u64 desc1,
- u16 e,
- void *descp),
- TP_ARGS(sde, desc0, desc1, e, descp),
- TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
- __field(void *, descp)
- __field(u64, desc0)
- __field(u64, desc1)
- __field(u16, e)
- __field(u8, idx)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __entry->desc0 = desc0;
- __entry->desc1 = desc1;
- __entry->idx = sde->this_idx;
- __entry->descp = descp;
- __entry->e = e;
- ),
- TP_printk(
- "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u",
- __get_str(dev),
- __entry->idx,
- __parse_sdma_flags(__entry->desc0, __entry->desc1),
- (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) &
- SDMA_DESC0_PHY_ADDR_MASK,
- (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) &
- SDMA_DESC1_GENERATION_MASK),
- (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) &
- SDMA_DESC0_BYTE_COUNT_MASK),
- __entry->desc0,
- __entry->desc1,
- __entry->descp,
- __entry->e
- )
-);
-
-TRACE_EVENT(hfi1_sdma_engine_select,
- TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx),
- TP_ARGS(dd, sel, vl, idx),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
- __field(u32, sel)
- __field(u8, vl)
- __field(u8, idx)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(dd);
- __entry->sel = sel;
- __entry->vl = vl;
- __entry->idx = idx;
- ),
- TP_printk("[%s] selecting SDE %u sel 0x%x vl %u",
- __get_str(dev),
- __entry->idx,
- __entry->sel,
- __entry->vl
- )
-);
-
-DECLARE_EVENT_CLASS(hfi1_sdma_engine_class,
- TP_PROTO(struct sdma_engine *sde, u64 status),
- TP_ARGS(sde, status),
- TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
- __field(u64, status)
- __field(u8, idx)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __entry->status = status;
- __entry->idx = sde->this_idx;
- ),
- TP_printk("[%s] SDE(%u) status %llx",
- __get_str(dev),
- __entry->idx,
- (unsigned long long)__entry->status
- )
-);
-
-DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_interrupt,
- TP_PROTO(struct sdma_engine *sde, u64 status),
- TP_ARGS(sde, status)
-);
-
-DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_progress,
- TP_PROTO(struct sdma_engine *sde, u64 status),
- TP_ARGS(sde, status)
-);
-
-DECLARE_EVENT_CLASS(hfi1_sdma_ahg_ad,
- TP_PROTO(struct sdma_engine *sde, int aidx),
- TP_ARGS(sde, aidx),
- TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
- __field(int, aidx)
- __field(u8, idx)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __entry->idx = sde->this_idx;
- __entry->aidx = aidx;
- ),
- TP_printk("[%s] SDE(%u) aidx %d",
- __get_str(dev),
- __entry->idx,
- __entry->aidx
- )
-);
-
-DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_allocate,
- TP_PROTO(struct sdma_engine *sde, int aidx),
- TP_ARGS(sde, aidx));
-
-DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_deallocate,
- TP_PROTO(struct sdma_engine *sde, int aidx),
- TP_ARGS(sde, aidx));
-
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
-TRACE_EVENT(hfi1_sdma_progress,
- TP_PROTO(struct sdma_engine *sde,
- u16 hwhead,
- u16 swhead,
- struct sdma_txreq *txp
- ),
- TP_ARGS(sde, hwhead, swhead, txp),
- TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
- __field(u64, sn)
- __field(u16, hwhead)
- __field(u16, swhead)
- __field(u16, txnext)
- __field(u16, tx_tail)
- __field(u16, tx_head)
- __field(u8, idx)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __entry->hwhead = hwhead;
- __entry->swhead = swhead;
- __entry->tx_tail = sde->tx_tail;
- __entry->tx_head = sde->tx_head;
- __entry->txnext = txp ? txp->next_descq_idx : ~0;
- __entry->idx = sde->this_idx;
- __entry->sn = txp ? txp->sn : ~0;
- ),
- TP_printk(
- "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
- __get_str(dev),
- __entry->idx,
- __entry->sn,
- __entry->hwhead,
- __entry->swhead,
- __entry->txnext,
- __entry->tx_head,
- __entry->tx_tail
- )
-);
-#else
-TRACE_EVENT(hfi1_sdma_progress,
- TP_PROTO(struct sdma_engine *sde,
- u16 hwhead, u16 swhead,
- struct sdma_txreq *txp
- ),
- TP_ARGS(sde, hwhead, swhead, txp),
- TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
- __field(u16, hwhead)
- __field(u16, swhead)
- __field(u16, txnext)
- __field(u16, tx_tail)
- __field(u16, tx_head)
- __field(u8, idx)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __entry->hwhead = hwhead;
- __entry->swhead = swhead;
- __entry->tx_tail = sde->tx_tail;
- __entry->tx_head = sde->tx_head;
- __entry->txnext = txp ? txp->next_descq_idx : ~0;
- __entry->idx = sde->this_idx;
- ),
- TP_printk(
- "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
- __get_str(dev),
- __entry->idx,
- __entry->hwhead,
- __entry->swhead,
- __entry->txnext,
- __entry->tx_head,
- __entry->tx_tail
- )
-);
-#endif
-
-DECLARE_EVENT_CLASS(hfi1_sdma_sn,
- TP_PROTO(struct sdma_engine *sde, u64 sn),
- TP_ARGS(sde, sn),
- TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
- __field(u64, sn)
- __field(u8, idx)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __entry->sn = sn;
- __entry->idx = sde->this_idx;
- ),
- TP_printk("[%s] SDE(%u) sn %llu",
- __get_str(dev),
- __entry->idx,
- __entry->sn
- )
-);
-
-DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn,
- TP_PROTO(
- struct sdma_engine *sde,
- u64 sn
- ),
- TP_ARGS(sde, sn)
-);
-
-DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_in_sn,
- TP_PROTO(struct sdma_engine *sde, u64 sn),
- TP_ARGS(sde, sn)
-);
-
-#define USDMA_HDR_FORMAT \
- "[%s:%u:%u:%u] PBC=(0x%x 0x%x) LRH=(0x%x 0x%x) BTH=(0x%x 0x%x 0x%x) KDETH=(0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x) TIDVal=0x%x"
-
-TRACE_EVENT(hfi1_sdma_user_header,
- TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req,
- struct hfi1_pkt_header *hdr, u32 tidval),
- TP_ARGS(dd, ctxt, subctxt, req, hdr, tidval),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __field(u16, ctxt)
- __field(u8, subctxt)
- __field(u16, req)
- __field(__le32, pbc0)
- __field(__le32, pbc1)
- __field(__be32, lrh0)
- __field(__be32, lrh1)
- __field(__be32, bth0)
- __field(__be32, bth1)
- __field(__be32, bth2)
- __field(__le32, kdeth0)
- __field(__le32, kdeth1)
- __field(__le32, kdeth2)
- __field(__le32, kdeth3)
- __field(__le32, kdeth4)
- __field(__le32, kdeth5)
- __field(__le32, kdeth6)
- __field(__le32, kdeth7)
- __field(__le32, kdeth8)
- __field(u32, tidval)
- ),
- TP_fast_assign(
- __le32 *pbc = (__le32 *)hdr->pbc;
- __be32 *lrh = (__be32 *)hdr->lrh;
- __be32 *bth = (__be32 *)hdr->bth;
- __le32 *kdeth = (__le32 *)&hdr->kdeth;
-
- DD_DEV_ASSIGN(dd);
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->req = req;
- __entry->pbc0 = pbc[0];
- __entry->pbc1 = pbc[1];
- __entry->lrh0 = be32_to_cpu(lrh[0]);
- __entry->lrh1 = be32_to_cpu(lrh[1]);
- __entry->bth0 = be32_to_cpu(bth[0]);
- __entry->bth1 = be32_to_cpu(bth[1]);
- __entry->bth2 = be32_to_cpu(bth[2]);
- __entry->kdeth0 = kdeth[0];
- __entry->kdeth1 = kdeth[1];
- __entry->kdeth2 = kdeth[2];
- __entry->kdeth3 = kdeth[3];
- __entry->kdeth4 = kdeth[4];
- __entry->kdeth5 = kdeth[5];
- __entry->kdeth6 = kdeth[6];
- __entry->kdeth7 = kdeth[7];
- __entry->kdeth8 = kdeth[8];
- __entry->tidval = tidval;
- ),
- TP_printk(USDMA_HDR_FORMAT,
- __get_str(dev),
- __entry->ctxt,
- __entry->subctxt,
- __entry->req,
- __entry->pbc1,
- __entry->pbc0,
- __entry->lrh0,
- __entry->lrh1,
- __entry->bth0,
- __entry->bth1,
- __entry->bth2,
- __entry->kdeth0,
- __entry->kdeth1,
- __entry->kdeth2,
- __entry->kdeth3,
- __entry->kdeth4,
- __entry->kdeth5,
- __entry->kdeth6,
- __entry->kdeth7,
- __entry->kdeth8,
- __entry->tidval
- )
- );
-
-#define SDMA_UREQ_FMT \
- "[%s:%u:%u] ver/op=0x%x, iovcnt=%u, npkts=%u, frag=%u, idx=%u"
-TRACE_EVENT(hfi1_sdma_user_reqinfo,
- TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i),
- TP_ARGS(dd, ctxt, subctxt, i),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd);
- __field(u16, ctxt)
- __field(u8, subctxt)
- __field(u8, ver_opcode)
- __field(u8, iovcnt)
- __field(u16, npkts)
- __field(u16, fragsize)
- __field(u16, comp_idx)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd);
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->ver_opcode = i[0] & 0xff;
- __entry->iovcnt = (i[0] >> 8) & 0xff;
- __entry->npkts = i[1];
- __entry->fragsize = i[2];
- __entry->comp_idx = i[3];
- ),
- TP_printk(SDMA_UREQ_FMT,
- __get_str(dev),
- __entry->ctxt,
- __entry->subctxt,
- __entry->ver_opcode,
- __entry->iovcnt,
- __entry->npkts,
- __entry->fragsize,
- __entry->comp_idx
- )
- );
-
-#define usdma_complete_name(st) { st, #st }
-#define show_usdma_complete_state(st) \
- __print_symbolic(st, \
- usdma_complete_name(FREE), \
- usdma_complete_name(QUEUED), \
- usdma_complete_name(COMPLETE), \
- usdma_complete_name(ERROR))
-
-TRACE_EVENT(hfi1_sdma_user_completion,
- TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 idx,
- u8 state, int code),
- TP_ARGS(dd, ctxt, subctxt, idx, state, code),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __field(u16, ctxt)
- __field(u8, subctxt)
- __field(u16, idx)
- __field(u8, state)
- __field(int, code)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd);
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->idx = idx;
- __entry->state = state;
- __entry->code = code;
- ),
- TP_printk("[%s:%u:%u:%u] SDMA completion state %s (%d)",
- __get_str(dev), __entry->ctxt, __entry->subctxt,
- __entry->idx, show_usdma_complete_state(__entry->state),
- __entry->code)
- );
-
-const char *print_u32_array(struct trace_seq *, u32 *, int);
-#define __print_u32_hex(arr, len) print_u32_array(p, arr, len)
-
-TRACE_EVENT(hfi1_sdma_user_header_ahg,
- TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req,
- u8 sde, u8 ahgidx, u32 *ahg, int len, u32 tidval),
- TP_ARGS(dd, ctxt, subctxt, req, sde, ahgidx, ahg, len, tidval),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __field(u16, ctxt)
- __field(u8, subctxt)
- __field(u16, req)
- __field(u8, sde)
- __field(u8, idx)
- __field(int, len)
- __field(u32, tidval)
- __array(u32, ahg, 10)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd);
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->req = req;
- __entry->sde = sde;
- __entry->idx = ahgidx;
- __entry->len = len;
- __entry->tidval = tidval;
- memcpy(__entry->ahg, ahg, len * sizeof(u32));
- ),
- TP_printk("[%s:%u:%u:%u] (SDE%u/AHG%u) ahg[0-%d]=(%s) TIDVal=0x%x",
- __get_str(dev),
- __entry->ctxt,
- __entry->subctxt,
- __entry->req,
- __entry->sde,
- __entry->idx,
- __entry->len - 1,
- __print_u32_hex(__entry->ahg, __entry->len),
- __entry->tidval
- )
- );
-
-TRACE_EVENT(hfi1_sdma_state,
- TP_PROTO(struct sdma_engine *sde,
- const char *cstate,
- const char *nstate
- ),
- TP_ARGS(sde, cstate, nstate),
- TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
- __string(curstate, cstate)
- __string(newstate, nstate)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __assign_str(curstate, cstate);
- __assign_str(newstate, nstate);
- ),
- TP_printk("[%s] current state %s new state %s",
- __get_str(dev),
- __get_str(curstate),
- __get_str(newstate)
- )
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_rc
-
-DECLARE_EVENT_CLASS(hfi1_rc_template,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
- __field(u32, qpn)
- __field(u32, s_flags)
- __field(u32, psn)
- __field(u32, s_psn)
- __field(u32, s_next_psn)
- __field(u32, s_sending_psn)
- __field(u32, s_sending_hpsn)
- __field(u32, r_psn)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
- __entry->qpn = qp->ibqp.qp_num;
- __entry->s_flags = qp->s_flags;
- __entry->psn = psn;
- __entry->s_psn = qp->s_psn;
- __entry->s_next_psn = qp->s_next_psn;
- __entry->s_sending_psn = qp->s_sending_psn;
- __entry->s_sending_hpsn = qp->s_sending_hpsn;
- __entry->r_psn = qp->r_psn;
- ),
- TP_printk(
- "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
- __get_str(dev),
- __entry->qpn,
- __entry->s_flags,
- __entry->psn,
- __entry->s_psn,
- __entry->s_next_psn,
- __entry->s_sending_psn,
- __entry->s_sending_hpsn,
- __entry->r_psn
- )
-);
-
-DEFINE_EVENT(hfi1_rc_template, hfi1_rc_sendcomplete,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn)
-);
-
-DEFINE_EVENT(hfi1_rc_template, hfi1_rc_ack,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn)
-);
-
-DEFINE_EVENT(hfi1_rc_template, hfi1_rc_timeout,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn)
-);
-
-DEFINE_EVENT(hfi1_rc_template, hfi1_rc_rcv_error,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn)
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_misc
-
-TRACE_EVENT(hfi1_interrupt,
- TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry,
- int src),
- TP_ARGS(dd, is_entry, src),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
- __array(char, buf, 64)
- __field(int, src)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(dd)
- is_entry->is_name(__entry->buf, 64,
- src - is_entry->start);
- __entry->src = src;
- ),
- TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf,
- __entry->src)
-);
-
-/*
- * Note:
- * This produces a REALLY ugly trace in the console output when the string is
- * too long.
- */
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_trace
-
-#define MAX_MSG_LEN 512
-
-DECLARE_EVENT_CLASS(hfi1_trace_template,
- TP_PROTO(const char *function, struct va_format *vaf),
- TP_ARGS(function, vaf),
- TP_STRUCT__entry(__string(function, function)
- __dynamic_array(char, msg, MAX_MSG_LEN)
- ),
- TP_fast_assign(__assign_str(function, function);
- WARN_ON_ONCE(vsnprintf
- (__get_dynamic_array(msg),
- MAX_MSG_LEN, vaf->fmt,
- *vaf->va) >=
- MAX_MSG_LEN);
- ),
- TP_printk("(%s) %s",
- __get_str(function),
- __get_str(msg))
-);
-
-/*
- * It may be nice to macroize the __hfi1_trace but the va_* stuff requires an
- * actual function to work and can not be in a macro.
- */
-#define __hfi1_trace_def(lvl) \
-void __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \
- \
-DEFINE_EVENT(hfi1_trace_template, hfi1_ ##lvl, \
- TP_PROTO(const char *function, struct va_format *vaf), \
- TP_ARGS(function, vaf))
-
-#define __hfi1_trace_fn(lvl) \
-void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \
-{ \
- struct va_format vaf = { \
- .fmt = fmt, \
- }; \
- va_list args; \
- \
- va_start(args, fmt); \
- vaf.va = &args; \
- trace_hfi1_ ##lvl(func, &vaf); \
- va_end(args); \
- return; \
-}
-
-/*
- * To create a new trace level simply define it below and as a __hfi1_trace_fn
- * in trace.c. This will create all the hooks for calling
- * hfi1_cdbg(LVL, fmt, ...); as well as take care of all
- * the debugfs stuff.
- */
-__hfi1_trace_def(PKT);
-__hfi1_trace_def(PROC);
-__hfi1_trace_def(SDMA);
-__hfi1_trace_def(LINKVERB);
-__hfi1_trace_def(DEBUG);
-__hfi1_trace_def(SNOOP);
-__hfi1_trace_def(CNTR);
-__hfi1_trace_def(PIO);
-__hfi1_trace_def(DC8051);
-__hfi1_trace_def(FIRMWARE);
-__hfi1_trace_def(RCVCTRL);
-__hfi1_trace_def(TID);
-__hfi1_trace_def(MMU);
-__hfi1_trace_def(IOCTL);
-
-#define hfi1_cdbg(which, fmt, ...) \
- __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__)
-
-#define hfi1_dbg(fmt, ...) \
- hfi1_cdbg(DEBUG, fmt, ##__VA_ARGS__)
-
-/*
- * Define HFI1_EARLY_DBG at compile time or here to enable early trace
- * messages. Do not check in an enablement for this.
- */
-
-#ifdef HFI1_EARLY_DBG
-#define hfi1_dbg_early(fmt, ...) \
- trace_printk(fmt, ##__VA_ARGS__)
-#else
-#define hfi1_dbg_early(fmt, ...)
-#endif
-
-#endif /* __HFI1_TRACE_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace
-#include <trace/define_trace.h>
+#include "trace_dbg.h"
+#include "trace_misc.h"
+#include "trace_ctxts.h"
+#include "trace_ibhdrs.h"
+#include "trace_rc.h"
+#include "trace_rx.h"
+#include "trace_tx.h"
--- /dev/null
+/*
+* Copyright(c) 2015, 2016 Intel Corporation.
+*
+* This file is provided under a dual BSD/GPLv2 license. When using or
+* redistributing this file, you may do so under either license.
+*
+* GPL LICENSE SUMMARY
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of version 2 of the GNU General Public License as
+* published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* BSD LICENSE
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* - Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* - Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in
+* the documentation and/or other materials provided with the
+* distribution.
+* - Neither the name of Intel Corporation nor the names of its
+* contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+#if !defined(__HFI1_TRACE_CTXTS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_CTXTS_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_ctxts
+
+#define UCTXT_FMT \
+ "cred:%u, credaddr:0x%llx, piobase:0x%p, rcvhdr_cnt:%u, " \
+ "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx"
+TRACE_EVENT(hfi1_uctxtdata,
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt),
+ TP_ARGS(dd, uctxt),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(unsigned int, ctxt)
+ __field(u32, credits)
+ __field(u64, hw_free)
+ __field(void __iomem *, piobase)
+ __field(u16, rcvhdrq_cnt)
+ __field(u64, rcvhdrq_phys)
+ __field(u32, eager_cnt)
+ __field(u64, rcvegr_phys)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->ctxt = uctxt->ctxt;
+ __entry->credits = uctxt->sc->credits;
+ __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
+ __entry->piobase = uctxt->sc->base_addr;
+ __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
+ __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
+ __entry->eager_cnt = uctxt->egrbufs.alloced;
+ __entry->rcvegr_phys =
+ uctxt->egrbufs.rcvtids[0].phys;
+ ),
+ TP_printk("[%s] ctxt %u " UCTXT_FMT,
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->credits,
+ __entry->hw_free,
+ __entry->piobase,
+ __entry->rcvhdrq_cnt,
+ __entry->rcvhdrq_phys,
+ __entry->eager_cnt,
+ __entry->rcvegr_phys
+ )
+);
+
+#define CINFO_FMT \
+ "egrtids:%u, egr_size:%u, hdrq_cnt:%u, hdrq_size:%u, sdma_ring_size:%u"
+TRACE_EVENT(hfi1_ctxt_info,
+ TP_PROTO(struct hfi1_devdata *dd, unsigned int ctxt,
+ unsigned int subctxt,
+ struct hfi1_ctxt_info cinfo),
+ TP_ARGS(dd, ctxt, subctxt, cinfo),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(unsigned int, ctxt)
+ __field(unsigned int, subctxt)
+ __field(u16, egrtids)
+ __field(u16, rcvhdrq_cnt)
+ __field(u16, rcvhdrq_size)
+ __field(u16, sdma_ring_size)
+ __field(u32, rcvegr_size)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->egrtids = cinfo.egrtids;
+ __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
+ __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
+ __entry->sdma_ring_size = cinfo.sdma_ring_size;
+ __entry->rcvegr_size = cinfo.rcvegr_size;
+ ),
+ TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->egrtids,
+ __entry->rcvegr_size,
+ __entry->rcvhdrq_cnt,
+ __entry->rcvhdrq_size,
+ __entry->sdma_ring_size
+ )
+);
+
+#endif /* __HFI1_TRACE_CTXTS_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_ctxts
+#include <trace/define_trace.h>
--- /dev/null
+/*
+* Copyright(c) 2015, 2016 Intel Corporation.
+*
+* This file is provided under a dual BSD/GPLv2 license. When using or
+* redistributing this file, you may do so under either license.
+*
+* GPL LICENSE SUMMARY
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of version 2 of the GNU General Public License as
+* published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* BSD LICENSE
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* - Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* - Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in
+* the documentation and/or other materials provided with the
+* distribution.
+* - Neither the name of Intel Corporation nor the names of its
+* contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+#if !defined(__HFI1_TRACE_EXTRA_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_EXTRA_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+/*
+ * Note:
+ * This produces a REALLY ugly trace in the console output when the string is
+ * too long.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_dbg
+
+#define MAX_MSG_LEN 512
+
+DECLARE_EVENT_CLASS(hfi1_trace_template,
+ TP_PROTO(const char *function, struct va_format *vaf),
+ TP_ARGS(function, vaf),
+ TP_STRUCT__entry(__string(function, function)
+ __dynamic_array(char, msg, MAX_MSG_LEN)
+ ),
+ TP_fast_assign(__assign_str(function, function);
+ WARN_ON_ONCE(vsnprintf
+ (__get_dynamic_array(msg),
+ MAX_MSG_LEN, vaf->fmt,
+ *vaf->va) >=
+ MAX_MSG_LEN);
+ ),
+ TP_printk("(%s) %s",
+ __get_str(function),
+ __get_str(msg))
+);
+
+/*
+ * It may be nice to macroize the __hfi1_trace but the va_* stuff requires an
+ * actual function to work and can not be in a macro.
+ */
+#define __hfi1_trace_def(lvl) \
+void __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \
+ \
+DEFINE_EVENT(hfi1_trace_template, hfi1_ ##lvl, \
+ TP_PROTO(const char *function, struct va_format *vaf), \
+ TP_ARGS(function, vaf))
+
+#define __hfi1_trace_fn(lvl) \
+void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \
+{ \
+ struct va_format vaf = { \
+ .fmt = fmt, \
+ }; \
+ va_list args; \
+ \
+ va_start(args, fmt); \
+ vaf.va = &args; \
+ trace_hfi1_ ##lvl(func, &vaf); \
+ va_end(args); \
+ return; \
+}
+
+/*
+ * To create a new trace level simply define it below and as a __hfi1_trace_fn
+ * in trace.c. This will create all the hooks for calling
+ * hfi1_cdbg(LVL, fmt, ...); as well as take care of all
+ * the debugfs stuff.
+ */
+__hfi1_trace_def(PKT);
+__hfi1_trace_def(PROC);
+__hfi1_trace_def(SDMA);
+__hfi1_trace_def(LINKVERB);
+__hfi1_trace_def(DEBUG);
+__hfi1_trace_def(SNOOP);
+__hfi1_trace_def(CNTR);
+__hfi1_trace_def(PIO);
+__hfi1_trace_def(DC8051);
+__hfi1_trace_def(FIRMWARE);
+__hfi1_trace_def(RCVCTRL);
+__hfi1_trace_def(TID);
+__hfi1_trace_def(MMU);
+__hfi1_trace_def(IOCTL);
+
+#define hfi1_cdbg(which, fmt, ...) \
+ __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__)
+
+#define hfi1_dbg(fmt, ...) \
+ hfi1_cdbg(DEBUG, fmt, ##__VA_ARGS__)
+
+/*
+ * Define HFI1_EARLY_DBG at compile time or here to enable early trace
+ * messages. Do not check in an enablement for this.
+ */
+
+#ifdef HFI1_EARLY_DBG
+#define hfi1_dbg_early(fmt, ...) \
+ trace_printk(fmt, ##__VA_ARGS__)
+#else
+#define hfi1_dbg_early(fmt, ...)
+#endif
+
+#endif /* __HFI1_TRACE_EXTRA_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_dbg
+#include <trace/define_trace.h>
--- /dev/null
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__HFI1_TRACE_IBHDRS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_IBHDRS_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_ibhdrs
+
+u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr);
+const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
+
+#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
+
+#define lrh_name(lrh) { HFI1_##lrh, #lrh }
+#define show_lnh(lrh) \
+__print_symbolic(lrh, \
+ lrh_name(LRH_BTH), \
+ lrh_name(LRH_GRH))
+
+#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x"
+#define BTH_PRN \
+ "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \
+ "f %d b %d qpn 0x%.6x a %d psn 0x%.8x"
+#define EHDR_PRN "%s"
+
+DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct hfi1_ib_header *hdr),
+ TP_ARGS(dd, hdr),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
+ /* LRH */
+ __field(u8, vl)
+ __field(u8, lver)
+ __field(u8, sl)
+ __field(u8, lnh)
+ __field(u16, dlid)
+ __field(u16, len)
+ __field(u16, slid)
+ /* BTH */
+ __field(u8, opcode)
+ __field(u8, se)
+ __field(u8, m)
+ __field(u8, pad)
+ __field(u8, tver)
+ __field(u16, pkey)
+ __field(u8, f)
+ __field(u8, b)
+ __field(u32, qpn)
+ __field(u8, a)
+ __field(u32, psn)
+ /* extended headers */
+ __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
+ ),
+ TP_fast_assign(
+ struct hfi1_other_headers *ohdr;
+
+ DD_DEV_ASSIGN(dd);
+ /* LRH */
+ __entry->vl =
+ (u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
+ __entry->lver =
+ (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
+ __entry->sl =
+ (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
+ __entry->lnh =
+ (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
+ __entry->dlid =
+ be16_to_cpu(hdr->lrh[1]);
+ /* allow for larger len */
+ __entry->len =
+ be16_to_cpu(hdr->lrh[2]);
+ __entry->slid =
+ be16_to_cpu(hdr->lrh[3]);
+ /* BTH */
+ if (__entry->lnh == HFI1_LRH_BTH)
+ ohdr = &hdr->u.oth;
+ else
+ ohdr = &hdr->u.l.oth;
+ __entry->opcode =
+ (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
+ __entry->se =
+ (be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
+ __entry->m =
+ (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
+ __entry->pad =
+ (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ __entry->tver =
+ (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
+ __entry->pkey =
+ be32_to_cpu(ohdr->bth[0]) & 0xffff;
+ __entry->f =
+ (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) &
+ HFI1_FECN_MASK;
+ __entry->b =
+ (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) &
+ HFI1_BECN_MASK;
+ __entry->qpn =
+ be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
+ __entry->a =
+ (be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
+ /* allow for larger PSN */
+ __entry->psn =
+ be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
+ /* extended headers */
+ memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
+ ibhdr_exhdr_len(hdr));
+ ),
+ TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
+ __get_str(dev),
+ /* LRH */
+ __entry->vl,
+ __entry->lver,
+ __entry->sl,
+ __entry->lnh, show_lnh(__entry->lnh),
+ __entry->dlid,
+ __entry->len,
+ __entry->slid,
+ /* BTH */
+ __entry->opcode, show_ib_opcode(__entry->opcode),
+ __entry->se,
+ __entry->m,
+ __entry->pad,
+ __entry->tver,
+ __entry->pkey,
+ __entry->f,
+ __entry->b,
+ __entry->qpn,
+ __entry->a,
+ __entry->psn,
+ /* extended headers */
+ __parse_ib_ehdrs(
+ __entry->opcode,
+ (void *)__get_dynamic_array(ehdrs))
+ )
+);
+
+DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_ARGS(dd, hdr));
+
+DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_ARGS(dd, hdr));
+
+DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_ARGS(dd, hdr));
+
+DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_ARGS(dd, hdr));
+
+#endif /* __HFI1_TRACE_IBHDRS_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_ibhdrs
+#include <trace/define_trace.h>
--- /dev/null
+/*
+* Copyright(c) 2015, 2016 Intel Corporation.
+*
+* This file is provided under a dual BSD/GPLv2 license. When using or
+* redistributing this file, you may do so under either license.
+*
+* GPL LICENSE SUMMARY
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of version 2 of the GNU General Public License as
+* published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* BSD LICENSE
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* - Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* - Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in
+* the documentation and/or other materials provided with the
+* distribution.
+* - Neither the name of Intel Corporation nor the names of its
+* contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+#if !defined(__HFI1_TRACE_MISC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_MISC_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_misc
+
+TRACE_EVENT(hfi1_interrupt,
+ TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry,
+ int src),
+ TP_ARGS(dd, is_entry, src),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __array(char, buf, 64)
+ __field(int, src)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd)
+ is_entry->is_name(__entry->buf, 64,
+ src - is_entry->start);
+ __entry->src = src;
+ ),
+ TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf,
+ __entry->src)
+);
+
+#endif /* __HFI1_TRACE_MISC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_misc
+#include <trace/define_trace.h>
--- /dev/null
+/*
+* Copyright(c) 2015, 2016 Intel Corporation.
+*
+* This file is provided under a dual BSD/GPLv2 license. When using or
+* redistributing this file, you may do so under either license.
+*
+* GPL LICENSE SUMMARY
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of version 2 of the GNU General Public License as
+* published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* BSD LICENSE
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* - Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* - Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in
+* the documentation and/or other materials provided with the
+* distribution.
+* - Neither the name of Intel Corporation nor the names of its
+* contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+#if !defined(__HFI1_TRACE_RC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_RC_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_rc
+
+DECLARE_EVENT_CLASS(hfi1_rc_template,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, s_flags)
+ __field(u32, psn)
+ __field(u32, s_psn)
+ __field(u32, s_next_psn)
+ __field(u32, s_sending_psn)
+ __field(u32, s_sending_hpsn)
+ __field(u32, r_psn)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->s_flags = qp->s_flags;
+ __entry->psn = psn;
+ __entry->s_psn = qp->s_psn;
+ __entry->s_next_psn = qp->s_next_psn;
+ __entry->s_sending_psn = qp->s_sending_psn;
+ __entry->s_sending_hpsn = qp->s_sending_hpsn;
+ __entry->r_psn = qp->r_psn;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->s_flags,
+ __entry->psn,
+ __entry->s_psn,
+ __entry->s_next_psn,
+ __entry->s_sending_psn,
+ __entry->s_sending_hpsn,
+ __entry->r_psn
+ )
+);
+
+DEFINE_EVENT(hfi1_rc_template, hfi1_sendcomplete,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DEFINE_EVENT(hfi1_rc_template, hfi1_ack,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DEFINE_EVENT(hfi1_rc_template, hfi1_timeout,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+#endif /* __HFI1_TRACE_RC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_rc
+#include <trace/define_trace.h>
--- /dev/null
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__HFI1_TRACE_RX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_RX_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_rx
+
+TRACE_EVENT(hfi1_rcvhdr,
+ TP_PROTO(struct hfi1_devdata *dd,
+ u32 ctxt,
+ u64 eflags,
+ u32 etype,
+ u32 hlen,
+ u32 tlen,
+ u32 updegr,
+ u32 etail
+ ),
+ TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(u64, eflags)
+ __field(u32, ctxt)
+ __field(u32, etype)
+ __field(u32, hlen)
+ __field(u32, tlen)
+ __field(u32, updegr)
+ __field(u32, etail)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->eflags = eflags;
+ __entry->ctxt = ctxt;
+ __entry->etype = etype;
+ __entry->hlen = hlen;
+ __entry->tlen = tlen;
+ __entry->updegr = updegr;
+ __entry->etail = etail;
+ ),
+ TP_printk(
+ "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->eflags,
+ __entry->etype, show_packettype(__entry->etype),
+ __entry->hlen,
+ __entry->tlen,
+ __entry->updegr,
+ __entry->etail
+ )
+);
+
+TRACE_EVENT(hfi1_receive_interrupt,
+ TP_PROTO(struct hfi1_devdata *dd, u32 ctxt),
+ TP_ARGS(dd, ctxt),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(u32, ctxt)
+ __field(u8, slow_path)
+ __field(u8, dma_rtail)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ if (dd->rcd[ctxt]->do_interrupt ==
+ &handle_receive_interrupt) {
+ __entry->slow_path = 1;
+ __entry->dma_rtail = 0xFF;
+ } else if (dd->rcd[ctxt]->do_interrupt ==
+ &handle_receive_interrupt_dma_rtail){
+ __entry->dma_rtail = 1;
+ __entry->slow_path = 0;
+ } else if (dd->rcd[ctxt]->do_interrupt ==
+ &handle_receive_interrupt_nodma_rtail) {
+ __entry->dma_rtail = 0;
+ __entry->slow_path = 0;
+ }
+ ),
+ TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->slow_path,
+ __entry->dma_rtail
+ )
+);
+
+TRACE_EVENT(hfi1_exp_tid_reg,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr,
+ u32 npages, unsigned long va, unsigned long pa,
+ dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
+ TP_STRUCT__entry(
+ __field(unsigned int, ctxt)
+ __field(u16, subctxt)
+ __field(u32, rarr)
+ __field(u32, npages)
+ __field(unsigned long, va)
+ __field(unsigned long, pa)
+ __field(dma_addr_t, dma)
+ ),
+ TP_fast_assign(
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->rarr = rarr;
+ __entry->npages = npages;
+ __entry->va = va;
+ __entry->pa = pa;
+ __entry->dma = dma;
+ ),
+ TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->rarr,
+ __entry->npages,
+ __entry->pa,
+ __entry->va,
+ __entry->dma
+ )
+ );
+
+TRACE_EVENT(hfi1_exp_tid_unreg,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
+ unsigned long va, unsigned long pa, dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
+ TP_STRUCT__entry(
+ __field(unsigned int, ctxt)
+ __field(u16, subctxt)
+ __field(u32, rarr)
+ __field(u32, npages)
+ __field(unsigned long, va)
+ __field(unsigned long, pa)
+ __field(dma_addr_t, dma)
+ ),
+ TP_fast_assign(
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->rarr = rarr;
+ __entry->npages = npages;
+ __entry->va = va;
+ __entry->pa = pa;
+ __entry->dma = dma;
+ ),
+ TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->rarr,
+ __entry->npages,
+ __entry->pa,
+ __entry->va,
+ __entry->dma
+ )
+ );
+
+TRACE_EVENT(hfi1_exp_tid_inval,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr,
+ u32 npages, dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
+ TP_STRUCT__entry(
+ __field(unsigned int, ctxt)
+ __field(u16, subctxt)
+ __field(unsigned long, va)
+ __field(u32, rarr)
+ __field(u32, npages)
+ __field(dma_addr_t, dma)
+ ),
+ TP_fast_assign(
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->va = va;
+ __entry->rarr = rarr;
+ __entry->npages = npages;
+ __entry->dma = dma;
+ ),
+ TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->rarr,
+ __entry->npages,
+ __entry->va,
+ __entry->dma
+ )
+ );
+
+TRACE_EVENT(hfi1_mmu_invalidate,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, const char *type,
+ unsigned long start, unsigned long end),
+ TP_ARGS(ctxt, subctxt, type, start, end),
+ TP_STRUCT__entry(
+ __field(unsigned int, ctxt)
+ __field(u16, subctxt)
+ __string(type, type)
+ __field(unsigned long, start)
+ __field(unsigned long, end)
+ ),
+ TP_fast_assign(
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __assign_str(type, type);
+ __entry->start = start;
+ __entry->end = end;
+ ),
+ TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx",
+ __entry->ctxt,
+ __entry->subctxt,
+ __get_str(type),
+ __entry->start,
+ __entry->end
+ )
+ );
+
+#define SNOOP_PRN \
+ "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \
+ "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]"
+
+TRACE_EVENT(snoop_capture,
+ TP_PROTO(struct hfi1_devdata *dd,
+ int hdr_len,
+ struct hfi1_ib_header *hdr,
+ int data_len,
+ void *data),
+ TP_ARGS(dd, hdr_len, hdr, data_len, data),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
+ __field(u16, slid)
+ __field(u16, dlid)
+ __field(u32, qpn)
+ __field(u8, opcode)
+ __field(u8, sl)
+ __field(u16, pkey)
+ __field(u32, hdr_len)
+ __field(u32, data_len)
+ __field(u8, lnh)
+ __dynamic_array(u8, raw_hdr, hdr_len)
+ __dynamic_array(u8, raw_pkt, data_len)
+ ),
+ TP_fast_assign(
+ struct hfi1_other_headers *ohdr;
+
+ __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
+ if (__entry->lnh == HFI1_LRH_BTH)
+ ohdr = &hdr->u.oth;
+ else
+ ohdr = &hdr->u.l.oth;
+ DD_DEV_ASSIGN(dd);
+ __entry->slid = be16_to_cpu(hdr->lrh[3]);
+ __entry->dlid = be16_to_cpu(hdr->lrh[1]);
+ __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
+ __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
+ __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
+ __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff;
+ __entry->hdr_len = hdr_len;
+ __entry->data_len = data_len;
+ memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len);
+ memcpy(__get_dynamic_array(raw_pkt), data, data_len);
+ ),
+ TP_printk(
+ "[%s] " SNOOP_PRN,
+ __get_str(dev),
+ __entry->slid,
+ __entry->dlid,
+ __entry->qpn,
+ __entry->opcode,
+ show_ib_opcode(__entry->opcode),
+ __entry->sl,
+ __entry->pkey,
+ __entry->hdr_len,
+ __entry->data_len
+ )
+);
+
+#endif /* __HFI1_TRACE_RX_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_rx
+#include <trace/define_trace.h>
--- /dev/null
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__HFI1_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_TX_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+#include "mad.h"
+#include "sdma.h"
+
+const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1);
+
+#define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1)
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_tx
+
+TRACE_EVENT(hfi1_piofree,
+ TP_PROTO(struct send_context *sc, int extra),
+ TP_ARGS(sc, extra),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
+ __field(u32, sw_index)
+ __field(u32, hw_context)
+ __field(int, extra)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
+ __entry->sw_index = sc->sw_index;
+ __entry->hw_context = sc->hw_context;
+ __entry->extra = extra;
+ ),
+ TP_printk("[%s] ctxt %u(%u) extra %d",
+ __get_str(dev),
+ __entry->sw_index,
+ __entry->hw_context,
+ __entry->extra
+ )
+);
+
+TRACE_EVENT(hfi1_wantpiointr,
+ TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl),
+ TP_ARGS(sc, needint, credit_ctrl),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
+ __field(u32, sw_index)
+ __field(u32, hw_context)
+ __field(u32, needint)
+ __field(u64, credit_ctrl)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
+ __entry->sw_index = sc->sw_index;
+ __entry->hw_context = sc->hw_context;
+ __entry->needint = needint;
+ __entry->credit_ctrl = credit_ctrl;
+ ),
+ TP_printk("[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx",
+ __get_str(dev),
+ __entry->sw_index,
+ __entry->hw_context,
+ __entry->needint,
+ (unsigned long long)__entry->credit_ctrl
+ )
+);
+
+DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template,
+ TP_PROTO(struct rvt_qp *qp, u32 flags),
+ TP_ARGS(qp, flags),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, flags)
+ __field(u32, s_flags)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->flags = flags;
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->s_flags = qp->s_flags;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->flags,
+ __entry->s_flags
+ )
+);
+
+DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup,
+ TP_PROTO(struct rvt_qp *qp, u32 flags),
+ TP_ARGS(qp, flags));
+
+DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep,
+ TP_PROTO(struct rvt_qp *qp, u32 flags),
+ TP_ARGS(qp, flags));
+
+TRACE_EVENT(hfi1_sdma_descriptor,
+ TP_PROTO(struct sdma_engine *sde,
+ u64 desc0,
+ u64 desc1,
+ u16 e,
+ void *descp),
+ TP_ARGS(sde, desc0, desc1, e, descp),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(void *, descp)
+ __field(u64, desc0)
+ __field(u64, desc1)
+ __field(u16, e)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->desc0 = desc0;
+ __entry->desc1 = desc1;
+ __entry->idx = sde->this_idx;
+ __entry->descp = descp;
+ __entry->e = e;
+ ),
+ TP_printk(
+ "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u",
+ __get_str(dev),
+ __entry->idx,
+ __parse_sdma_flags(__entry->desc0, __entry->desc1),
+ (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) &
+ SDMA_DESC0_PHY_ADDR_MASK,
+ (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) &
+ SDMA_DESC1_GENERATION_MASK),
+ (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) &
+ SDMA_DESC0_BYTE_COUNT_MASK),
+ __entry->desc0,
+ __entry->desc1,
+ __entry->descp,
+ __entry->e
+ )
+);
+
+TRACE_EVENT(hfi1_sdma_engine_select,
+ TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx),
+ TP_ARGS(dd, sel, vl, idx),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(u32, sel)
+ __field(u8, vl)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->sel = sel;
+ __entry->vl = vl;
+ __entry->idx = idx;
+ ),
+ TP_printk("[%s] selecting SDE %u sel 0x%x vl %u",
+ __get_str(dev),
+ __entry->idx,
+ __entry->sel,
+ __entry->vl
+ )
+);
+
+DECLARE_EVENT_CLASS(hfi1_sdma_engine_class,
+ TP_PROTO(struct sdma_engine *sde, u64 status),
+ TP_ARGS(sde, status),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(u64, status)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->status = status;
+ __entry->idx = sde->this_idx;
+ ),
+ TP_printk("[%s] SDE(%u) status %llx",
+ __get_str(dev),
+ __entry->idx,
+ (unsigned long long)__entry->status
+ )
+);
+
+DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_interrupt,
+ TP_PROTO(struct sdma_engine *sde, u64 status),
+ TP_ARGS(sde, status)
+);
+
+DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_progress,
+ TP_PROTO(struct sdma_engine *sde, u64 status),
+ TP_ARGS(sde, status)
+);
+
+DECLARE_EVENT_CLASS(hfi1_sdma_ahg_ad,
+ TP_PROTO(struct sdma_engine *sde, int aidx),
+ TP_ARGS(sde, aidx),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(int, aidx)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->idx = sde->this_idx;
+ __entry->aidx = aidx;
+ ),
+ TP_printk("[%s] SDE(%u) aidx %d",
+ __get_str(dev),
+ __entry->idx,
+ __entry->aidx
+ )
+);
+
+DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_allocate,
+ TP_PROTO(struct sdma_engine *sde, int aidx),
+ TP_ARGS(sde, aidx));
+
+DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_deallocate,
+ TP_PROTO(struct sdma_engine *sde, int aidx),
+ TP_ARGS(sde, aidx));
+
+#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
+TRACE_EVENT(hfi1_sdma_progress,
+ TP_PROTO(struct sdma_engine *sde,
+ u16 hwhead,
+ u16 swhead,
+ struct sdma_txreq *txp
+ ),
+ TP_ARGS(sde, hwhead, swhead, txp),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(u64, sn)
+ __field(u16, hwhead)
+ __field(u16, swhead)
+ __field(u16, txnext)
+ __field(u16, tx_tail)
+ __field(u16, tx_head)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->hwhead = hwhead;
+ __entry->swhead = swhead;
+ __entry->tx_tail = sde->tx_tail;
+ __entry->tx_head = sde->tx_head;
+ __entry->txnext = txp ? txp->next_descq_idx : ~0;
+ __entry->idx = sde->this_idx;
+ __entry->sn = txp ? txp->sn : ~0;
+ ),
+ TP_printk(
+ "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
+ __get_str(dev),
+ __entry->idx,
+ __entry->sn,
+ __entry->hwhead,
+ __entry->swhead,
+ __entry->txnext,
+ __entry->tx_head,
+ __entry->tx_tail
+ )
+);
+#else
+TRACE_EVENT(hfi1_sdma_progress,
+ TP_PROTO(struct sdma_engine *sde,
+ u16 hwhead, u16 swhead,
+ struct sdma_txreq *txp
+ ),
+ TP_ARGS(sde, hwhead, swhead, txp),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(u16, hwhead)
+ __field(u16, swhead)
+ __field(u16, txnext)
+ __field(u16, tx_tail)
+ __field(u16, tx_head)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->hwhead = hwhead;
+ __entry->swhead = swhead;
+ __entry->tx_tail = sde->tx_tail;
+ __entry->tx_head = sde->tx_head;
+ __entry->txnext = txp ? txp->next_descq_idx : ~0;
+ __entry->idx = sde->this_idx;
+ ),
+ TP_printk(
+ "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
+ __get_str(dev),
+ __entry->idx,
+ __entry->hwhead,
+ __entry->swhead,
+ __entry->txnext,
+ __entry->tx_head,
+ __entry->tx_tail
+ )
+);
+#endif
+
+DECLARE_EVENT_CLASS(hfi1_sdma_sn,
+ TP_PROTO(struct sdma_engine *sde, u64 sn),
+ TP_ARGS(sde, sn),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(u64, sn)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->sn = sn;
+ __entry->idx = sde->this_idx;
+ ),
+ TP_printk("[%s] SDE(%u) sn %llu",
+ __get_str(dev),
+ __entry->idx,
+ __entry->sn
+ )
+);
+
+DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn,
+ TP_PROTO(
+ struct sdma_engine *sde,
+ u64 sn
+ ),
+ TP_ARGS(sde, sn)
+);
+
+DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_in_sn,
+ TP_PROTO(struct sdma_engine *sde, u64 sn),
+ TP_ARGS(sde, sn)
+);
+
+#define USDMA_HDR_FORMAT \
+ "[%s:%u:%u:%u] PBC=(0x%x 0x%x) LRH=(0x%x 0x%x) BTH=(0x%x 0x%x 0x%x) KDETH=(0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x) TIDVal=0x%x"
+
+TRACE_EVENT(hfi1_sdma_user_header,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req,
+ struct hfi1_pkt_header *hdr, u32 tidval),
+ TP_ARGS(dd, ctxt, subctxt, req, hdr, tidval),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
+ __field(u16, ctxt)
+ __field(u8, subctxt)
+ __field(u16, req)
+ __field(u32, pbc0)
+ __field(u32, pbc1)
+ __field(u32, lrh0)
+ __field(u32, lrh1)
+ __field(u32, bth0)
+ __field(u32, bth1)
+ __field(u32, bth2)
+ __field(u32, kdeth0)
+ __field(u32, kdeth1)
+ __field(u32, kdeth2)
+ __field(u32, kdeth3)
+ __field(u32, kdeth4)
+ __field(u32, kdeth5)
+ __field(u32, kdeth6)
+ __field(u32, kdeth7)
+ __field(u32, kdeth8)
+ __field(u32, tidval)
+ ),
+ TP_fast_assign(
+ __le32 *pbc = (__le32 *)hdr->pbc;
+ __be32 *lrh = (__be32 *)hdr->lrh;
+ __be32 *bth = (__be32 *)hdr->bth;
+ __le32 *kdeth = (__le32 *)&hdr->kdeth;
+
+ DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->req = req;
+ __entry->pbc0 = le32_to_cpu(pbc[0]);
+ __entry->pbc1 = le32_to_cpu(pbc[1]);
+ __entry->lrh0 = be32_to_cpu(lrh[0]);
+ __entry->lrh1 = be32_to_cpu(lrh[1]);
+ __entry->bth0 = be32_to_cpu(bth[0]);
+ __entry->bth1 = be32_to_cpu(bth[1]);
+ __entry->bth2 = be32_to_cpu(bth[2]);
+ __entry->kdeth0 = le32_to_cpu(kdeth[0]);
+ __entry->kdeth1 = le32_to_cpu(kdeth[1]);
+ __entry->kdeth2 = le32_to_cpu(kdeth[2]);
+ __entry->kdeth3 = le32_to_cpu(kdeth[3]);
+ __entry->kdeth4 = le32_to_cpu(kdeth[4]);
+ __entry->kdeth5 = le32_to_cpu(kdeth[5]);
+ __entry->kdeth6 = le32_to_cpu(kdeth[6]);
+ __entry->kdeth7 = le32_to_cpu(kdeth[7]);
+ __entry->kdeth8 = le32_to_cpu(kdeth[8]);
+ __entry->tidval = tidval;
+ ),
+ TP_printk(USDMA_HDR_FORMAT,
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->req,
+ __entry->pbc1,
+ __entry->pbc0,
+ __entry->lrh0,
+ __entry->lrh1,
+ __entry->bth0,
+ __entry->bth1,
+ __entry->bth2,
+ __entry->kdeth0,
+ __entry->kdeth1,
+ __entry->kdeth2,
+ __entry->kdeth3,
+ __entry->kdeth4,
+ __entry->kdeth5,
+ __entry->kdeth6,
+ __entry->kdeth7,
+ __entry->kdeth8,
+ __entry->tidval
+ )
+);
+
+#define SDMA_UREQ_FMT \
+ "[%s:%u:%u] ver/op=0x%x, iovcnt=%u, npkts=%u, frag=%u, idx=%u"
+TRACE_EVENT(hfi1_sdma_user_reqinfo,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i),
+ TP_ARGS(dd, ctxt, subctxt, i),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd);
+ __field(u16, ctxt)
+ __field(u8, subctxt)
+ __field(u8, ver_opcode)
+ __field(u8, iovcnt)
+ __field(u16, npkts)
+ __field(u16, fragsize)
+ __field(u16, comp_idx)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->ver_opcode = i[0] & 0xff;
+ __entry->iovcnt = (i[0] >> 8) & 0xff;
+ __entry->npkts = i[1];
+ __entry->fragsize = i[2];
+ __entry->comp_idx = i[3];
+ ),
+ TP_printk(SDMA_UREQ_FMT,
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->ver_opcode,
+ __entry->iovcnt,
+ __entry->npkts,
+ __entry->fragsize,
+ __entry->comp_idx
+ )
+);
+
+#define usdma_complete_name(st) { st, #st }
+#define show_usdma_complete_state(st) \
+ __print_symbolic(st, \
+ usdma_complete_name(FREE), \
+ usdma_complete_name(QUEUED), \
+ usdma_complete_name(COMPLETE), \
+ usdma_complete_name(ERROR))
+
+TRACE_EVENT(hfi1_sdma_user_completion,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 idx,
+ u8 state, int code),
+ TP_ARGS(dd, ctxt, subctxt, idx, state, code),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
+ __field(u16, ctxt)
+ __field(u8, subctxt)
+ __field(u16, idx)
+ __field(u8, state)
+ __field(int, code)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->idx = idx;
+ __entry->state = state;
+ __entry->code = code;
+ ),
+ TP_printk("[%s:%u:%u:%u] SDMA completion state %s (%d)",
+ __get_str(dev), __entry->ctxt, __entry->subctxt,
+ __entry->idx, show_usdma_complete_state(__entry->state),
+ __entry->code)
+);
+
+const char *print_u32_array(struct trace_seq *, u32 *, int);
+#define __print_u32_hex(arr, len) print_u32_array(p, arr, len)
+
+TRACE_EVENT(hfi1_sdma_user_header_ahg,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req,
+ u8 sde, u8 ahgidx, u32 *ahg, int len, u32 tidval),
+ TP_ARGS(dd, ctxt, subctxt, req, sde, ahgidx, ahg, len, tidval),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
+ __field(u16, ctxt)
+ __field(u8, subctxt)
+ __field(u16, req)
+ __field(u8, sde)
+ __field(u8, idx)
+ __field(int, len)
+ __field(u32, tidval)
+ __array(u32, ahg, 10)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->req = req;
+ __entry->sde = sde;
+ __entry->idx = ahgidx;
+ __entry->len = len;
+ __entry->tidval = tidval;
+ memcpy(__entry->ahg, ahg, len * sizeof(u32));
+ ),
+ TP_printk("[%s:%u:%u:%u] (SDE%u/AHG%u) ahg[0-%d]=(%s) TIDVal=0x%x",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->req,
+ __entry->sde,
+ __entry->idx,
+ __entry->len - 1,
+ __print_u32_hex(__entry->ahg, __entry->len),
+ __entry->tidval
+ )
+);
+
+TRACE_EVENT(hfi1_sdma_state,
+ TP_PROTO(struct sdma_engine *sde,
+ const char *cstate,
+ const char *nstate
+ ),
+ TP_ARGS(sde, cstate, nstate),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __string(curstate, cstate)
+ __string(newstate, nstate)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __assign_str(curstate, cstate);
+ __assign_str(newstate, nstate);
+ ),
+ TP_printk("[%s] current state %s new state %s",
+ __get_str(dev),
+ __get_str(curstate),
+ __get_str(newstate)
+ )
+);
+
+#define BCT_FORMAT \
+ "shared_limit %x vls 0-7 [%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x] 15 [%x,%x]"
+
+#define BCT(field) \
+ be16_to_cpu( \
+ ((struct buffer_control *)__get_dynamic_array(bct))->field \
+ )
+
+DECLARE_EVENT_CLASS(hfi1_bct_template,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct buffer_control *bc),
+ TP_ARGS(dd, bc),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __dynamic_array(u8, bct, sizeof(*bc))
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ memcpy(__get_dynamic_array(bct), bc,
+ sizeof(*bc));
+ ),
+ TP_printk(BCT_FORMAT,
+ BCT(overall_shared_limit),
+
+ BCT(vl[0].dedicated),
+ BCT(vl[0].shared),
+
+ BCT(vl[1].dedicated),
+ BCT(vl[1].shared),
+
+ BCT(vl[2].dedicated),
+ BCT(vl[2].shared),
+
+ BCT(vl[3].dedicated),
+ BCT(vl[3].shared),
+
+ BCT(vl[4].dedicated),
+ BCT(vl[4].shared),
+
+ BCT(vl[5].dedicated),
+ BCT(vl[5].shared),
+
+ BCT(vl[6].dedicated),
+ BCT(vl[6].shared),
+
+ BCT(vl[7].dedicated),
+ BCT(vl[7].shared),
+
+ BCT(vl[15].dedicated),
+ BCT(vl[15].shared)
+ )
+);
+
+DEFINE_EVENT(hfi1_bct_template, bct_set,
+ TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
+ TP_ARGS(dd, bc));
+
+DEFINE_EVENT(hfi1_bct_template, bct_get,
+ TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
+ TP_ARGS(dd, bc));
+
+#endif /* __HFI1_TRACE_TX_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_tx
+#include <trace/define_trace.h>
+++ /dev/null
-/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-
-#include "hfi.h"
-#include "twsi.h"
-
-/*
- * "Two Wire Serial Interface" support.
- *
- * Originally written for a not-quite-i2c serial eeprom, which is
- * still used on some supported boards. Later boards have added a
- * variety of other uses, most board-specific, so the bit-boffing
- * part has been split off to this file, while the other parts
- * have been moved to chip-specific files.
- *
- * We have also dropped all pretense of fully generic (e.g. pretend
- * we don't know whether '1' is the higher voltage) interface, as
- * the restrictions of the generic i2c interface (e.g. no access from
- * driver itself) make it unsuitable for this use.
- */
-
-#define READ_CMD 1
-#define WRITE_CMD 0
-
-/**
- * i2c_wait_for_writes - wait for a write
- * @dd: the hfi1_ib device
- *
- * We use this instead of udelay directly, so we can make sure
- * that previous register writes have been flushed all the way
- * to the chip. Since we are delaying anyway, the cost doesn't
- * hurt, and makes the bit twiddling more regular
- */
-static void i2c_wait_for_writes(struct hfi1_devdata *dd, u32 target)
-{
- /*
- * implicit read of EXTStatus is as good as explicit
- * read of scratch, if all we want to do is flush
- * writes.
- */
- hfi1_gpio_mod(dd, target, 0, 0, 0);
- rmb(); /* inlined, so prevent compiler reordering */
-}
-
-/*
- * QSFP modules are allowed to hold SCL low for 500uSec. Allow twice that
- * for "almost compliant" modules
- */
-#define SCL_WAIT_USEC 1000
-
-/* BUF_WAIT is time bus must be free between STOP or ACK and to next START.
- * Should be 20, but some chips need more.
- */
-#define TWSI_BUF_WAIT_USEC 60
-
-static void scl_out(struct hfi1_devdata *dd, u32 target, u8 bit)
-{
- u32 mask;
-
- udelay(1);
-
- mask = QSFP_HFI0_I2CCLK;
-
- /* SCL is meant to be bare-drain, so never set "OUT", just DIR */
- hfi1_gpio_mod(dd, target, 0, bit ? 0 : mask, mask);
-
- /*
- * Allow for slow slaves by simple
- * delay for falling edge, sampling on rise.
- */
- if (!bit) {
- udelay(2);
- } else {
- int rise_usec;
-
- for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) {
- if (mask & hfi1_gpio_mod(dd, target, 0, 0, 0))
- break;
- udelay(2);
- }
- if (rise_usec <= 0)
- dd_dev_err(dd, "SCL interface stuck low > %d uSec\n",
- SCL_WAIT_USEC);
- }
- i2c_wait_for_writes(dd, target);
-}
-
-static u8 scl_in(struct hfi1_devdata *dd, u32 target, int wait)
-{
- u32 read_val, mask;
-
- mask = QSFP_HFI0_I2CCLK;
- /* SCL is meant to be bare-drain, so never set "OUT", just DIR */
- hfi1_gpio_mod(dd, target, 0, 0, mask);
- read_val = hfi1_gpio_mod(dd, target, 0, 0, 0);
- if (wait)
- i2c_wait_for_writes(dd, target);
- return (read_val & mask) >> GPIO_SCL_NUM;
-}
-
-static void sda_out(struct hfi1_devdata *dd, u32 target, u8 bit)
-{
- u32 mask;
-
- mask = QSFP_HFI0_I2CDAT;
-
- /* SDA is meant to be bare-drain, so never set "OUT", just DIR */
- hfi1_gpio_mod(dd, target, 0, bit ? 0 : mask, mask);
-
- i2c_wait_for_writes(dd, target);
- udelay(2);
-}
-
-static u8 sda_in(struct hfi1_devdata *dd, u32 target, int wait)
-{
- u32 read_val, mask;
-
- mask = QSFP_HFI0_I2CDAT;
- /* SDA is meant to be bare-drain, so never set "OUT", just DIR */
- hfi1_gpio_mod(dd, target, 0, 0, mask);
- read_val = hfi1_gpio_mod(dd, target, 0, 0, 0);
- if (wait)
- i2c_wait_for_writes(dd, target);
- return (read_val & mask) >> GPIO_SDA_NUM;
-}
-
-/**
- * i2c_ackrcv - see if ack following write is true
- * @dd: the hfi1_ib device
- */
-static int i2c_ackrcv(struct hfi1_devdata *dd, u32 target)
-{
- u8 ack_received;
-
- /* AT ENTRY SCL = LOW */
- /* change direction, ignore data */
- ack_received = sda_in(dd, target, 1);
- scl_out(dd, target, 1);
- ack_received = sda_in(dd, target, 1) == 0;
- scl_out(dd, target, 0);
- return ack_received;
-}
-
-static void stop_cmd(struct hfi1_devdata *dd, u32 target);
-
-/**
- * rd_byte - read a byte, sending STOP on last, else ACK
- * @dd: the hfi1_ib device
- *
- * Returns byte shifted out of device
- */
-static int rd_byte(struct hfi1_devdata *dd, u32 target, int last)
-{
- int bit_cntr, data;
-
- data = 0;
-
- for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) {
- data <<= 1;
- scl_out(dd, target, 1);
- data |= sda_in(dd, target, 0);
- scl_out(dd, target, 0);
- }
- if (last) {
- scl_out(dd, target, 1);
- stop_cmd(dd, target);
- } else {
- sda_out(dd, target, 0);
- scl_out(dd, target, 1);
- scl_out(dd, target, 0);
- sda_out(dd, target, 1);
- }
- return data;
-}
-
-/**
- * wr_byte - write a byte, one bit at a time
- * @dd: the hfi1_ib device
- * @data: the byte to write
- *
- * Returns 0 if we got the following ack, otherwise 1
- */
-static int wr_byte(struct hfi1_devdata *dd, u32 target, u8 data)
-{
- int bit_cntr;
- u8 bit;
-
- for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
- bit = (data >> bit_cntr) & 1;
- sda_out(dd, target, bit);
- scl_out(dd, target, 1);
- scl_out(dd, target, 0);
- }
- return (!i2c_ackrcv(dd, target)) ? 1 : 0;
-}
-
-/*
- * issue TWSI start sequence:
- * (both clock/data high, clock high, data low while clock is high)
- */
-static void start_seq(struct hfi1_devdata *dd, u32 target)
-{
- sda_out(dd, target, 1);
- scl_out(dd, target, 1);
- sda_out(dd, target, 0);
- udelay(1);
- scl_out(dd, target, 0);
-}
-
-/**
- * stop_seq - transmit the stop sequence
- * @dd: the hfi1_ib device
- *
- * (both clock/data low, clock high, data high while clock is high)
- */
-static void stop_seq(struct hfi1_devdata *dd, u32 target)
-{
- scl_out(dd, target, 0);
- sda_out(dd, target, 0);
- scl_out(dd, target, 1);
- sda_out(dd, target, 1);
-}
-
-/**
- * stop_cmd - transmit the stop condition
- * @dd: the hfi1_ib device
- *
- * (both clock/data low, clock high, data high while clock is high)
- */
-static void stop_cmd(struct hfi1_devdata *dd, u32 target)
-{
- stop_seq(dd, target);
- udelay(TWSI_BUF_WAIT_USEC);
-}
-
-/**
- * hfi1_twsi_reset - reset I2C communication
- * @dd: the hfi1_ib device
- * returns 0 if ok, -EIO on error
- */
-int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target)
-{
- int clock_cycles_left = 9;
- u32 mask;
-
- /* Both SCL and SDA should be high. If not, there
- * is something wrong.
- */
- mask = QSFP_HFI0_I2CCLK | QSFP_HFI0_I2CDAT;
-
- /*
- * Force pins to desired innocuous state.
- * This is the default power-on state with out=0 and dir=0,
- * So tri-stated and should be floating high (barring HW problems)
- */
- hfi1_gpio_mod(dd, target, 0, 0, mask);
-
- /* Check if SCL is low, if it is low then we have a slave device
- * misbehaving and there is not much we can do.
- */
- if (!scl_in(dd, target, 0))
- return -EIO;
-
- /* Check if SDA is low, if it is low then we have to clock SDA
- * up to 9 times for the device to release the bus
- */
- while (clock_cycles_left--) {
- if (sda_in(dd, target, 0))
- return 0;
- scl_out(dd, target, 0);
- scl_out(dd, target, 1);
- }
-
- return -EIO;
-}
-
-#define HFI1_TWSI_START 0x100
-#define HFI1_TWSI_STOP 0x200
-
-/* Write byte to TWSI, optionally prefixed with START or suffixed with
- * STOP.
- * returns 0 if OK (ACK received), else != 0
- */
-static int twsi_wr(struct hfi1_devdata *dd, u32 target, int data, int flags)
-{
- int ret = 1;
-
- if (flags & HFI1_TWSI_START)
- start_seq(dd, target);
-
- /* Leaves SCL low (from i2c_ackrcv()) */
- ret = wr_byte(dd, target, data);
-
- if (flags & HFI1_TWSI_STOP)
- stop_cmd(dd, target);
- return ret;
-}
-
-/* Added functionality for IBA7220-based cards */
-#define HFI1_TEMP_DEV 0x98
-
-/*
- * hfi1_twsi_blk_rd
- * General interface for data transfer from twsi devices.
- * One vestige of its former role is that it recognizes a device
- * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part,
- * which responded to all TWSI device codes, interpreting them as
- * address within device. On all other devices found on board handled by
- * this driver, the device is followed by a N-byte "address" which selects
- * the "register" or "offset" within the device from which data should
- * be read.
- */
-int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
- void *buffer, int len)
-{
- u8 *bp = buffer;
- int ret = 1;
- int i;
- int offset_size;
-
- /* obtain the offset size, strip it from the device address */
- offset_size = (dev >> 8) & 0xff;
- dev &= 0xff;
-
- /* allow at most a 2 byte offset */
- if (offset_size > 2)
- goto bail;
-
- if (dev == HFI1_TWSI_NO_DEV) {
- /* legacy not-really-I2C */
- addr = (addr << 1) | READ_CMD;
- ret = twsi_wr(dd, target, addr, HFI1_TWSI_START);
- } else {
- /* Actual I2C */
- if (offset_size) {
- ret = twsi_wr(dd, target,
- dev | WRITE_CMD, HFI1_TWSI_START);
- if (ret) {
- stop_cmd(dd, target);
- goto bail;
- }
-
- for (i = 0; i < offset_size; i++) {
- ret = twsi_wr(dd, target,
- (addr >> (i * 8)) & 0xff, 0);
- udelay(TWSI_BUF_WAIT_USEC);
- if (ret) {
- dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n",
- i, addr);
- goto bail;
- }
- }
- }
- ret = twsi_wr(dd, target, dev | READ_CMD, HFI1_TWSI_START);
- }
- if (ret) {
- stop_cmd(dd, target);
- goto bail;
- }
-
- /*
- * block devices keeps clocking data out as long as we ack,
- * automatically incrementing the address. Some have "pages"
- * whose boundaries will not be crossed, but the handling
- * of these is left to the caller, who is in a better
- * position to know.
- */
- while (len-- > 0) {
- /*
- * Get and store data, sending ACK if length remaining,
- * else STOP
- */
- *bp++ = rd_byte(dd, target, !len);
- }
-
- ret = 0;
-
-bail:
- return ret;
-}
-
-/*
- * hfi1_twsi_blk_wr
- * General interface for data transfer to twsi devices.
- * One vestige of its former role is that it recognizes a device
- * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part,
- * which responded to all TWSI device codes, interpreting them as
- * address within device. On all other devices found on board handled by
- * this driver, the device is followed by a N-byte "address" which selects
- * the "register" or "offset" within the device to which data should
- * be written.
- */
-int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr,
- const void *buffer, int len)
-{
- const u8 *bp = buffer;
- int ret = 1;
- int i;
- int offset_size;
-
- /* obtain the offset size, strip it from the device address */
- offset_size = (dev >> 8) & 0xff;
- dev &= 0xff;
-
- /* allow at most a 2 byte offset */
- if (offset_size > 2)
- goto bail;
-
- if (dev == HFI1_TWSI_NO_DEV) {
- if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD,
- HFI1_TWSI_START)) {
- goto failed_write;
- }
- } else {
- /* Real I2C */
- if (twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START))
- goto failed_write;
- }
-
- for (i = 0; i < offset_size; i++) {
- ret = twsi_wr(dd, target, (addr >> (i * 8)) & 0xff, 0);
- udelay(TWSI_BUF_WAIT_USEC);
- if (ret) {
- dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n",
- i, addr);
- goto bail;
- }
- }
-
- for (i = 0; i < len; i++)
- if (twsi_wr(dd, target, *bp++, 0))
- goto failed_write;
-
- ret = 0;
-
-failed_write:
- stop_cmd(dd, target);
-
-bail:
- return ret;
-}
+++ /dev/null
-#ifndef _TWSI_H
-#define _TWSI_H
-/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#define HFI1_TWSI_NO_DEV 0xFF
-
-struct hfi1_devdata;
-
-/* Bit position of SDA/SCL pins in ASIC_QSFP* registers */
-#define GPIO_SDA_NUM 1
-#define GPIO_SCL_NUM 0
-
-/* these functions must be called with qsfp_lock held */
-int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target);
-int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
- void *buffer, int len);
-int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr,
- const void *buffer, int len);
-
-#endif /* _TWSI_H */
clear_ahg(qp);
goto bail;
}
+ /*
+ * Local operations are processed immediately
+ * after all prior requests have completed.
+ */
+ if (wqe->wr.opcode == IB_WR_REG_MR ||
+ wqe->wr.opcode == IB_WR_LOCAL_INV) {
+ int local_ops = 0;
+ int err = 0;
+
+ if (qp->s_last != qp->s_cur)
+ goto bail;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
+ err = rvt_invalidate_rkey(
+ qp, wqe->wr.ex.invalidate_rkey);
+ local_ops = 1;
+ }
+ hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
+ : IB_WC_SUCCESS);
+ if (local_ops)
+ atomic_dec(&qp->local_ops_pending);
+ qp->s_hdrwords = 0;
+ goto done_free_tx;
+ }
/*
* Start a new request.
*/
struct ib_reth *reth;
int has_grh = rcv_flags & HFI1_HAS_GRH;
int ret;
- u32 bth1;
bth0 = be32_to_cpu(ohdr->bth[0]);
if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0))
return;
- bth1 = be32_to_cpu(ohdr->bth[1]);
- if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) {
- if (bth1 & HFI1_BECN_SMASK) {
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- u32 rqpn, lqpn;
- u16 rlid = be16_to_cpu(hdr->lrh[3]);
- u8 sl, sc5;
-
- lqpn = bth1 & RVT_QPN_MASK;
- rqpn = qp->remote_qpn;
-
- sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
- sl = ibp->sc_to_sl[sc5];
-
- process_becn(ppd, sl, rlid, lqpn, rqpn,
- IB_CC_SVCTYPE_UC);
- }
-
- if (bth1 & HFI1_FECN_SMASK) {
- struct ib_grh *grh = NULL;
- u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
- u16 slid = be16_to_cpu(hdr->lrh[3]);
- u16 dlid = be16_to_cpu(hdr->lrh[1]);
- u32 src_qp = qp->remote_qpn;
- u8 sc5;
-
- sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
- if (has_grh)
- grh = &hdr->u.l.grh;
-
- return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5,
- grh);
- }
- }
+ process_ecn(qp, packet, true);
psn = be32_to_cpu(ohdr->bth[2]);
opcode = (bth0 >> 24) & 0xff;
}
if (ah_attr->ah_flags & IB_AH_GRH) {
- hfi1_copy_sge(&qp->r_sge, &ah_attr->grh,
- sizeof(struct ib_grh), 1, 0);
+ struct ib_grh grh;
+ struct ib_global_route grd = ah_attr->grh;
+
+ hfi1_make_grh(ibp, &grh, &grd, 0, 0);
+ hfi1_copy_sge(&qp->r_sge, &grh,
+ sizeof(grh), 1, 0);
wc.wc_flags |= IB_WC_GRH;
} else {
hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
qp->qkey : wqe->ud_wr.remote_qkey);
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
/* disarm any ahg */
- priv->s_hdr->ahgcount = 0;
- priv->s_hdr->ahgidx = 0;
- priv->s_hdr->tx_flags = 0;
- priv->s_hdr->sde = NULL;
+ priv->s_ahg->ahgcount = 0;
+ priv->s_ahg->ahgidx = 0;
+ priv->s_ahg->tx_flags = 0;
/* pbc */
ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
struct hfi1_other_headers *ohdr = packet->ohdr;
int opcode;
u32 hdrsize = packet->hlen;
- u32 pad;
struct ib_wc wc;
u32 qkey;
u32 src_qp;
u16 dlid, pkey;
int mgmt_pkey_idx = -1;
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
bool has_grh = rcv_flags & HFI1_HAS_GRH;
u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
u32 bth1;
- int is_mcast;
- struct ib_grh *grh = NULL;
+ u8 sl_from_sc, sl;
+ u16 slid;
+ u8 extra_bytes;
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
dlid = be16_to_cpu(hdr->lrh[1]);
- is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
- (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
bth1 = be32_to_cpu(ohdr->bth[1]);
- if (unlikely(bth1 & HFI1_BECN_SMASK)) {
- /*
- * In pre-B0 h/w the CNP_OPCODE is handled via an
- * error path.
- */
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
- u8 sl;
-
- sl = ibp->sc_to_sl[sc5];
-
- process_becn(ppd, sl, 0, lqpn, 0, IB_CC_SVCTYPE_UD);
- }
+ slid = be16_to_cpu(hdr->lrh[3]);
+ pkey = (u16)be32_to_cpu(ohdr->bth[0]);
+ sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
+ extra_bytes = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ extra_bytes += (SIZE_OF_CRC << 2);
+ sl_from_sc = ibp->sc_to_sl[sc5];
- /*
- * The opcode is in the low byte when its in network order
- * (top byte when in host order).
- */
opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
opcode &= 0xff;
- pkey = (u16)be32_to_cpu(ohdr->bth[0]);
-
- if (!is_mcast && (opcode != IB_OPCODE_CNP) && bth1 & HFI1_FECN_SMASK) {
- u16 slid = be16_to_cpu(hdr->lrh[3]);
-
- return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, grh);
- }
+ process_ecn(qp, packet, (opcode != IB_OPCODE_CNP));
/*
* Get the number of bytes the message was padded by
* and drop incomplete packets.
*/
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
- if (unlikely(tlen < (hdrsize + pad + 4)))
+ if (unlikely(tlen < (hdrsize + extra_bytes)))
goto drop;
- tlen -= hdrsize + pad + 4;
+ tlen -= hdrsize + extra_bytes;
/*
* Check that the permissive LID is only used on QP0
hdr->lrh[3] == IB_LID_PERMISSIVE))
goto drop;
if (qp->ibqp.qp_num > 1) {
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- u16 slid;
-
- slid = be16_to_cpu(hdr->lrh[3]);
if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) {
/*
* Traps will not be sent for packets dropped
* IB spec (release 1.3, section 10.9.4)
*/
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
- pkey,
- (be16_to_cpu(hdr->lrh[0]) >> 4) &
- 0xF,
+ pkey, sl,
src_qp, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
+ slid, dlid);
return;
}
} else {
goto drop;
}
if (unlikely(qkey != qp->qkey)) {
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, sl,
src_qp, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
+ slid, dlid);
return;
}
/* Drop invalid MAD packets (see 13.5.3.1). */
if (unlikely(qp->ibqp.qp_num == 1 &&
- (tlen > 2048 ||
- (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))
+ (tlen > 2048 || (sc5 == 0xF))))
goto drop;
} else {
/* Received on QP0, and so by definition, this is an SMP */
struct opa_smp *smp = (struct opa_smp *)data;
- u16 slid = be16_to_cpu(hdr->lrh[3]);
if (opa_smp_check(ibp, pkey, sc5, qp, slid, smp))
goto drop;
qp->ibqp.qp_type == IB_QPT_SMI) {
if (mgmt_pkey_idx < 0) {
if (net_ratelimit()) {
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_devdata *dd = ppd->dd;
dd_dev_err(dd, "QP type %d mgmt_pkey_idx < 0 and packet not dropped???\n",
wc.pkey_index = 0;
}
- wc.slid = be16_to_cpu(hdr->lrh[3]);
- wc.sl = ibp->sc_to_sl[sc5];
+ wc.slid = slid;
+ wc.sl = sl_from_sc;
/*
* Save the LMC lower bits if the destination LID is a unicast LID.
((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *,
- struct rb_root *);
+ struct hfi1_filedata *);
static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
static int set_rcvarray_entry(struct file *, unsigned long, u32,
struct tid_group *, struct page **, unsigned);
-static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
-static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *,
- struct mm_struct *);
-static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
+static int tid_rb_insert(void *, struct mmu_rb_node *);
+static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
+ struct tid_rb_node *tnode);
+static void tid_rb_remove(void *, struct mmu_rb_node *);
+static int tid_rb_invalidate(void *, struct mmu_rb_node *);
static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
struct tid_pageset *, unsigned, u16, struct page **,
u32 *, unsigned *, unsigned *);
static int unprogram_rcvarray(struct file *, u32, struct tid_group **);
-static void clear_tid_node(struct hfi1_filedata *, u16, struct tid_rb_node *);
+static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
static struct mmu_rb_ops tid_rb_ops = {
- .insert = mmu_rb_insert,
- .remove = mmu_rb_remove,
- .invalidate = mmu_rb_invalidate
+ .insert = tid_rb_insert,
+ .remove = tid_rb_remove,
+ .invalidate = tid_rb_invalidate
};
static inline u32 rcventry2tidinfo(u32 rcventry)
spin_lock_init(&fd->tid_lock);
spin_lock_init(&fd->invalid_lock);
- fd->tid_rb_root = RB_ROOT;
if (!uctxt->subctxt_cnt || !fd->subctxt) {
exp_tid_group_init(&uctxt->tid_group_list);
if (!fd->entry_to_rb)
return -ENOMEM;
- if (!HFI1_CAP_IS_USET(TID_UNMAP)) {
+ if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) {
fd->invalid_tid_idx = 0;
fd->invalid_tids = kzalloc(uctxt->expected_count *
sizeof(u32), GFP_KERNEL);
/*
* Register MMU notifier callbacks. If the registration
- * fails, continue but turn off the TID caching for
- * all user contexts.
+ * fails, continue without TID caching for this context.
*/
- ret = hfi1_mmu_rb_register(&fd->tid_rb_root, &tid_rb_ops);
+ ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops,
+ dd->pport->hfi1_wq,
+ &fd->handler);
if (ret) {
dd_dev_info(dd,
"Failed MMU notifier registration %d\n",
ret);
- HFI1_CAP_USET(TID_UNMAP);
ret = 0;
}
}
* init.
*/
spin_lock(&fd->tid_lock);
- if (uctxt->subctxt_cnt && !HFI1_CAP_IS_USET(TID_UNMAP)) {
+ if (uctxt->subctxt_cnt && fd->handler) {
u16 remainder;
fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
* The notifier would have been removed when the process'es mm
* was freed.
*/
- if (!HFI1_CAP_IS_USET(TID_UNMAP))
- hfi1_mmu_rb_unregister(&fd->tid_rb_root);
+ if (fd->handler)
+ hfi1_mmu_rb_unregister(fd->handler);
kfree(fd->invalid_tids);
if (!uctxt->cnt) {
if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
- unlock_exp_tids(uctxt, &uctxt->tid_full_list,
- &fd->tid_rb_root);
+ unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
- unlock_exp_tids(uctxt, &uctxt->tid_used_list,
- &fd->tid_rb_root);
+ unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
list) {
list_del_init(&grp->list);
* pages, accept the amount pinned so far and program only that.
* User space knows how to deal with partially programmed buffers.
*/
- if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages)) {
+ if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) {
ret = -ENOMEM;
goto bail;
}
- pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages);
+ pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages);
if (pinned <= 0) {
ret = pinned;
goto bail;
* for example), unpin all unmapped pages so we can pin them nex time.
*/
if (mapped_pages != pinned) {
- hfi1_release_user_pages(current->mm, &pages[mapped_pages],
+ hfi1_release_user_pages(fd->mm, &pages[mapped_pages],
pinned - mapped_pages,
false);
fd->tid_n_pinned -= pinned - mapped_pages;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct tid_rb_node *node;
struct hfi1_devdata *dd = uctxt->dd;
- struct rb_root *root = &fd->tid_rb_root;
dma_addr_t phys;
/*
node->freed = false;
memcpy(node->pages, pages, sizeof(struct page *) * npages);
- if (HFI1_CAP_IS_USET(TID_UNMAP))
- ret = mmu_rb_insert(root, &node->mmu);
+ if (!fd->handler)
+ ret = tid_rb_insert(fd, &node->mmu);
else
- ret = hfi1_mmu_rb_insert(root, &node->mmu);
+ ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu);
if (ret) {
hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
node = fd->entry_to_rb[rcventry];
if (!node || node->rcventry != (uctxt->expected_base + rcventry))
return -EBADF;
- if (HFI1_CAP_IS_USET(TID_UNMAP))
- mmu_rb_remove(&fd->tid_rb_root, &node->mmu, NULL);
- else
- hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu);
if (grp)
*grp = node->grp;
- clear_tid_node(fd, fd->subctxt, node);
+
+ if (!fd->handler)
+ cacheless_tid_rb_remove(fd, node);
+ else
+ hfi1_mmu_rb_remove(fd->handler, &node->mmu);
+
return 0;
}
-static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt,
- struct tid_rb_node *node)
+static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len,
PCI_DMA_FROMDEVICE);
- hfi1_release_user_pages(current->mm, node->pages, node->npages, true);
+ hfi1_release_user_pages(fd->mm, node->pages, node->npages, true);
fd->tid_n_pinned -= node->npages;
node->grp->used--;
kfree(node);
}
+/*
+ * As a simple helper for hfi1_user_exp_rcv_free, this function deals with
+ * clearing nodes in the non-cached case.
+ */
static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
- struct exp_tid_set *set, struct rb_root *root)
+ struct exp_tid_set *set,
+ struct hfi1_filedata *fd)
{
struct tid_group *grp, *ptr;
- struct hfi1_filedata *fd = container_of(root, struct hfi1_filedata,
- tid_rb_root);
int i;
list_for_each_entry_safe(grp, ptr, &set->list, list) {
uctxt->expected_base];
if (!node || node->rcventry != rcventry)
continue;
- if (HFI1_CAP_IS_USET(TID_UNMAP))
- mmu_rb_remove(&fd->tid_rb_root,
- &node->mmu, NULL);
- else
- hfi1_mmu_rb_remove(&fd->tid_rb_root,
- &node->mmu);
- clear_tid_node(fd, -1, node);
+
+ cacheless_tid_rb_remove(fd, node);
}
}
}
}
-static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
+/*
+ * Always return 0 from this function. A non-zero return indicates that the
+ * remove operation will be called and that memory should be unpinned.
+ * However, the driver cannot unpin out from under PSM. Instead, retain the
+ * memory (by returning 0) and inform PSM that the memory is going away. PSM
+ * will call back later when it has removed the memory from its list.
+ */
+static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
{
- struct hfi1_filedata *fdata =
- container_of(root, struct hfi1_filedata, tid_rb_root);
+ struct hfi1_filedata *fdata = arg;
struct hfi1_ctxtdata *uctxt = fdata->uctxt;
struct tid_rb_node *node =
container_of(mnode, struct tid_rb_node, mmu);
return 0;
}
-static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node)
+static int tid_rb_insert(void *arg, struct mmu_rb_node *node)
{
- struct hfi1_filedata *fdata =
- container_of(root, struct hfi1_filedata, tid_rb_root);
+ struct hfi1_filedata *fdata = arg;
struct tid_rb_node *tnode =
container_of(node, struct tid_rb_node, mmu);
u32 base = fdata->uctxt->expected_base;
return 0;
}
-static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node,
- struct mm_struct *mm)
+static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
+ struct tid_rb_node *tnode)
{
- struct hfi1_filedata *fdata =
- container_of(root, struct hfi1_filedata, tid_rb_root);
- struct tid_rb_node *tnode =
- container_of(node, struct tid_rb_node, mmu);
u32 base = fdata->uctxt->expected_base;
fdata->entry_to_rb[tnode->rcventry - base] = NULL;
+ clear_tid_node(fdata, tnode);
+}
+
+static void tid_rb_remove(void *arg, struct mmu_rb_node *node)
+{
+ struct hfi1_filedata *fdata = arg;
+ struct tid_rb_node *tnode =
+ container_of(node, struct tid_rb_node, mmu);
+
+ cacheless_tid_rb_remove(fdata, tnode);
}
* could keeping caching buffers.
*
*/
-bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages)
+bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
+ u32 nlocked, u32 npages)
{
unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
size = (cache_size * (1UL << 20)); /* convert to bytes */
/* Convert to number of pages */
size = DIV_ROUND_UP(size, PAGE_SIZE);
- down_read(¤t->mm->mmap_sem);
- pinned = current->mm->pinned_vm;
- up_read(¤t->mm->mmap_sem);
+ down_read(&mm->mmap_sem);
+ pinned = mm->pinned_vm;
+ up_read(&mm->mmap_sem);
/* First, check the absolute limit against all pinned pages. */
if (pinned + npages >= ulimit && !can_lock)
return ((nlocked + npages) <= size) || can_lock;
}
-int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
- struct page **pages)
+int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t npages,
+ bool writable, struct page **pages)
{
int ret;
if (ret < 0)
return ret;
- down_write(¤t->mm->mmap_sem);
- current->mm->pinned_vm += ret;
- up_write(¤t->mm->mmap_sem);
+ down_write(&mm->mmap_sem);
+ mm->pinned_vm += ret;
+ up_write(&mm->mmap_sem);
return ret;
}
/* Last packet in the request */
#define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
-#define SDMA_REQ_IN_USE 0
+/* SDMA request flag bits */
#define SDMA_REQ_FOR_THREAD 1
#define SDMA_REQ_SEND_DONE 2
#define SDMA_REQ_HAVE_AHG 3
struct sdma_mmu_node *node;
};
-#define SDMA_CACHE_NODE_EVICT 0
-
struct sdma_mmu_node {
struct mmu_rb_node rb;
- struct list_head list;
struct hfi1_user_sdma_pkt_q *pq;
atomic_t refcount;
struct page **pages;
unsigned npages;
- unsigned long flags;
+};
+
+/* evict operation argument */
+struct evict_data {
+ u32 cleared; /* count evicted so far */
+ u32 target; /* target count to evict */
};
struct user_sdma_request {
unsigned seq);
static void activate_packet_queue(struct iowait *, int);
static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
-static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *);
-static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *,
- struct mm_struct *);
-static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
+static int sdma_rb_insert(void *, struct mmu_rb_node *);
+static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
+ void *arg2, bool *stop);
+static void sdma_rb_remove(void *, struct mmu_rb_node *);
+static int sdma_rb_invalidate(void *, struct mmu_rb_node *);
static struct mmu_rb_ops sdma_rb_ops = {
.filter = sdma_rb_filter,
.insert = sdma_rb_insert,
+ .evict = sdma_rb_evict,
.remove = sdma_rb_remove,
.invalidate = sdma_rb_invalidate
};
if (!pq->reqs)
goto pq_reqs_nomem;
+ memsize = BITS_TO_LONGS(hfi1_sdma_comp_ring_size) * sizeof(long);
+ pq->req_in_use = kzalloc(memsize, GFP_KERNEL);
+ if (!pq->req_in_use)
+ goto pq_reqs_no_in_use;
+
INIT_LIST_HEAD(&pq->list);
pq->dd = dd;
pq->ctxt = uctxt->ctxt;
pq->state = SDMA_PKT_Q_INACTIVE;
atomic_set(&pq->n_reqs, 0);
init_waitqueue_head(&pq->wait);
- pq->sdma_rb_root = RB_ROOT;
- INIT_LIST_HEAD(&pq->evict);
- spin_lock_init(&pq->evict_lock);
+ atomic_set(&pq->n_locked, 0);
+ pq->mm = fd->mm;
iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
activate_packet_queue, NULL);
cq->nentries = hfi1_sdma_comp_ring_size;
fd->cq = cq;
- ret = hfi1_mmu_rb_register(&pq->sdma_rb_root, &sdma_rb_ops);
+ ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
+ &pq->handler);
if (ret) {
dd_dev_err(dd, "Failed to register with MMU %d", ret);
goto done;
cq_nomem:
kmem_cache_destroy(pq->txreq_cache);
pq_txreq_nomem:
+ kfree(pq->req_in_use);
+pq_reqs_no_in_use:
kfree(pq->reqs);
pq_reqs_nomem:
kfree(pq);
hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit,
uctxt->ctxt, fd->subctxt);
pq = fd->pq;
- hfi1_mmu_rb_unregister(&pq->sdma_rb_root);
if (pq) {
+ if (pq->handler)
+ hfi1_mmu_rb_unregister(pq->handler);
spin_lock_irqsave(&uctxt->sdma_qlock, flags);
if (!list_empty(&pq->list))
list_del_init(&pq->list);
pq->wait,
(ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE));
kfree(pq->reqs);
+ kfree(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
kfree(pq);
fd->pq = NULL;
return 0;
}
+static u8 dlid_to_selector(u16 dlid)
+{
+ static u8 mapping[256];
+ static int initialized;
+ static u8 next;
+ int hash;
+
+ if (!initialized) {
+ memset(mapping, 0xFF, 256);
+ initialized = 1;
+ }
+
+ hash = ((dlid >> 8) ^ dlid) & 0xFF;
+ if (mapping[hash] == 0xFF) {
+ mapping[hash] = next;
+ next = (next + 1) & 0x7F;
+ }
+
+ return mapping[hash];
+}
+
int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
unsigned long dim, unsigned long *count)
{
- int ret = 0, i = 0;
+ int ret = 0, i;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_user_sdma_pkt_q *pq = fd->pq;
struct user_sdma_request *req;
u8 opcode, sc, vl;
int req_queued = 0;
+ u16 dlid;
+ u8 selector;
if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
hfi1_cdbg(
trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
(u16 *)&info);
- if (cq->comps[info.comp_idx].status == QUEUED ||
- test_bit(SDMA_REQ_IN_USE, &pq->reqs[info.comp_idx].flags)) {
- hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in QUEUED state",
- dd->unit, uctxt->ctxt, fd->subctxt,
- info.comp_idx);
- return -EBADSLT;
+
+ if (info.comp_idx >= hfi1_sdma_comp_ring_size) {
+ hfi1_cdbg(SDMA,
+ "[%u:%u:%u:%u] Invalid comp index",
+ dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
+ return -EINVAL;
}
+
+ /*
+ * Sanity check the header io vector count. Need at least 1 vector
+ * (header) and cannot be larger than the actual io vector count.
+ */
+ if (req_iovcnt(info.ctrl) < 1 || req_iovcnt(info.ctrl) > dim) {
+ hfi1_cdbg(SDMA,
+ "[%u:%u:%u:%u] Invalid iov count %d, dim %ld",
+ dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx,
+ req_iovcnt(info.ctrl), dim);
+ return -EINVAL;
+ }
+
if (!info.fragsize) {
hfi1_cdbg(SDMA,
"[%u:%u:%u:%u] Request does not specify fragsize",
dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
return -EINVAL;
}
+
+ /* Try to claim the request. */
+ if (test_and_set_bit(info.comp_idx, pq->req_in_use)) {
+ hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in use",
+ dd->unit, uctxt->ctxt, fd->subctxt,
+ info.comp_idx);
+ return -EBADSLT;
+ }
/*
- * We've done all the safety checks that we can up to this point,
- * "allocate" the request entry.
+ * All safety checks have been done and this request has been claimed.
*/
hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit,
uctxt->ctxt, fd->subctxt, info.comp_idx);
req = pq->reqs + info.comp_idx;
memset(req, 0, sizeof(*req));
- /* Mark the request as IN_USE before we start filling it in. */
- set_bit(SDMA_REQ_IN_USE, &req->flags);
- req->data_iovs = req_iovcnt(info.ctrl) - 1;
+ req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
req->pq = pq;
req->cq = cq;
req->status = -1;
memcpy(&req->info, &info, sizeof(info));
- if (req_opcode(info.ctrl) == EXPECTED)
+ if (req_opcode(info.ctrl) == EXPECTED) {
+ /* expected must have a TID info and at least one data vector */
+ if (req->data_iovs < 2) {
+ SDMA_DBG(req,
+ "Not enough vectors for expected request");
+ ret = -EINVAL;
+ goto free_req;
+ }
req->data_iovs--;
+ }
if (!info.npkts || req->data_iovs > MAX_VECTORS_PER_REQ) {
SDMA_DBG(req, "Too many vectors (%u/%u)", req->data_iovs,
MAX_VECTORS_PER_REQ);
- return -EINVAL;
+ ret = -EINVAL;
+ goto free_req;
}
/* Copy the header from the user buffer */
ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info),
idx++;
/* Save all the IO vector structures */
- while (i < req->data_iovs) {
+ for (i = 0; i < req->data_iovs; i++) {
INIT_LIST_HEAD(&req->iovs[i].list);
memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
ret = pin_vector_pages(req, &req->iovs[i]);
req->status = ret;
goto free_req;
}
- req->data_len += req->iovs[i++].iov.iov_len;
+ req->data_len += req->iovs[i].iov.iov_len;
}
SDMA_DBG(req, "total data length %u", req->data_len);
idx++;
}
+ dlid = be16_to_cpu(req->hdr.lrh[1]);
+ selector = dlid_to_selector(dlid);
+
/* Have to select the engine */
req->sde = sdma_select_engine_vl(dd,
- (u32)(uctxt->ctxt + fd->subctxt),
+ (u32)(uctxt->ctxt + fd->subctxt +
+ selector),
vl);
if (!req->sde || !sdma_running(req->sde)) {
ret = -ECOMM;
* The size of the data of the first packet is in the header
* template. However, it includes the header and ICRC, which need
* to be subtracted.
+ * The minimum representable packet data length in a header is 4 bytes,
+ * therefore, when the data length request is less than 4 bytes, there's
+ * only one packet, and the packet data length is equal to that of the
+ * request data length.
* The size of the remaining packets is the minimum of the frag
* size (MTU) or remaining data in the request.
*/
u32 len;
if (!req->seqnum) {
- len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) -
- (sizeof(tx->hdr) - 4));
+ if (req->data_len < sizeof(u32))
+ len = req->data_len;
+ else
+ len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) -
+ (sizeof(tx->hdr) - 4));
} else if (req_opcode(req->info.ctrl) == EXPECTED) {
u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) *
PAGE_SIZE;
return len;
}
+static inline u32 pad_len(u32 len)
+{
+ if (len & (sizeof(u32) - 1))
+ len += sizeof(u32) - (len & (sizeof(u32) - 1));
+ return len;
+}
+
static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
{
/* (Size of complete header - size of PBC) + 4B ICRC + data length */
if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) {
if (!req->seqnum) {
u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
- u32 lrhlen = get_lrh_len(req->hdr, datalen);
+ u32 lrhlen = get_lrh_len(req->hdr,
+ pad_len(datalen));
/*
* Copy the request header into the tx header
* because the HW needs a cacheline-aligned
static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
{
- u32 cleared = 0;
- struct sdma_mmu_node *node, *ptr;
- struct list_head to_evict = LIST_HEAD_INIT(to_evict);
-
- spin_lock(&pq->evict_lock);
- list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) {
- /* Make sure that no one is still using the node. */
- if (!atomic_read(&node->refcount)) {
- set_bit(SDMA_CACHE_NODE_EVICT, &node->flags);
- list_del_init(&node->list);
- list_add(&node->list, &to_evict);
- cleared += node->npages;
- if (cleared >= npages)
- break;
- }
- }
- spin_unlock(&pq->evict_lock);
-
- list_for_each_entry_safe(node, ptr, &to_evict, list)
- hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
+ struct evict_data evict_data;
- return cleared;
+ evict_data.cleared = 0;
+ evict_data.target = npages;
+ hfi1_mmu_rb_evict(pq->handler, &evict_data);
+ return evict_data.cleared;
}
static int pin_vector_pages(struct user_sdma_request *req,
- struct user_sdma_iovec *iovec) {
+ struct user_sdma_iovec *iovec)
+{
int ret = 0, pinned, npages, cleared;
struct page **pages;
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct sdma_mmu_node *node = NULL;
struct mmu_rb_node *rb_node;
- rb_node = hfi1_mmu_rb_extract(&pq->sdma_rb_root,
+ rb_node = hfi1_mmu_rb_extract(pq->handler,
(unsigned long)iovec->iov.iov_base,
iovec->iov.iov_len);
if (rb_node && !IS_ERR(rb_node))
node->rb.addr = (unsigned long)iovec->iov.iov_base;
node->pq = pq;
atomic_set(&node->refcount, 0);
- INIT_LIST_HEAD(&node->list);
}
npages = num_user_pages(&iovec->iov);
npages -= node->npages;
- /*
- * If rb_node is NULL, it means that this is brand new node
- * and, therefore not on the eviction list.
- * If, however, the rb_node is non-NULL, it means that the
- * node is already in RB tree and, therefore on the eviction
- * list (nodes are unconditionally inserted in the eviction
- * list). In that case, we have to remove the node prior to
- * calling the eviction function in order to prevent it from
- * freeing this node.
- */
- if (rb_node) {
- spin_lock(&pq->evict_lock);
- list_del_init(&node->list);
- spin_unlock(&pq->evict_lock);
- }
retry:
- if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) {
+ if (!hfi1_can_pin_pages(pq->dd, pq->mm,
+ atomic_read(&pq->n_locked), npages)) {
cleared = sdma_cache_evict(pq, npages);
if (cleared >= npages)
goto retry;
}
- pinned = hfi1_acquire_user_pages(
+ pinned = hfi1_acquire_user_pages(pq->mm,
((unsigned long)iovec->iov.iov_base +
(node->npages * PAGE_SIZE)), npages, 0,
pages + node->npages);
goto bail;
}
if (pinned != npages) {
- unpin_vector_pages(current->mm, pages, node->npages,
+ unpin_vector_pages(pq->mm, pages, node->npages,
pinned);
ret = -EFAULT;
goto bail;
node->pages = pages;
node->npages += pinned;
npages = node->npages;
- spin_lock(&pq->evict_lock);
- list_add(&node->list, &pq->evict);
- pq->n_locked += pinned;
- spin_unlock(&pq->evict_lock);
+ atomic_add(pinned, &pq->n_locked);
}
iovec->pages = node->pages;
iovec->npages = npages;
iovec->node = node;
- ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
+ ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb);
if (ret) {
- spin_lock(&pq->evict_lock);
- if (!list_empty(&node->list))
- list_del(&node->list);
- pq->n_locked -= node->npages;
- spin_unlock(&pq->evict_lock);
+ atomic_sub(node->npages, &pq->n_locked);
+ iovec->node = NULL;
goto bail;
}
return 0;
bail:
if (rb_node)
- unpin_vector_pages(current->mm, node->pages, 0, node->npages);
+ unpin_vector_pages(pq->mm, node->pages, 0, node->npages);
kfree(node);
return ret;
}
static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
unsigned start, unsigned npages)
{
- hfi1_release_user_pages(mm, pages + start, npages, 0);
+ hfi1_release_user_pages(mm, pages + start, npages, false);
kfree(pages);
}
/*
* Perform safety checks for any type of packet:
* - transfer size is multiple of 64bytes
- * - packet length is multiple of 4bytes
- * - entire request length is multiple of 4bytes
+ * - packet length is multiple of 4 bytes
* - packet length is not larger than MTU size
*
* These checks are only done for the first packet of the
* transfer since the header is "given" to us by user space.
* For the remainder of the packets we compute the values.
*/
- if (req->info.fragsize % PIO_BLOCK_SIZE ||
- lrhlen & 0x3 || req->data_len & 0x3 ||
+ if (req->info.fragsize % PIO_BLOCK_SIZE || lrhlen & 0x3 ||
lrhlen > get_lrh_len(*hdr, req->info.fragsize))
return -EINVAL;
struct hfi1_pkt_header *hdr = &tx->hdr;
u16 pbclen;
int ret;
- u32 tidval = 0, lrhlen = get_lrh_len(*hdr, datalen);
+ u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
/* Copy the header template to the request before modification */
memcpy(hdr, &req->hdr, sizeof(*hdr));
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct hfi1_pkt_header *hdr = &req->hdr;
u16 pbclen = le16_to_cpu(hdr->pbc[0]);
- u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, len);
+ u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len));
if (PBC2LRH(pbclen) != lrhlen) {
/* PBC.PbcLengthDWs */
continue;
if (unpin)
- hfi1_mmu_rb_remove(&req->pq->sdma_rb_root,
+ hfi1_mmu_rb_remove(req->pq->handler,
&node->rb);
else
atomic_dec(&node->refcount);
}
}
kfree(req->tids);
- clear_bit(SDMA_REQ_IN_USE, &req->flags);
+ clear_bit(req->info.comp_idx, req->pq->req_in_use);
}
static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
return (bool)(node->addr == addr);
}
-static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
+static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode)
{
struct sdma_mmu_node *node =
container_of(mnode, struct sdma_mmu_node, rb);
return 0;
}
-static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
- struct mm_struct *mm)
+/*
+ * Return 1 to remove the node from the rb tree and call the remove op.
+ *
+ * Called with the rb tree lock held.
+ */
+static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
+ void *evict_arg, bool *stop)
+{
+ struct sdma_mmu_node *node =
+ container_of(mnode, struct sdma_mmu_node, rb);
+ struct evict_data *evict_data = evict_arg;
+
+ /* is this node still being used? */
+ if (atomic_read(&node->refcount))
+ return 0; /* keep this node */
+
+ /* this node will be evicted, add its pages to our count */
+ evict_data->cleared += node->npages;
+
+ /* have enough pages been cleared? */
+ if (evict_data->cleared >= evict_data->target)
+ *stop = true;
+
+ return 1; /* remove this node */
+}
+
+static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
{
struct sdma_mmu_node *node =
container_of(mnode, struct sdma_mmu_node, rb);
- spin_lock(&node->pq->evict_lock);
- /*
- * We've been called by the MMU notifier but this node has been
- * scheduled for eviction. The eviction function will take care
- * of freeing this node.
- * We have to take the above lock first because we are racing
- * against the setting of the bit in the eviction function.
- */
- if (mm && test_bit(SDMA_CACHE_NODE_EVICT, &node->flags)) {
- spin_unlock(&node->pq->evict_lock);
- return;
- }
+ atomic_sub(node->npages, &node->pq->n_locked);
- if (!list_empty(&node->list))
- list_del(&node->list);
- node->pq->n_locked -= node->npages;
- spin_unlock(&node->pq->evict_lock);
+ unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages);
- /*
- * If mm is set, we are being called by the MMU notifier and we
- * should not pass a mm_struct to unpin_vector_page(). This is to
- * prevent a deadlock when hfi1_release_user_pages() attempts to
- * take the mmap_sem, which the MMU notifier has already taken.
- */
- unpin_vector_pages(mm ? NULL : current->mm, node->pages, 0,
- node->npages);
- /*
- * If called by the MMU notifier, we have to adjust the pinned
- * page count ourselves.
- */
- if (mm)
- mm->pinned_vm -= node->npages;
kfree(node);
}
-static int sdma_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
+static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
{
struct sdma_mmu_node *node =
container_of(mnode, struct sdma_mmu_node, rb);
struct hfi1_devdata *dd;
struct kmem_cache *txreq_cache;
struct user_sdma_request *reqs;
+ unsigned long *req_in_use;
struct iowait busy;
unsigned state;
wait_queue_head_t wait;
unsigned long unpinned;
- struct rb_root sdma_rb_root;
- u32 n_locked;
- struct list_head evict;
- spinlock_t evict_lock; /* protect evict and n_locked */
+ struct mmu_rb_handler *handler;
+ atomic_t n_locked;
+ struct mm_struct *mm;
};
struct hfi1_user_sdma_comp_q {
[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
- [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
+ [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
+ [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
+ [IB_WR_REG_MR] = IB_WC_REG_MR
};
/*
[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = &hfi1_rc_rcv,
[IB_OPCODE_RC_COMPARE_SWAP] = &hfi1_rc_rcv,
[IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv,
+ [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv,
+ [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv,
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv,
[IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv,
/*
* Make sure the QP is ready and able to accept the given opcode.
*/
-static inline int qp_ok(int opcode, struct hfi1_packet *packet)
+static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
{
- struct hfi1_ibport *ibp;
-
if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
- goto dropit;
+ return NULL;
if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
(opcode == IB_OPCODE_CNP))
- return 1;
-dropit:
- ibp = &packet->rcd->ppd->ibport_data;
- ibp->rvp.n_pkt_drops++;
- return 0;
+ return opcode_handler_tbl[opcode];
+
+ return NULL;
}
/**
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
+ opcode_handler packet_handler;
unsigned long flags;
u32 qp_num;
int lnh;
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
packet->qp = p->qp;
spin_lock_irqsave(&packet->qp->r_lock, flags);
- if (likely((qp_ok(opcode, packet))))
- opcode_handler_tbl[opcode](packet);
+ packet_handler = qp_ok(opcode, packet);
+ if (likely(packet_handler))
+ packet_handler(packet);
+ else
+ ibp->rvp.n_pkt_drops++;
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
}
/*
goto drop;
}
spin_lock_irqsave(&packet->qp->r_lock, flags);
- if (likely((qp_ok(opcode, packet))))
- opcode_handler_tbl[opcode](packet);
+ packet_handler = qp_ok(opcode, packet);
+ if (likely(packet_handler))
+ packet_handler(packet);
+ else
+ ibp->rvp.n_pkt_drops++;
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
rcu_read_unlock();
}
struct rvt_sge_state *ss,
u32 length,
struct verbs_txreq *tx,
- struct ahg_ib_header *ahdr,
+ struct hfi1_ahg_info *ahg_info,
u64 pbc)
{
int ret = 0;
- struct hfi1_pio_header *phdr = &tx->phdr;
+ struct hfi1_sdma_header *phdr = &tx->phdr;
u16 hdrbytes = tx->hdr_dwords << 2;
- if (!ahdr->ahgcount) {
+ if (!ahg_info->ahgcount) {
ret = sdma_txinit_ahg(
&tx->txreq,
- ahdr->tx_flags,
+ ahg_info->tx_flags,
hdrbytes + length,
- ahdr->ahgidx,
+ ahg_info->ahgidx,
0,
NULL,
0,
} else {
ret = sdma_txinit_ahg(
&tx->txreq,
- ahdr->tx_flags,
+ ahg_info->tx_flags,
length,
- ahdr->ahgidx,
- ahdr->ahgcount,
- ahdr->ahgdesc,
+ ahg_info->ahgidx,
+ ahg_info->ahgcount,
+ ahg_info->ahgdesc,
hdrbytes,
verbs_sdma_complete);
if (ret)
u64 pbc)
{
struct hfi1_qp_priv *priv = qp->priv;
- struct ahg_ib_header *ahdr = priv->s_hdr;
+ struct hfi1_ahg_info *ahg_info = priv->s_ahg;
u32 hdrwords = qp->s_hdrwords;
struct rvt_sge_state *ss = qp->s_cur_sge;
u32 len = qp->s_cur_size;
plen);
}
tx->wqe = qp->s_wqe;
- ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc);
+ ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahg_info, pbc);
if (unlikely(ret))
goto bail_build;
}
static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
{
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+ u16 ver = dd->dc8051_ver;
memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
+ rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) |
+ (u64)dc8051_ver_min(ver);
rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
- IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
+ IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
rdi->dparms.props.page_size_cap = PAGE_SIZE;
rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
rdi->dparms.props.vendor_part_id = dd->pcidev->device;
rdi->dparms.props.hw_ver = dd->minrev;
rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid;
- rdi->dparms.props.max_mr_size = ~0ULL;
+ rdi->dparms.props.max_mr_size = U64_MAX;
+ rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
rdi->dparms.props.max_qp = hfi1_max_qps;
rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
rdi->dparms.props.max_sge = hfi1_max_sges;
RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
}
+static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
+ size_t str_len)
+{
+ struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+ struct hfi1_ibdev *dev = dev_from_rdi(rdi);
+ u16 ver = dd_from_dev(dev)->dc8051_ver;
+
+ snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver),
+ dc8051_ver_min(ver));
+}
+
/**
* hfi1_register_ib_device - register our device with the infiniband core
* @dd: the device data structure
/* keep process mad in the driver */
ibdev->process_mad = hfi1_process_mad;
+ ibdev->get_dev_fw_str = hfi1_get_dev_fw_str;
strncpy(ibdev->node_desc, init_utsname()->nodename,
sizeof(ibdev->node_desc));
dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);
+ /* post send table */
+ dd->verbs_dev.rdi.post_parms = hfi1_post_parms;
+
ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++)
rvt_init_port(&dd->verbs_dev.rdi,
struct rvt_qp *qp = packet->qp;
u32 lqpn, rqpn = 0;
u16 rlid = 0;
- u8 sl, sc5, sc4_bit, svc_type;
- bool sc4_set = has_sc4_bit(packet);
+ u8 sl, sc5, svc_type;
switch (packet->qp->ibqp.qp_type) {
case IB_QPT_UC:
return;
}
- sc4_bit = sc4_set << 4;
- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
- sc5 |= sc4_bit;
+ sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = qp->ibqp.qp_num;
} u;
} __packed;
-struct ahg_ib_header {
- struct sdma_engine *sde;
+struct hfi1_ahg_info {
u32 ahgdesc[2];
u16 tx_flags;
u8 ahgcount;
u8 ahgidx;
- struct hfi1_ib_header ibh;
};
-struct hfi1_pio_header {
+struct hfi1_sdma_header {
__le64 pbc;
struct hfi1_ib_header hdr;
} __packed;
* pair is made common
*/
struct hfi1_qp_priv {
- struct ahg_ib_header *s_hdr; /* next header to send */
+ struct hfi1_ahg_info *s_ahg; /* ahg info for next header */
struct sdma_engine *s_sde; /* current sde */
struct send_context *s_sendcontext; /* current sendcontext */
u8 s_sc; /* SC[0..4] for next packet */
#include "iowait.h"
struct verbs_txreq {
- struct hfi1_pio_header phdr;
+ struct hfi1_sdma_header phdr;
struct sdma_txreq txreq;
struct rvt_qp *qp;
struct rvt_swqe *wqe;
ret = i40iw_manage_qhash(iwdev, cm_info,
I40IW_QHASH_TYPE_TCP_SYN,
I40IW_QHASH_MANAGE_TYPE_DELETE, NULL, false);
- kfree(child_listen_node);
- cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
i40iw_debug(&iwdev->sc_dev,
I40IW_DEBUG_CM,
"freed pointer = %p\n",
child_listen_node);
+ kfree(child_listen_node);
+ cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
}
spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
#define I40IW_RING_MOVE_TAIL(_ring) \
(_ring).tail = ((_ring).tail + 1) % (_ring).size
+#define I40IW_RING_MOVE_HEAD_NOCHECK(_ring) \
+ (_ring).head = ((_ring).head + 1) % (_ring).size
+
#define I40IW_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
(_ring).tail = ((_ring).tail + (_count)) % (_ring).size
u16 txoffset, bufoffset;
buf = i40iw_puda_get_listbuf(pbufl);
+ if (!buf)
+ return;
nextseqnum = buf->seqnum + fpdu_len;
txbuf->totallen = buf->hdrlen + fpdu_len;
txbuf->data = (u8 *)txbuf->mem.va + buf->hdrlen;
fpdu_len -= buf->datalen;
i40iw_puda_ret_bufpool(ieq, buf);
buf = i40iw_puda_get_listbuf(pbufl);
+ if (!buf)
+ return;
bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
} while (1);
bool time_stamp;
u8 cwnd_inc_limit;
bool drop_ooo_seg;
- bool dup_ack_thresh;
+ u8 dup_ack_thresh;
u8 ttl;
u8 src_mac_addr_idx;
bool avoid_stretch_ack;
i40iw_set_fragment(wqe, 0, op_info->lo_sg_list);
- for (i = 1; i < op_info->num_lo_sges; i++) {
- byte_off = 32 + (i - 1) * 16;
+ for (i = 1, byte_off = 32; i < op_info->num_lo_sges; i++) {
i40iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i]);
+ byte_off += 16;
}
wmb(); /* make sure WQE is populated before valid bit is set */
i40iw_set_fragment(wqe, 0, op_info->sg_list);
- for (i = 1; i < op_info->num_sges; i++) {
- byte_off = 32 + (i - 1) * 16;
+ for (i = 1, byte_off = 32; i < op_info->num_sges; i++) {
i40iw_set_fragment(wqe, byte_off, &op_info->sg_list[i]);
+ byte_off += 16;
}
wmb(); /* make sure WQE is populated before valid bit is set */
i40iw_set_fragment(wqe, 0, info->sg_list);
- for (i = 1; i < info->num_sges; i++) {
- byte_off = 32 + (i - 1) * 16;
+ for (i = 1, byte_off = 32; i < info->num_sges; i++) {
i40iw_set_fragment(wqe, byte_off, &info->sg_list[i]);
+ byte_off += 16;
}
wmb(); /* make sure WQE is populated before valid bit is set */
* @post_cq: update cq tail
*/
static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
- struct i40iw_cq_poll_info *info,
- bool post_cq)
+ struct i40iw_cq_poll_info *info)
{
u64 comp_ctx, qword0, qword2, qword3, wqe_qword;
u64 *cqe, *sw_wqe;
struct i40iw_ring *pring = NULL;
u32 wqe_idx, q_type, array_idx = 0;
enum i40iw_status_code ret_code = 0;
- enum i40iw_status_code ret_code2 = 0;
bool move_cq_head = true;
u8 polarity;
u8 addl_wqes = 0;
move_cq_head = false;
if (move_cq_head) {
- I40IW_RING_MOVE_HEAD(cq->cq_ring, ret_code2);
-
- if (ret_code2 && !ret_code)
- ret_code = ret_code2;
+ I40IW_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
if (I40IW_RING_GETCURRENT_HEAD(cq->cq_ring) == 0)
cq->polarity ^= 1;
- if (post_cq) {
- I40IW_RING_MOVE_TAIL(cq->cq_ring);
- set_64bit_val(cq->shadow_area, 0,
- I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
- }
+ I40IW_RING_MOVE_TAIL(cq->cq_ring);
+ set_64bit_val(cq->shadow_area, 0,
+ I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
} else {
if (info->is_srq)
return ret_code;
void (*iw_cq_request_notification)(struct i40iw_cq_uk *,
enum i40iw_completion_notify);
enum i40iw_status_code (*iw_cq_poll_completion)(struct i40iw_cq_uk *,
- struct i40iw_cq_poll_info *, bool);
+ struct i40iw_cq_poll_info *);
enum i40iw_status_code (*iw_cq_post_entries)(struct i40iw_cq_uk *, u8 count);
void (*iw_cq_clean)(void *, struct i40iw_cq_uk *);
};
status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, 0, &rqshift);
if (status)
- return -ENOSYS;
+ return -ENOMEM;
sqdepth = sq_size << sqshift;
rqdepth = rq_size << rqshift;
iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
if (init_attr->qp_type != IB_QPT_RC) {
- err_code = -ENOSYS;
+ err_code = -EINVAL;
goto error;
}
if (iwdev->push_mode)
iwmr->ibmr.lkey = stag;
iwmr->page_cnt = 1;
iwmr->pgaddrmem[0] = addr;
+ iwmr->length = size;
status = i40iw_hwreg_mr(iwdev, iwmr, access);
if (status) {
i40iw_free_stag(iwdev, stag);
{
u64 kva = 0;
- return i40iw_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva);
+ return i40iw_reg_phys_mr(pd, 0, 0, acc, &kva);
}
/**
return sprintf(buf, "%x\n", hw_rev);
}
-/**
- * i40iw_show_fw_ver
- */
-static ssize_t i40iw_show_fw_ver(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- u32 firmware_version = I40IW_FW_VERSION;
-
- return sprintf(buf, "%u.%u\n", firmware_version,
- (firmware_version & 0x000000ff));
-}
-
/**
* i40iw_show_hca
*/
}
static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, i40iw_show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL);
static struct device_attribute *i40iw_dev_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id
};
ret = ukqp->ops.iw_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
}
- if (ret)
- err = -EIO;
+ if (ret) {
+ if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
+ err = -ENOMEM;
+ else
+ err = -EINVAL;
+ }
break;
case IB_WR_RDMA_WRITE:
info.op_type = I40IW_OP_TYPE_RDMA_WRITE;
ret = ukqp->ops.iw_rdma_write(ukqp, &info, false);
}
- if (ret)
- err = -EIO;
+ if (ret) {
+ if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
+ err = -ENOMEM;
+ else
+ err = -EINVAL;
+ }
break;
case IB_WR_RDMA_READ_WITH_INV:
inv_stag = true;
info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
ret = ukqp->ops.iw_rdma_read(ukqp, &info, inv_stag, false);
- if (ret)
- err = -EIO;
+ if (ret) {
+ if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
+ err = -ENOMEM;
+ else
+ err = -EINVAL;
+ }
break;
case IB_WR_LOCAL_INV:
info.op_type = I40IW_OP_TYPE_INV_STAG;
info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
ret = ukqp->ops.iw_stag_local_invalidate(ukqp, &info, true);
if (ret)
- err = -EIO;
+ err = -ENOMEM;
break;
case IB_WR_REG_MR:
{
ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true);
if (ret)
- err = -EIO;
+ err = -ENOMEM;
break;
}
default:
struct i40iw_sge sg_list[I40IW_MAX_WQ_FRAGMENT_COUNT];
enum i40iw_status_code ret = 0;
unsigned long flags;
+ int err = 0;
iwqp = (struct i40iw_qp *)ibqp;
ukqp = &iwqp->sc_qp.qp_uk;
ret = ukqp->ops.iw_post_receive(ukqp, &post_recv);
if (ret) {
i40iw_pr_err(" post_recv err %d\n", ret);
+ if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
+ err = -ENOMEM;
+ else
+ err = -EINVAL;
*bad_wr = ib_wr;
goto out;
}
}
out:
spin_unlock_irqrestore(&iwqp->lock, flags);
- if (ret)
- return -ENOSYS;
- return 0;
+ return err;
}
/**
spin_lock_irqsave(&iwcq->lock, flags);
while (cqe_count < num_entries) {
- ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info, true);
+ ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info);
if (ret == I40IW_ERR_QUEUE_EMPTY) {
break;
} else if (ret == I40IW_ERR_QUEUE_DESTROYED) {
"iwRdmaInv"
};
+static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str,
+ size_t str_len)
+{
+ u32 firmware_version = I40IW_FW_VERSION;
+
+ snprintf(str, str_len, "%u.%u", firmware_version,
+ (firmware_version & 0x000000ff));
+}
+
/**
* i40iw_alloc_hw_stats - Allocate a hw stats structure
* @ibdev: device pointer from stack
int port_modify_mask,
struct ib_port_modify *props)
{
- return 0;
+ return -ENOSYS;
}
/**
memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name,
sizeof(iwibdev->ibdev.iwcm->ifname));
iwibdev->ibdev.get_port_immutable = i40iw_port_immutable;
+ iwibdev->ibdev.get_dev_fw_str = i40iw_get_dev_fw_str;
iwibdev->ibdev.poll_cq = i40iw_poll_cq;
iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq;
iwibdev->ibdev.post_send = i40iw_post_send;
iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
if (!iwdev->iwibdev)
- return -ENOSYS;
+ return -ENOMEM;
iwibdev = iwdev->iwibdev;
ret = ib_register_device(&iwibdev->ibdev, NULL);
kfree(iwdev->iwibdev->ibdev.iwcm);
iwdev->iwibdev->ibdev.iwcm = NULL;
ib_dealloc_device(&iwdev->iwibdev->ibdev);
- return -ENOSYS;
+ return ret;
}
if (cq->resize_buf)
return -EBUSY;
- cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
+ cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL);
if (!cq->resize_buf)
return -ENOMEM;
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
return -EFAULT;
- cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
+ cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL);
if (!cq->resize_buf)
return -ENOMEM;
return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
}
-static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct mlx4_ib_dev *dev =
- container_of(device, struct mlx4_ib_dev, ib_dev.dev);
- return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32),
- (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
- (int) dev->dev->caps.fw_ver & 0xffff);
-}
-
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id
};
+struct diag_counter {
+ const char *name;
+ u32 offset;
+};
+
+#define DIAG_COUNTER(_name, _offset) \
+ { .name = #_name, .offset = _offset }
+
+static const struct diag_counter diag_basic[] = {
+ DIAG_COUNTER(rq_num_lle, 0x00),
+ DIAG_COUNTER(sq_num_lle, 0x04),
+ DIAG_COUNTER(rq_num_lqpoe, 0x08),
+ DIAG_COUNTER(sq_num_lqpoe, 0x0C),
+ DIAG_COUNTER(rq_num_lpe, 0x18),
+ DIAG_COUNTER(sq_num_lpe, 0x1C),
+ DIAG_COUNTER(rq_num_wrfe, 0x20),
+ DIAG_COUNTER(sq_num_wrfe, 0x24),
+ DIAG_COUNTER(sq_num_mwbe, 0x2C),
+ DIAG_COUNTER(sq_num_bre, 0x34),
+ DIAG_COUNTER(sq_num_rire, 0x44),
+ DIAG_COUNTER(rq_num_rire, 0x48),
+ DIAG_COUNTER(sq_num_rae, 0x4C),
+ DIAG_COUNTER(rq_num_rae, 0x50),
+ DIAG_COUNTER(sq_num_roe, 0x54),
+ DIAG_COUNTER(sq_num_tree, 0x5C),
+ DIAG_COUNTER(sq_num_rree, 0x64),
+ DIAG_COUNTER(rq_num_rnr, 0x68),
+ DIAG_COUNTER(sq_num_rnr, 0x6C),
+ DIAG_COUNTER(rq_num_oos, 0x100),
+ DIAG_COUNTER(sq_num_oos, 0x104),
+};
+
+static const struct diag_counter diag_ext[] = {
+ DIAG_COUNTER(rq_num_dup, 0x130),
+ DIAG_COUNTER(sq_num_to, 0x134),
+};
+
+static const struct diag_counter diag_device_only[] = {
+ DIAG_COUNTER(num_cqovf, 0x1A0),
+ DIAG_COUNTER(rq_num_udsdprd, 0x118),
+};
+
+static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct mlx4_ib_diag_counters *diag = dev->diag_counters;
+
+ if (!diag[!!port_num].name)
+ return NULL;
+
+ return rdma_alloc_hw_stats_struct(diag[!!port_num].name,
+ diag[!!port_num].num_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port, int index)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct mlx4_ib_diag_counters *diag = dev->diag_counters;
+ u32 hw_value[ARRAY_SIZE(diag_device_only) +
+ ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {};
+ int ret;
+ int i;
+
+ ret = mlx4_query_diag_counters(dev->dev,
+ MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS,
+ diag[!!port].offset, hw_value,
+ diag[!!port].num_counters, port);
+
+ if (ret)
+ return ret;
+
+ for (i = 0; i < diag[!!port].num_counters; i++)
+ stats->value[i] = hw_value[i];
+
+ return diag[!!port].num_counters;
+}
+
+static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
+ const char ***name,
+ u32 **offset,
+ u32 *num,
+ bool port)
+{
+ u32 num_counters;
+
+ num_counters = ARRAY_SIZE(diag_basic);
+
+ if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT)
+ num_counters += ARRAY_SIZE(diag_ext);
+
+ if (!port)
+ num_counters += ARRAY_SIZE(diag_device_only);
+
+ *name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL);
+ if (!*name)
+ return -ENOMEM;
+
+ *offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);
+ if (!*offset)
+ goto err_name;
+
+ *num = num_counters;
+
+ return 0;
+
+err_name:
+ kfree(*name);
+ return -ENOMEM;
+}
+
+static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
+ const char **name,
+ u32 *offset,
+ bool port)
+{
+ int i;
+ int j;
+
+ for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) {
+ name[i] = diag_basic[i].name;
+ offset[i] = diag_basic[i].offset;
+ }
+
+ if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
+ for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) {
+ name[j] = diag_ext[i].name;
+ offset[j] = diag_ext[i].offset;
+ }
+ }
+
+ if (!port) {
+ for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) {
+ name[j] = diag_device_only[i].name;
+ offset[j] = diag_device_only[i].offset;
+ }
+ }
+}
+
+static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
+{
+ struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
+ int i;
+ int ret;
+ bool per_port = !!(ibdev->dev->caps.flags2 &
+ MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
+
+ for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
+ /* i == 1 means we are building port counters */
+ if (i && !per_port)
+ continue;
+
+ ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name,
+ &diag[i].offset,
+ &diag[i].num_counters, i);
+ if (ret)
+ goto err_alloc;
+
+ mlx4_ib_fill_diag_counters(ibdev, diag[i].name,
+ diag[i].offset, i);
+ }
+
+ ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats;
+ ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats;
+
+ return 0;
+
+err_alloc:
+ if (i) {
+ kfree(diag[i - 1].name);
+ kfree(diag[i - 1].offset);
+ }
+
+ return ret;
+}
+
+static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
+{
+ int i;
+
+ for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
+ kfree(ibdev->diag_counters[i].offset);
+ kfree(ibdev->diag_counters[i].name);
+ }
+}
+
#define MLX4_IB_INVALID_MAC ((u64)-1)
static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
struct net_device *dev,
return 0;
}
+static void get_fw_ver_str(struct ib_device *device, char *str,
+ size_t str_len)
+{
+ struct mlx4_ib_dev *dev =
+ container_of(device, struct mlx4_ib_dev, ib_dev);
+ snprintf(str, str_len, "%d.%d.%d",
+ (int) (dev->dev->caps.fw_ver >> 32),
+ (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
+ (int) dev->dev->caps.fw_ver & 0xffff);
+}
+
static void *mlx4_ib_add(struct mlx4_dev *dev)
{
struct mlx4_ib_dev *ibdev;
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
+ ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str;
ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
if (!mlx4_is_slave(ibdev->dev)) {
for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
- if (ib_register_device(&ibdev->ib_dev, NULL))
+ if (mlx4_ib_alloc_diag_counters(ibdev))
goto err_steer_free_bitmap;
+ if (ib_register_device(&ibdev->ib_dev, NULL))
+ goto err_diag_counters;
+
if (mlx4_ib_mad_init(ibdev))
goto err_reg;
err_reg:
ib_unregister_device(&ibdev->ib_dev);
+err_diag_counters:
+ mlx4_ib_diag_cleanup(ibdev);
+
err_steer_free_bitmap:
kfree(ibdev->ib_uc_qpns_bitmap);
mlx4_ib_close_sriov(ibdev);
mlx4_ib_mad_cleanup(ibdev);
ib_unregister_device(&ibdev->ib_dev);
+ mlx4_ib_diag_cleanup(ibdev);
if (ibdev->iboe.nb.notifier_call) {
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
pr_warn("failure unregistering notifier\n");
u32 default_counter;
};
+#define MLX4_DIAG_COUNTERS_TYPES 2
+
+struct mlx4_ib_diag_counters {
+ const char **name;
+ u32 *offset;
+ u32 num_counters;
+};
+
struct mlx4_ib_dev {
struct ib_device ib_dev;
struct mlx4_dev *dev;
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
+ struct mlx4_ib_diag_counters diag_counters[MLX4_DIAG_COUNTERS_TYPES];
};
struct ib_event_work {
item->key = be32_to_cpu(cqe->mkey);
}
+static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries,
+ struct ib_wc *wc, int *npolled)
+{
+ struct mlx5_ib_wq *wq;
+ unsigned int cur;
+ unsigned int idx;
+ int np;
+ int i;
+
+ wq = &qp->sq;
+ cur = wq->head - wq->tail;
+ np = *npolled;
+
+ if (cur == 0)
+ return;
+
+ for (i = 0; i < cur && np < num_entries; i++) {
+ idx = wq->last_poll & (wq->wqe_cnt - 1);
+ wc->wr_id = wq->wrid[idx];
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
+ wq->tail++;
+ np++;
+ wc->qp = &qp->ibqp;
+ wc++;
+ wq->last_poll = wq->w_list[idx].next;
+ }
+ *npolled = np;
+}
+
+static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries,
+ struct ib_wc *wc, int *npolled)
+{
+ struct mlx5_ib_wq *wq;
+ unsigned int cur;
+ int np;
+ int i;
+
+ wq = &qp->rq;
+ cur = wq->head - wq->tail;
+ np = *npolled;
+
+ if (cur == 0)
+ return;
+
+ for (i = 0; i < cur && np < num_entries; i++) {
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
+ wq->tail++;
+ np++;
+ wc->qp = &qp->ibqp;
+ wc++;
+ }
+ *npolled = np;
+}
+
+static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
+ struct ib_wc *wc, int *npolled)
+{
+ struct mlx5_ib_qp *qp;
+
+ *npolled = 0;
+ /* Find uncompleted WQEs belonging to that cq and retrun mmics ones */
+ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
+ sw_send_comp(qp, num_entries, wc + *npolled, npolled);
+ if (*npolled >= num_entries)
+ return;
+ }
+
+ list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
+ sw_recv_comp(qp, num_entries, wc + *npolled, npolled);
+ if (*npolled >= num_entries)
+ return;
+ }
+}
+
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
struct mlx5_ib_qp **cur_qp,
struct ib_wc *wc)
{
struct mlx5_ib_cq *cq = to_mcq(ibcq);
struct mlx5_ib_qp *cur_qp = NULL;
+ struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
+ struct mlx5_core_dev *mdev = dev->mdev;
unsigned long flags;
int soft_polled = 0;
int npolled;
int err = 0;
spin_lock_irqsave(&cq->lock, flags);
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled);
+ goto out;
+ }
if (unlikely(!list_empty(&cq->wc_list)))
soft_polled = poll_soft_wc(cq, num_entries, wc);
if (npolled)
mlx5_cq_set_ci(&cq->mcq);
-
+out:
spin_unlock_irqrestore(&cq->lock, flags);
if (err == 0 || err == -EAGAIN)
cq->resize_buf = NULL;
cq->resize_umem = NULL;
cq->create_flags = attr->flags;
+ INIT_LIST_HEAD(&cq->list_send_qp);
+ INIT_LIST_HEAD(&cq->list_recv_qp);
if (context) {
err = create_cq_user(dev, udata, context, cq, entries,
return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
}
-static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index)
-{
- return ++index % gsi->cap.max_send_wr;
-}
-
-#define for_each_outstanding_wr(gsi, index) \
- for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \
- index = next_outstanding(gsi, index))
-
/* Call with gsi->lock locked */
static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
{
struct mlx5_ib_gsi_wr *wr;
u32 index;
- for_each_outstanding_wr(gsi, index) {
- wr = &gsi->outstanding_wrs[index];
+ for (index = gsi->outstanding_ci; index != gsi->outstanding_pi;
+ index++) {
+ wr = &gsi->outstanding_wrs[index % gsi->cap.max_send_wr];
if (!wr->completed)
break;
return -ENOMEM;
}
- gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi];
- gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi);
+ gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi %
+ gsi->cap.max_send_wr];
+ gsi->outstanding_pi++;
if (!wc) {
memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
#include <asm/pat.h>
#endif
#include <linux/sched.h>
+#include <linux/delay.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
+#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <linux/in.h>
int max_rq_sg;
int max_sq_sg;
u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
+ struct mlx5_ib_query_device_resp resp = {};
+ size_t resp_len;
+ u64 max_tso;
- if (uhw->inlen || uhw->outlen)
+ resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
+ if (uhw->outlen && uhw->outlen < resp_len)
+ return -EINVAL;
+ else
+ resp.response_length = resp_len;
+
+ if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
return -EINVAL;
memset(props, 0, sizeof(*props));
if (MLX5_CAP_GEN(mdev, block_lb_mc))
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
- if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
- (MLX5_CAP_ETH(dev->mdev, csum_cap)))
+ if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
+ if (MLX5_CAP_ETH(mdev, csum_cap))
props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+ if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
+ max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
+ if (max_tso) {
+ resp.tso_caps.max_tso = 1 << max_tso;
+ resp.tso_caps.supported_qpts |=
+ 1 << IB_QPT_RAW_PACKET;
+ resp.response_length += sizeof(resp.tso_caps);
+ }
+ }
+ }
+
if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
props->device_cap_flags |= IB_DEVICE_UD_TSO;
if (!mlx5_core_is_pf(mdev))
props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
+ if (uhw->outlen) {
+ err = ib_copy_to_udata(uhw, &resp, resp.response_length);
+
+ if (err)
+ return err;
+ }
+
return 0;
}
goto out_uars;
}
+ INIT_LIST_HEAD(&context->vma_private_list);
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
if (field_avail(typeof(resp), cqe_version, udata->outlen))
resp.response_length += sizeof(resp.cqe_version);
+ if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
+ resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE;
+ resp.response_length += sizeof(resp.cmds_supp_uhw);
+ }
+
/*
* We don't want to expose information from the PCI bar that is located
* after 4096 bytes, so if the arch only supports larger pages, let's
offsetof(struct mlx5_init_seg, internal_timer_h) %
PAGE_SIZE;
resp.response_length += sizeof(resp.hca_core_clock_offset) +
- sizeof(resp.reserved2) +
- sizeof(resp.reserved3);
+ sizeof(resp.reserved2);
}
err = ib_copy_to_udata(udata, &resp, resp.response_length);
return get_arg(offset);
}
+static void mlx5_ib_vma_open(struct vm_area_struct *area)
+{
+ /* vma_open is called when a new VMA is created on top of our VMA. This
+ * is done through either mremap flow or split_vma (usually due to
+ * mlock, madvise, munmap, etc.) We do not support a clone of the VMA,
+ * as this VMA is strongly hardware related. Therefore we set the
+ * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
+ * calling us again and trying to do incorrect actions. We assume that
+ * the original VMA size is exactly a single page, and therefore all
+ * "splitting" operation will not happen to it.
+ */
+ area->vm_ops = NULL;
+}
+
+static void mlx5_ib_vma_close(struct vm_area_struct *area)
+{
+ struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;
+
+ /* It's guaranteed that all VMAs opened on a FD are closed before the
+ * file itself is closed, therefore no sync is needed with the regular
+ * closing flow. (e.g. mlx5 ib_dealloc_ucontext)
+ * However need a sync with accessing the vma as part of
+ * mlx5_ib_disassociate_ucontext.
+ * The close operation is usually called under mm->mmap_sem except when
+ * process is exiting.
+ * The exiting case is handled explicitly as part of
+ * mlx5_ib_disassociate_ucontext.
+ */
+ mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;
+
+ /* setting the vma context pointer to null in the mlx5_ib driver's
+ * private data, to protect a race condition in
+ * mlx5_ib_disassociate_ucontext().
+ */
+ mlx5_ib_vma_priv_data->vma = NULL;
+ list_del(&mlx5_ib_vma_priv_data->list);
+ kfree(mlx5_ib_vma_priv_data);
+}
+
+static const struct vm_operations_struct mlx5_ib_vm_ops = {
+ .open = mlx5_ib_vma_open,
+ .close = mlx5_ib_vma_close
+};
+
+static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
+ struct mlx5_ib_ucontext *ctx)
+{
+ struct mlx5_ib_vma_private_data *vma_prv;
+ struct list_head *vma_head = &ctx->vma_private_list;
+
+ vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
+ if (!vma_prv)
+ return -ENOMEM;
+
+ vma_prv->vma = vma;
+ vma->vm_private_data = vma_prv;
+ vma->vm_ops = &mlx5_ib_vm_ops;
+
+ list_add(&vma_prv->list, vma_head);
+
+ return 0;
+}
+
+static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+ int ret;
+ struct vm_area_struct *vma;
+ struct mlx5_ib_vma_private_data *vma_private, *n;
+ struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
+ struct task_struct *owning_process = NULL;
+ struct mm_struct *owning_mm = NULL;
+
+ owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
+ if (!owning_process)
+ return;
+
+ owning_mm = get_task_mm(owning_process);
+ if (!owning_mm) {
+ pr_info("no mm, disassociate ucontext is pending task termination\n");
+ while (1) {
+ put_task_struct(owning_process);
+ usleep_range(1000, 2000);
+ owning_process = get_pid_task(ibcontext->tgid,
+ PIDTYPE_PID);
+ if (!owning_process ||
+ owning_process->state == TASK_DEAD) {
+ pr_info("disassociate ucontext done, task was terminated\n");
+ /* in case task was dead need to release the
+ * task struct.
+ */
+ if (owning_process)
+ put_task_struct(owning_process);
+ return;
+ }
+ }
+ }
+
+ /* need to protect from a race on closing the vma as part of
+ * mlx5_ib_vma_close.
+ */
+ down_read(&owning_mm->mmap_sem);
+ list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
+ list) {
+ vma = vma_private->vma;
+ ret = zap_vma_ptes(vma, vma->vm_start,
+ PAGE_SIZE);
+ WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__);
+ /* context going to be destroyed, should
+ * not access ops any more.
+ */
+ vma->vm_ops = NULL;
+ list_del(&vma_private->list);
+ kfree(vma_private);
+ }
+ up_read(&owning_mm->mmap_sem);
+ mmput(owning_mm);
+ put_task_struct(owning_process);
+}
+
static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
{
switch (cmd) {
}
static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
- struct vm_area_struct *vma, struct mlx5_uuar_info *uuari)
+ struct vm_area_struct *vma,
+ struct mlx5_ib_ucontext *context)
{
+ struct mlx5_uuar_info *uuari = &context->uuari;
int err;
unsigned long idx;
phys_addr_t pfn, pa;
mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
vma->vm_start, &pa);
- return 0;
+ return mlx5_ib_set_vma_data(vma, context);
}
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
- struct mlx5_uuar_info *uuari = &context->uuari;
unsigned long command;
phys_addr_t pfn;
case MLX5_IB_MMAP_WC_PAGE:
case MLX5_IB_MMAP_NC_PAGE:
case MLX5_IB_MMAP_REGULAR_PAGE:
- return uar_mmap(dev, command, vma, uuari);
+ return uar_mmap(dev, command, vma, context);
case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
return -ENOSYS;
&ib_spec->ipv4.val.dst_ip,
sizeof(ib_spec->ipv4.val.dst_ip));
break;
+ case IB_FLOW_SPEC_IPV6:
+ if (ib_spec->size != sizeof(ib_spec->ipv6))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ ethertype, ETH_P_IPV6);
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.mask.src_ip,
+ sizeof(ib_spec->ipv6.mask.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.val.src_ip,
+ sizeof(ib_spec->ipv6.val.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.mask.dst_ip,
+ sizeof(ib_spec->ipv6.mask.dst_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.val.dst_ip,
+ sizeof(ib_spec->ipv6.val.dst_ip));
+ break;
case IB_FLOW_SPEC_TCP:
if (ib_spec->size != sizeof(ib_spec->tcp_udp))
return -EINVAL;
return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
}
-static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
- return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
- fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
-}
-
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
static struct device_attribute *mlx5_class_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id,
&dev_attr_fw_pages,
mutex_unlock(&ports->devr->mutex);
}
+static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
+{
+ struct mlx5_ib_qp *mqp;
+ struct mlx5_ib_cq *send_mcq, *recv_mcq;
+ struct mlx5_core_cq *mcq;
+ struct list_head cq_armed_list;
+ unsigned long flags_qp;
+ unsigned long flags_cq;
+ unsigned long flags;
+
+ INIT_LIST_HEAD(&cq_armed_list);
+
+ /* Go over qp list reside on that ibdev, sync with create/destroy qp.*/
+ spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
+ list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
+ spin_lock_irqsave(&mqp->sq.lock, flags_qp);
+ if (mqp->sq.tail != mqp->sq.head) {
+ send_mcq = to_mcq(mqp->ibqp.send_cq);
+ spin_lock_irqsave(&send_mcq->lock, flags_cq);
+ if (send_mcq->mcq.comp &&
+ mqp->ibqp.send_cq->comp_handler) {
+ if (!send_mcq->mcq.reset_notify_added) {
+ send_mcq->mcq.reset_notify_added = 1;
+ list_add_tail(&send_mcq->mcq.reset_notify,
+ &cq_armed_list);
+ }
+ }
+ spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
+ }
+ spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
+ spin_lock_irqsave(&mqp->rq.lock, flags_qp);
+ /* no handling is needed for SRQ */
+ if (!mqp->ibqp.srq) {
+ if (mqp->rq.tail != mqp->rq.head) {
+ recv_mcq = to_mcq(mqp->ibqp.recv_cq);
+ spin_lock_irqsave(&recv_mcq->lock, flags_cq);
+ if (recv_mcq->mcq.comp &&
+ mqp->ibqp.recv_cq->comp_handler) {
+ if (!recv_mcq->mcq.reset_notify_added) {
+ recv_mcq->mcq.reset_notify_added = 1;
+ list_add_tail(&recv_mcq->mcq.reset_notify,
+ &cq_armed_list);
+ }
+ }
+ spin_unlock_irqrestore(&recv_mcq->lock,
+ flags_cq);
+ }
+ }
+ spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
+ }
+ /*At that point all inflight post send were put to be executed as of we
+ * lock/unlock above locks Now need to arm all involved CQs.
+ */
+ list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
+ mcq->comp(mcq);
+ }
+ spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
+}
+
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param)
{
case MLX5_DEV_EVENT_SYS_ERROR:
ibdev->ib_active = false;
ibev.event = IB_EVENT_DEVICE_FATAL;
+ mlx5_ib_handle_internal_error(ibdev);
break;
case MLX5_DEV_EVENT_PORT_UP:
return 0;
}
+static void get_dev_fw_str(struct ib_device *ibdev, char *str,
+ size_t str_len)
+{
+ struct mlx5_ib_dev *dev =
+ container_of(ibdev, struct mlx5_ib_dev, ib_dev);
+ snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev),
+ fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
+}
+
static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
{
int err;
unregister_netdevice_notifier(&dev->roce.nb);
}
+static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
+{
+ unsigned int i;
+
+ for (i = 0; i < dev->num_ports; i++)
+ mlx5_core_dealloc_q_counter(dev->mdev,
+ dev->port[i].q_cnt_id);
+}
+
+static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
+{
+ int i;
+ int ret;
+
+ for (i = 0; i < dev->num_ports; i++) {
+ ret = mlx5_core_alloc_q_counter(dev->mdev,
+ &dev->port[i].q_cnt_id);
+ if (ret) {
+ mlx5_ib_warn(dev,
+ "couldn't allocate queue counter for port %d, err %d\n",
+ i + 1, ret);
+ goto dealloc_counters;
+ }
+ }
+
+ return 0;
+
+dealloc_counters:
+ while (--i >= 0)
+ mlx5_core_dealloc_q_counter(dev->mdev,
+ dev->port[i].q_cnt_id);
+
+ return ret;
+}
+
+static const char * const names[] = {
+ "rx_write_requests",
+ "rx_read_requests",
+ "rx_atomic_requests",
+ "out_of_buffer",
+ "out_of_sequence",
+ "duplicate_request",
+ "rnr_nak_retry_err",
+ "packet_seq_err",
+ "implied_nak_seq_err",
+ "local_ack_timeout_err",
+};
+
+static const size_t stats_offsets[] = {
+ MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
+ MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
+ MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
+ MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
+ MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
+ MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
+ MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
+ MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
+ MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
+ MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
+};
+
+static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets));
+
+ /* We support only per port stats */
+ if (port_num == 0)
+ return NULL;
+
+ return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port, int index)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
+ void *out;
+ __be32 val;
+ int ret;
+ int i;
+
+ if (!port || !stats)
+ return -ENOSYS;
+
+ out = mlx5_vzalloc(outlen);
+ if (!out)
+ return -ENOMEM;
+
+ ret = mlx5_core_query_q_counter(dev->mdev,
+ dev->port[port - 1].q_cnt_id, 0,
+ out, outlen);
+ if (ret)
+ goto free;
+
+ for (i = 0; i < ARRAY_SIZE(names); i++) {
+ val = *(__be32 *)(out + stats_offsets[i]);
+ stats->value[i] = (u64)be32_to_cpu(val);
+ }
+free:
+ kvfree(out);
+ return ARRAY_SIZE(names);
+}
+
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
dev->mdev = mdev;
+ dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
+ GFP_KERNEL);
+ if (!dev->port)
+ goto err_dealloc;
+
rwlock_init(&dev->roce.netdev_lock);
err = get_port_caps(dev);
if (err)
- goto err_dealloc;
+ goto err_free_port;
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
+ dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
if (mlx5_core_is_pf(mdev)) {
dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
}
+ dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;
+
mlx5_ib_internal_fill_odp_caps(dev);
if (MLX5_CAP_GEN(mdev, imaicl)) {
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
}
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
+ MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+ dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
+ dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
+ }
+
if (MLX5_CAP_GEN(mdev, xrc)) {
dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
IB_LINK_LAYER_ETHERNET) {
dev->ib_dev.create_flow = mlx5_ib_create_flow;
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
+ dev->ib_dev.create_wq = mlx5_ib_create_wq;
+ dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
+ dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
+ dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
+ dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
}
err = init_node_data(dev);
if (err)
mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
+ INIT_LIST_HEAD(&dev->qp_list);
+ spin_lock_init(&dev->reset_flow_resource_lock);
if (ll == IB_LINK_LAYER_ETHERNET) {
err = mlx5_enable_roce(dev);
if (err)
goto err_rsrc;
- err = ib_register_device(&dev->ib_dev, NULL);
+ err = mlx5_ib_alloc_q_counters(dev);
if (err)
goto err_odp;
+ err = ib_register_device(&dev->ib_dev, NULL);
+ if (err)
+ goto err_q_cnt;
+
err = create_umr_res(dev);
if (err)
goto err_dev;
err_dev:
ib_unregister_device(&dev->ib_dev);
+err_q_cnt:
+ mlx5_ib_dealloc_q_counters(dev);
+
err_odp:
mlx5_ib_odp_remove_one(dev);
if (ll == IB_LINK_LAYER_ETHERNET)
mlx5_disable_roce(dev);
+err_free_port:
+ kfree(dev->port);
+
err_dealloc:
ib_dealloc_device((struct ib_device *)dev);
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
ib_unregister_device(&dev->ib_dev);
+ mlx5_ib_dealloc_q_counters(dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
if (ll == IB_LINK_LAYER_ETHERNET)
mlx5_disable_roce(dev);
+ kfree(dev->port);
ib_dealloc_device(&dev->ib_dev);
}
MLX5_CQE_VERSION_V1,
};
+struct mlx5_ib_vma_private_data {
+ struct list_head list;
+ struct vm_area_struct *vma;
+};
+
struct mlx5_ib_ucontext {
struct ib_ucontext ibucontext;
struct list_head db_page_list;
u8 cqe_version;
/* Transport Domain number */
u32 tdn;
+ struct list_head vma_private_list;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
void *qend;
};
+struct mlx5_ib_rwq {
+ struct ib_wq ibwq;
+ u32 rqn;
+ u32 rq_num_pas;
+ u32 log_rq_stride;
+ u32 log_rq_size;
+ u32 rq_page_offset;
+ u32 log_page_size;
+ struct ib_umem *umem;
+ size_t buf_size;
+ unsigned int page_shift;
+ int create_type;
+ struct mlx5_db db;
+ u32 user_index;
+ u32 wqe_count;
+ u32 wqe_shift;
+ int wq_sig;
+};
+
enum {
MLX5_QP_USER,
MLX5_QP_KERNEL,
MLX5_QP_EMPTY
};
+enum {
+ MLX5_WQ_USER,
+ MLX5_WQ_KERNEL
+};
+
+struct mlx5_ib_rwq_ind_table {
+ struct ib_rwq_ind_table ib_rwq_ind_tbl;
+ u32 rqtn;
+};
+
/*
* Connect-IB can trigger up to four concurrent pagefaults
* per-QP.
u8 resp_depth;
};
+struct mlx5_ib_rss_qp {
+ u32 tirn;
+};
+
struct mlx5_ib_rq {
struct mlx5_ib_qp_base base;
struct mlx5_ib_wq *rq;
union {
struct mlx5_ib_qp_trans trans_qp;
struct mlx5_ib_raw_packet_qp raw_packet_qp;
+ struct mlx5_ib_rss_qp rss_qp;
};
struct mlx5_buf buf;
spinlock_t disable_page_faults_lock;
struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
#endif
+ struct list_head qps_list;
+ struct list_head cq_recv_list;
+ struct list_head cq_send_list;
};
struct mlx5_ib_cq_buf {
struct mlx5_ib_cq_buf *resize_buf;
struct ib_umem *resize_umem;
int cqe_size;
+ struct list_head list_send_qp;
+ struct list_head list_recv_qp;
u32 create_flags;
struct list_head wc_list;
enum ib_cq_notify_flags notify_flags;
struct mutex mutex;
};
+struct mlx5_ib_port {
+ u16 q_cnt_id;
+};
+
struct mlx5_roce {
/* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
* netdev pointer
struct srcu_struct mr_srcu;
#endif
struct mlx5_ib_flow_db flow_db;
+ /* protect resources needed as part of reset flow */
+ spinlock_t reset_flow_resource_lock;
+ struct list_head qp_list;
+ /* Array with num_ports elements */
+ struct mlx5_ib_port *port;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
return container_of(ibqp, struct mlx5_ib_qp, ibqp);
}
+static inline struct mlx5_ib_rwq *to_mrwq(struct ib_wq *ibwq)
+{
+ return container_of(ibwq, struct mlx5_ib_rwq, ibwq);
+}
+
+static inline struct mlx5_ib_rwq_ind_table *to_mrwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
+{
+ return container_of(ib_rwq_ind_tbl, struct mlx5_ib_rwq_ind_table, ib_rwq_ind_tbl);
+}
+
static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)
{
return container_of(msrq, struct mlx5_ib_srq, msrq);
int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
+struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx5_ib_destroy_wq(struct ib_wq *wq);
+int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask, struct ib_udata *udata);
+struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
extern struct workqueue_struct *mlx5_ib_page_fault_wq;
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
+ struct mlx5_core_dev *mdev = dev->mdev;
struct umr_common *umrc = &dev->umrc;
struct mlx5_ib_umr_context umr_context;
struct mlx5_umr_wr umrwr = {};
struct ib_send_wr *bad;
int err;
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ return 0;
+
mlx5_ib_init_umr_context(&umr_context);
umrwr.wr.wr_cqe = &umr_context.cqe;
u8 rsvd0[16];
};
+static void get_cqs(enum ib_qp_type qp_type,
+ struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
+ struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq);
+
static int is_qp0(enum ib_qp_type qp_type)
{
return qp_type == IB_QPT_SMI;
}
}
+static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
+ struct mlx5_ib_cq *recv_cq);
+static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
+ struct mlx5_ib_cq *recv_cq);
+
static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
{
return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
return err;
}
+static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq)
+{
+ struct mlx5_ib_ucontext *context;
+
+ context = to_mucontext(pd->uobject->context);
+ mlx5_ib_db_unmap_user(context, &rwq->db);
+ if (rwq->umem)
+ ib_umem_release(rwq->umem);
+}
+
+static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_rwq *rwq,
+ struct mlx5_ib_create_wq *ucmd)
+{
+ struct mlx5_ib_ucontext *context;
+ int page_shift = 0;
+ int npages;
+ u32 offset = 0;
+ int ncont = 0;
+ int err;
+
+ if (!ucmd->buf_addr)
+ return -EINVAL;
+
+ context = to_mucontext(pd->uobject->context);
+ rwq->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr,
+ rwq->buf_size, 0, 0);
+ if (IS_ERR(rwq->umem)) {
+ mlx5_ib_dbg(dev, "umem_get failed\n");
+ err = PTR_ERR(rwq->umem);
+ return err;
+ }
+
+ mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift,
+ &ncont, NULL);
+ err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift,
+ &rwq->rq_page_offset);
+ if (err) {
+ mlx5_ib_warn(dev, "bad offset\n");
+ goto err_umem;
+ }
+
+ rwq->rq_num_pas = ncont;
+ rwq->page_shift = page_shift;
+ rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE);
+
+ mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n",
+ (unsigned long long)ucmd->buf_addr, rwq->buf_size,
+ npages, page_shift, ncont, offset);
+
+ err = mlx5_ib_db_map_user(context, ucmd->db_addr, &rwq->db);
+ if (err) {
+ mlx5_ib_dbg(dev, "map failed\n");
+ goto err_umem;
+ }
+
+ rwq->create_type = MLX5_WQ_USER;
+ return 0;
+
+err_umem:
+ ib_umem_release(rwq->umem);
+ return err;
+}
+
static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_qp *qp, struct ib_udata *udata,
struct ib_qp_init_attr *attr,
rq->doorbell = &qp->db;
}
+static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
+{
+ mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn);
+}
+
+static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct ib_uobject *uobj = pd->uobject;
+ struct ib_ucontext *ucontext = uobj->context;
+ struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
+ struct mlx5_ib_create_qp_resp resp = {};
+ int inlen;
+ int err;
+ u32 *in;
+ void *tirc;
+ void *hfso;
+ u32 selected_fields = 0;
+ size_t min_resp_len;
+ u32 tdn = mucontext->tdn;
+ struct mlx5_ib_create_qp_rss ucmd = {};
+ size_t required_cmd_sz;
+
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET)
+ return -EOPNOTSUPP;
+
+ if (init_attr->create_flags || init_attr->send_cq)
+ return -EINVAL;
+
+ min_resp_len = offsetof(typeof(resp), uuar_index) + sizeof(resp.uuar_index);
+ if (udata->outlen < min_resp_len)
+ return -EINVAL;
+
+ required_cmd_sz = offsetof(typeof(ucmd), reserved1) + sizeof(ucmd.reserved1);
+ if (udata->inlen < required_cmd_sz) {
+ mlx5_ib_dbg(dev, "invalid inlen\n");
+ return -EINVAL;
+ }
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd))) {
+ mlx5_ib_dbg(dev, "inlen is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
+ mlx5_ib_dbg(dev, "copy failed\n");
+ return -EFAULT;
+ }
+
+ if (ucmd.comp_mask) {
+ mlx5_ib_dbg(dev, "invalid comp mask\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)) || ucmd.reserved1) {
+ mlx5_ib_dbg(dev, "invalid reserved\n");
+ return -EOPNOTSUPP;
+ }
+
+ err = ib_copy_to_udata(udata, &resp, min_resp_len);
+ if (err) {
+ mlx5_ib_dbg(dev, "copy failed\n");
+ return -EINVAL;
+ }
+
+ inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ MLX5_SET(tirc, tirc, disp_type,
+ MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table,
+ init_attr->rwq_ind_tbl->ind_tbl_num);
+ MLX5_SET(tirc, tirc, transport_domain, tdn);
+
+ hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
+ switch (ucmd.rx_hash_function) {
+ case MLX5_RX_HASH_FUNC_TOEPLITZ:
+ {
+ void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
+ size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
+
+ if (len != ucmd.rx_key_len) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
+ MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
+ memcpy(rss_key, ucmd.rx_hash_key, len);
+ break;
+ }
+ default:
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ if (!ucmd.rx_hash_fields_mask) {
+ /* special case when this TIR serves as steering entry without hashing */
+ if (!init_attr->rwq_ind_tbl->log_ind_tbl_size)
+ goto create_tir;
+ err = -EINVAL;
+ goto err;
+ }
+
+ if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) &&
+ ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ /* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */
+ if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4))
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+
+ if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) &&
+ ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ /* If none of TCP & UDP SRC/DST was set - this bit field is ignored */
+ if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_TCP);
+ else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_UDP);
+
+ if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6))
+ selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP;
+
+ if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
+ selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP;
+
+ if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP))
+ selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT;
+
+ if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
+ selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT;
+
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
+
+create_tir:
+ err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
+
+ if (err)
+ goto err;
+
+ kvfree(in);
+ /* qpn is reserved for that QP */
+ qp->trans_qp.base.mqp.qpn = 0;
+ return 0;
+
+err:
+ kvfree(in);
+ return err;
+}
+
static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, struct mlx5_ib_qp *qp)
struct mlx5_ib_create_qp_resp resp;
struct mlx5_create_qp_mbox_in *in;
struct mlx5_ib_create_qp ucmd;
+ struct mlx5_ib_cq *send_cq;
+ struct mlx5_ib_cq *recv_cq;
+ unsigned long flags;
int inlen = sizeof(*in);
int err;
u32 uidx = MLX5_IB_DEFAULT_UIDX;
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
+ if (init_attr->rwq_ind_tbl) {
+ if (!udata)
+ return -ENOSYS;
+
+ err = create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata);
+ return err;
+ }
+
if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
if (!MLX5_CAP_GEN(mdev, block_lb_mc)) {
mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n");
base->container_mibqp = qp;
base->mqp.event = mlx5_ib_qp_event;
+ get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq,
+ &send_cq, &recv_cq);
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ mlx5_ib_lock_cqs(send_cq, recv_cq);
+ /* Maintain device to QPs access, needed for further handling via reset
+ * flow
+ */
+ list_add_tail(&qp->qps_list, &dev->qp_list);
+ /* Maintain CQ to QPs access, needed for further handling via reset flow
+ */
+ if (send_cq)
+ list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
+ if (recv_cq)
+ list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
+ mlx5_ib_unlock_cqs(send_cq, recv_cq);
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
return 0;
err_create:
if (send_cq) {
if (recv_cq) {
if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
spin_lock_nested(&recv_cq->lock,
SINGLE_DEPTH_NESTING);
} else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
__acquire(&recv_cq->lock);
} else {
- spin_lock_irq(&recv_cq->lock);
+ spin_lock(&recv_cq->lock);
spin_lock_nested(&send_cq->lock,
SINGLE_DEPTH_NESTING);
}
} else {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
__acquire(&recv_cq->lock);
}
} else if (recv_cq) {
- spin_lock_irq(&recv_cq->lock);
+ spin_lock(&recv_cq->lock);
__acquire(&send_cq->lock);
} else {
__acquire(&send_cq->lock);
if (recv_cq) {
if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_unlock(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
} else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
__release(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
} else {
spin_unlock(&send_cq->lock);
- spin_unlock_irq(&recv_cq->lock);
+ spin_unlock(&recv_cq->lock);
}
} else {
__release(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
}
} else if (recv_cq) {
__release(&send_cq->lock);
- spin_unlock_irq(&recv_cq->lock);
+ spin_unlock(&recv_cq->lock);
} else {
__release(&recv_cq->lock);
__release(&send_cq->lock);
return to_mpd(qp->ibqp.pd);
}
-static void get_cqs(struct mlx5_ib_qp *qp,
+static void get_cqs(enum ib_qp_type qp_type,
+ struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
{
- switch (qp->ibqp.qp_type) {
+ switch (qp_type) {
case IB_QPT_XRC_TGT:
*send_cq = NULL;
*recv_cq = NULL;
break;
case MLX5_IB_QPT_REG_UMR:
case IB_QPT_XRC_INI:
- *send_cq = to_mcq(qp->ibqp.send_cq);
+ *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
*recv_cq = NULL;
break;
case IB_QPT_RAW_IPV6:
case IB_QPT_RAW_ETHERTYPE:
case IB_QPT_RAW_PACKET:
- *send_cq = to_mcq(qp->ibqp.send_cq);
- *recv_cq = to_mcq(qp->ibqp.recv_cq);
+ *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
+ *recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL;
break;
case IB_QPT_MAX:
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_modify_qp_mbox_in *in;
+ unsigned long flags;
int err;
+ if (qp->ibqp.rwq_ind_tbl) {
+ destroy_rss_raw_qp_tir(dev, qp);
+ return;
+ }
+
base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
base->mqp.qpn);
}
- get_cqs(qp, &send_cq, &recv_cq);
+ get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
+ &send_cq, &recv_cq);
+
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ mlx5_ib_lock_cqs(send_cq, recv_cq);
+ /* del from lists under both locks above to protect reset flow paths */
+ list_del(&qp->qps_list);
+ if (send_cq)
+ list_del(&qp->cq_send_list);
+
+ if (recv_cq)
+ list_del(&qp->cq_recv_list);
if (qp->create_type == MLX5_QP_KERNEL) {
- mlx5_ib_lock_cqs(send_cq, recv_cq);
__mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
if (send_cq != recv_cq)
__mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
NULL);
- mlx5_ib_unlock_cqs(send_cq, recv_cq);
}
+ mlx5_ib_unlock_cqs(send_cq, recv_cq);
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
destroy_raw_packet_qp(dev, qp);
}
pd = get_pd(qp);
- get_cqs(qp, &send_cq, &recv_cq);
+ get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
+ &send_cq, &recv_cq);
context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
else
sqd_event = 0;
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
+ qp->port) - 1;
+ struct mlx5_ib_port *mibport = &dev->port[port_num];
+
+ context->qp_counter_set_usr_page |=
+ cpu_to_be32((u32)(mibport->q_cnt_id) << 24);
+ }
+
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
context->sq_crq_size |= cpu_to_be16(1 << 4);
int port;
enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
+ if (ibqp->rwq_ind_tbl)
+ return -ENOSYS;
+
if (unlikely(ibqp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
{
struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_qp *qp;
struct mlx5_ib_mr *mr;
struct mlx5_wqe_data_seg *dpseg;
spin_lock_irqsave(&qp->sq.lock, flags);
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ err = -EIO;
+ *bad_wr = wr;
+ nreq = 0;
+ goto out;
+ }
+
for (nreq = 0; wr; nreq++, wr = wr->next) {
if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
mlx5_ib_warn(dev, "\n");
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_wqe_data_seg *scat;
struct mlx5_rwqe_sig *sig;
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
unsigned long flags;
int err = 0;
int nreq;
spin_lock_irqsave(&qp->rq.lock, flags);
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ err = -EIO;
+ *bad_wr = wr;
+ nreq = 0;
+ goto out;
+ }
+
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; nreq++, wr = wr->next) {
int err = 0;
u8 raw_packet_qp_state;
+ if (ibqp->rwq_ind_tbl)
+ return -ENOSYS;
+
if (unlikely(ibqp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
qp_init_attr);
return 0;
}
+
+static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr)
+{
+ struct mlx5_ib_dev *dev;
+ __be64 *rq_pas0;
+ void *in;
+ void *rqc;
+ void *wq;
+ int inlen;
+ int err;
+
+ dev = to_mdev(pd->device);
+
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas;
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ MLX5_SET(rqc, rqc, mem_rq_type,
+ MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
+ MLX5_SET(rqc, rqc, user_index, rwq->user_index);
+ MLX5_SET(rqc, rqc, cqn, to_mcq(init_attr->cq)->mcq.cqn);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, flush_in_error_en, 1);
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
+ MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride);
+ MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size);
+ MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn);
+ MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset);
+ MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
+ MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
+ MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
+ rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+ mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
+ err = mlx5_core_create_rq(dev->mdev, in, inlen, &rwq->rqn);
+ kvfree(in);
+ return err;
+}
+
+static int set_user_rq_size(struct mlx5_ib_dev *dev,
+ struct ib_wq_init_attr *wq_init_attr,
+ struct mlx5_ib_create_wq *ucmd,
+ struct mlx5_ib_rwq *rwq)
+{
+ /* Sanity check RQ size before proceeding */
+ if (wq_init_attr->max_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_wq_sz)))
+ return -EINVAL;
+
+ if (!ucmd->rq_wqe_count)
+ return -EINVAL;
+
+ rwq->wqe_count = ucmd->rq_wqe_count;
+ rwq->wqe_shift = ucmd->rq_wqe_shift;
+ rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift);
+ rwq->log_rq_stride = rwq->wqe_shift;
+ rwq->log_rq_size = ilog2(rwq->wqe_count);
+ return 0;
+}
+
+static int prepare_user_rq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct mlx5_ib_rwq *rwq)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_create_wq ucmd = {};
+ int err;
+ size_t required_cmd_sz;
+
+ required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
+ if (udata->inlen < required_cmd_sz) {
+ mlx5_ib_dbg(dev, "invalid inlen\n");
+ return -EINVAL;
+ }
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd))) {
+ mlx5_ib_dbg(dev, "inlen is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
+ mlx5_ib_dbg(dev, "copy failed\n");
+ return -EFAULT;
+ }
+
+ if (ucmd.comp_mask) {
+ mlx5_ib_dbg(dev, "invalid comp mask\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (ucmd.reserved) {
+ mlx5_ib_dbg(dev, "invalid reserved\n");
+ return -EOPNOTSUPP;
+ }
+
+ err = set_user_rq_size(dev, init_attr, &ucmd, rwq);
+ if (err) {
+ mlx5_ib_dbg(dev, "err %d\n", err);
+ return err;
+ }
+
+ err = create_user_rq(dev, pd, rwq, &ucmd);
+ if (err) {
+ mlx5_ib_dbg(dev, "err %d\n", err);
+ if (err)
+ return err;
+ }
+
+ rwq->user_index = ucmd.user_index;
+ return 0;
+}
+
+struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev;
+ struct mlx5_ib_rwq *rwq;
+ struct mlx5_ib_create_wq_resp resp = {};
+ size_t min_resp_len;
+ int err;
+
+ if (!udata)
+ return ERR_PTR(-ENOSYS);
+
+ min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+ if (udata->outlen && udata->outlen < min_resp_len)
+ return ERR_PTR(-EINVAL);
+
+ dev = to_mdev(pd->device);
+ switch (init_attr->wq_type) {
+ case IB_WQT_RQ:
+ rwq = kzalloc(sizeof(*rwq), GFP_KERNEL);
+ if (!rwq)
+ return ERR_PTR(-ENOMEM);
+ err = prepare_user_rq(pd, init_attr, udata, rwq);
+ if (err)
+ goto err;
+ err = create_rq(rwq, pd, init_attr);
+ if (err)
+ goto err_user_rq;
+ break;
+ default:
+ mlx5_ib_dbg(dev, "unsupported wq type %d\n",
+ init_attr->wq_type);
+ return ERR_PTR(-EINVAL);
+ }
+
+ rwq->ibwq.wq_num = rwq->rqn;
+ rwq->ibwq.state = IB_WQS_RESET;
+ if (udata->outlen) {
+ resp.response_length = offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length);
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
+ if (err)
+ goto err_copy;
+ }
+
+ return &rwq->ibwq;
+
+err_copy:
+ mlx5_core_destroy_rq(dev->mdev, rwq->rqn);
+err_user_rq:
+ destroy_user_rq(pd, rwq);
+err:
+ kfree(rwq);
+ return ERR_PTR(err);
+}
+
+int mlx5_ib_destroy_wq(struct ib_wq *wq)
+{
+ struct mlx5_ib_dev *dev = to_mdev(wq->device);
+ struct mlx5_ib_rwq *rwq = to_mrwq(wq);
+
+ mlx5_core_destroy_rq(dev->mdev, rwq->rqn);
+ destroy_user_rq(wq->pd, rwq);
+ kfree(rwq);
+
+ return 0;
+}
+
+struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_ib_rwq_ind_table *rwq_ind_tbl;
+ int sz = 1 << init_attr->log_ind_tbl_size;
+ struct mlx5_ib_create_rwq_ind_tbl_resp resp = {};
+ size_t min_resp_len;
+ int inlen;
+ int err;
+ int i;
+ u32 *in;
+ void *rqtc;
+
+ if (udata->inlen > 0 &&
+ !ib_is_udata_cleared(udata, 0,
+ udata->inlen))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+ if (udata->outlen && udata->outlen < min_resp_len)
+ return ERR_PTR(-EINVAL);
+
+ rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL);
+ if (!rwq_ind_tbl)
+ return ERR_PTR(-ENOMEM);
+
+ inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
+ in = mlx5_vzalloc(inlen);
+ if (!in) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
+
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
+ MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
+
+ for (i = 0; i < sz; i++)
+ MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num);
+
+ err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
+ kvfree(in);
+
+ if (err)
+ goto err;
+
+ rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn;
+ if (udata->outlen) {
+ resp.response_length = offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length);
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
+ if (err)
+ goto err_copy;
+ }
+
+ return &rwq_ind_tbl->ib_rwq_ind_tbl;
+
+err_copy:
+ mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
+err:
+ kfree(rwq_ind_tbl);
+ return ERR_PTR(err);
+}
+
+int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
+{
+ struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
+ struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
+
+ mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
+
+ kfree(rwq_ind_tbl);
+ return 0;
+}
+
+int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask, struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(wq->device);
+ struct mlx5_ib_rwq *rwq = to_mrwq(wq);
+ struct mlx5_ib_modify_wq ucmd = {};
+ size_t required_cmd_sz;
+ int curr_wq_state;
+ int wq_state;
+ int inlen;
+ int err;
+ void *rqc;
+ void *in;
+
+ required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
+ if (udata->inlen < required_cmd_sz)
+ return -EINVAL;
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd)))
+ return -EOPNOTSUPP;
+
+ if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)))
+ return -EFAULT;
+
+ if (ucmd.comp_mask || ucmd.reserved)
+ return -EOPNOTSUPP;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+
+ curr_wq_state = (wq_attr_mask & IB_WQ_CUR_STATE) ?
+ wq_attr->curr_wq_state : wq->state;
+ wq_state = (wq_attr_mask & IB_WQ_STATE) ?
+ wq_attr->wq_state : curr_wq_state;
+ if (curr_wq_state == IB_WQS_ERR)
+ curr_wq_state = MLX5_RQC_STATE_ERR;
+ if (wq_state == IB_WQS_ERR)
+ wq_state = MLX5_RQC_STATE_ERR;
+ MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
+ MLX5_SET(rqc, rqc, state, wq_state);
+
+ err = mlx5_core_modify_rq(dev->mdev, rwq->rqn, in, inlen);
+ kvfree(in);
+ if (!err)
+ rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
+
+ return err;
+}
}
static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
- struct mlx5_create_srq_mbox_in **in,
- struct ib_udata *udata, int buf_size, int *inlen,
- int is_xrc)
+ struct mlx5_srq_attr *in,
+ struct ib_udata *udata, int buf_size)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_create_srq ucmd = {};
size_t ucmdlen;
- void *xsrqc;
int err;
int npages;
int page_shift;
udata->inlen - sizeof(ucmd)))
return -EINVAL;
- if (is_xrc) {
+ if (in->type == IB_SRQT_XRC) {
err = get_srq_user_index(to_mucontext(pd->uobject->context),
&ucmd, udata->inlen, &uidx);
if (err)
goto err_umem;
}
- *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
- *in = mlx5_vzalloc(*inlen);
- if (!(*in)) {
+ in->pas = mlx5_vzalloc(sizeof(*in->pas) * ncont);
+ if (!in->pas) {
err = -ENOMEM;
goto err_umem;
}
- mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0);
+ mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0);
err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context),
ucmd.db_addr, &srq->db);
goto err_in;
}
- (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
- (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
-
- if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
- is_xrc){
- xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
- xrc_srq_context_entry);
- MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
- }
+ in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ in->page_offset = offset;
+ if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
+ in->type == IB_SRQT_XRC)
+ in->user_index = uidx;
return 0;
err_in:
- kvfree(*in);
+ kvfree(in->pas);
err_umem:
ib_umem_release(srq->umem);
}
static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
- struct mlx5_create_srq_mbox_in **in, int buf_size,
- int *inlen, int is_xrc)
+ struct mlx5_srq_attr *in, int buf_size)
{
int err;
int i;
struct mlx5_wqe_srq_next_seg *next;
int page_shift;
int npages;
- void *xsrqc;
err = mlx5_db_alloc(dev->mdev, &srq->db);
if (err) {
npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
buf_size, page_shift, srq->buf.npages, npages);
- *inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages;
- *in = mlx5_vzalloc(*inlen);
- if (!*in) {
+ in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages);
+ if (!in->pas) {
err = -ENOMEM;
goto err_buf;
}
- mlx5_fill_page_array(&srq->buf, (*in)->pas);
+ mlx5_fill_page_array(&srq->buf, in->pas);
srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
if (!srq->wrid) {
}
srq->wq_sig = !!srq_signature;
- (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
-
- if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
- is_xrc){
- xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
- xrc_srq_context_entry);
- /* 0xffffff means we ask to work with cqe version 0 */
- MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX);
- }
+ in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
+ in->type == IB_SRQT_XRC)
+ in->user_index = MLX5_IB_DEFAULT_UIDX;
return 0;
err_in:
- kvfree(*in);
+ kvfree(in->pas);
err_buf:
mlx5_buf_free(dev->mdev, &srq->buf);
int desc_size;
int buf_size;
int err;
- struct mlx5_create_srq_mbox_in *uninitialized_var(in);
- int uninitialized_var(inlen);
- int is_xrc;
- u32 flgs, xrcdn;
+ struct mlx5_srq_attr in = {0};
__u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
/* Sanity check SRQ size before proceeding */
desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
srq->msrq.max_avail_gather);
- is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
-
if (pd->uobject)
- err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen,
- is_xrc);
+ err = create_srq_user(pd, srq, &in, udata, buf_size);
else
- err = create_srq_kernel(dev, srq, &in, buf_size, &inlen,
- is_xrc);
+ err = create_srq_kernel(dev, srq, &in, buf_size);
if (err) {
mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
goto err_srq;
}
- in->ctx.state_log_sz = ilog2(srq->msrq.max);
- flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
- xrcdn = 0;
- if (is_xrc) {
- xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
- in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn);
+ in.type = init_attr->srq_type;
+ in.log_size = ilog2(srq->msrq.max);
+ in.wqe_shift = srq->msrq.wqe_shift - 4;
+ if (srq->wq_sig)
+ in.flags |= MLX5_SRQ_FLAG_WQ_SIG;
+ if (init_attr->srq_type == IB_SRQT_XRC) {
+ in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
+ in.cqn = to_mcq(init_attr->ext.xrc.cq)->mcq.cqn;
} else if (init_attr->srq_type == IB_SRQT_BASIC) {
- xrcdn = to_mxrcd(dev->devr.x0)->xrcdn;
- in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn);
+ in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn;
+ in.cqn = to_mcq(dev->devr.c0)->mcq.cqn;
}
- in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF));
-
- in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn);
- in->ctx.db_record = cpu_to_be64(srq->db.dma);
- err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen, is_xrc);
- kvfree(in);
+ in.pd = to_mpd(pd)->pdn;
+ in.db_record = srq->db.dma;
+ err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);
+ kvfree(in.pas);
if (err) {
mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
goto err_usr_kern_srq;
struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
struct mlx5_ib_srq *srq = to_msrq(ibsrq);
int ret;
- struct mlx5_query_srq_mbox_out *out;
+ struct mlx5_srq_attr *out;
out = kzalloc(sizeof(*out), GFP_KERNEL);
if (!out)
if (ret)
goto out_box;
- srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm);
+ srq_attr->srq_limit = out->lwm;
srq_attr->max_wr = srq->msrq.max - 1;
srq_attr->max_sge = srq->msrq.max_gs;
struct mlx5_ib_srq *srq = to_msrq(ibsrq);
struct mlx5_wqe_srq_next_seg *next;
struct mlx5_wqe_data_seg *scat;
+ struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
unsigned long flags;
int err = 0;
int nreq;
spin_lock_irqsave(&srq->lock, flags);
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ err = -EIO;
+ *bad_wr = wr;
+ goto out;
+ }
+
for (nreq = 0; wr; nreq++, wr = wr->next) {
if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
err = -EINVAL;
*srq->db.db = cpu_to_be32(srq->wqe_ctr);
}
-
+out:
spin_unlock_irqrestore(&srq->lock, flags);
return err;
MLX5_SRQ_FLAG_SIGNATURE = 1 << 0,
};
+enum {
+ MLX5_WQ_FLAG_SIGNATURE = 1 << 0,
+};
+
/* Increment this value if any changes that break userspace ABI
* compatibility are made.
MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
};
+enum mlx5_user_cmds_supp_uhw {
+ MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0,
+};
+
struct mlx5_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 bf_reg_size;
__u32 comp_mask;
__u32 response_length;
__u8 cqe_version;
- __u8 reserved2;
- __u16 reserved3;
+ __u8 cmds_supp_uhw;
+ __u16 reserved2;
__u64 hca_core_clock_offset;
};
__u32 pdn;
};
+struct mlx5_ib_tso_caps {
+ __u32 max_tso; /* Maximum tso payload size in bytes */
+
+ /* Corresponding bit will be set if qp type from
+ * 'enum ib_qp_type' is supported, e.g.
+ * supported_qpts |= 1 << IB_QPT_UD
+ */
+ __u32 supported_qpts;
+};
+
+struct mlx5_ib_query_device_resp {
+ __u32 comp_mask;
+ __u32 response_length;
+ struct mlx5_ib_tso_caps tso_caps;
+};
+
struct mlx5_ib_create_cq {
__u64 buf_addr;
__u64 db_addr;
__u64 sq_buf_addr;
};
+/* RX Hash function flags */
+enum mlx5_rx_hash_function_flags {
+ MLX5_RX_HASH_FUNC_TOEPLITZ = 1 << 0,
+};
+
+/*
+ * RX Hash flags, these flags allows to set which incoming packet's field should
+ * participates in RX Hash. Each flag represent certain packet's field,
+ * when the flag is set the field that is represented by the flag will
+ * participate in RX Hash calculation.
+ * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP
+ * and *TCP and *UDP flags can't be enabled together on the same QP.
+*/
+enum mlx5_rx_hash_fields {
+ MLX5_RX_HASH_SRC_IPV4 = 1 << 0,
+ MLX5_RX_HASH_DST_IPV4 = 1 << 1,
+ MLX5_RX_HASH_SRC_IPV6 = 1 << 2,
+ MLX5_RX_HASH_DST_IPV6 = 1 << 3,
+ MLX5_RX_HASH_SRC_PORT_TCP = 1 << 4,
+ MLX5_RX_HASH_DST_PORT_TCP = 1 << 5,
+ MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6,
+ MLX5_RX_HASH_DST_PORT_UDP = 1 << 7
+};
+
+struct mlx5_ib_create_qp_rss {
+ __u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */
+ __u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */
+ __u8 rx_key_len; /* valid only for Toeplitz */
+ __u8 reserved[6];
+ __u8 rx_hash_key[128]; /* valid only for Toeplitz */
+ __u32 comp_mask;
+ __u32 reserved1;
+};
+
struct mlx5_ib_create_qp_resp {
__u32 uuar_index;
};
__u16 reserved2;
};
+struct mlx5_ib_create_wq {
+ __u64 buf_addr;
+ __u64 db_addr;
+ __u32 rq_wqe_count;
+ __u32 rq_wqe_shift;
+ __u32 user_index;
+ __u32 flags;
+ __u32 comp_mask;
+ __u32 reserved;
+};
+
+struct mlx5_ib_create_wq_resp {
+ __u32 response_length;
+ __u32 reserved;
+};
+
+struct mlx5_ib_create_rwq_ind_tbl_resp {
+ __u32 response_length;
+ __u32 reserved;
+};
+
+struct mlx5_ib_modify_wq {
+ __u32 comp_mask;
+ __u32 reserved;
+};
+
static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
struct mlx5_ib_create_qp *ucmd,
int inlen,
return sprintf(buf, "%x\n", dev->rev_id);
}
-static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct mthca_dev *dev =
- container_of(device, struct mthca_dev, ib_dev.dev);
- return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32),
- (int) (dev->fw_ver >> 16) & 0xffff,
- (int) dev->fw_ver & 0xffff);
-}
-
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
{
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *mthca_dev_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id
};
return 0;
}
+static void get_dev_fw_str(struct ib_device *device, char *str,
+ size_t str_len)
+{
+ struct mthca_dev *dev =
+ container_of(device, struct mthca_dev, ib_dev);
+ snprintf(str, str_len, "%d.%d.%d",
+ (int) (dev->fw_ver >> 32),
+ (int) (dev->fw_ver >> 16) & 0xffff,
+ (int) dev->fw_ver & 0xffff);
+}
+
int mthca_register_device(struct mthca_dev *dev)
{
int ret;
dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
dev->ib_dev.dereg_mr = mthca_dereg_mr;
dev->ib_dev.get_port_immutable = mthca_port_immutable;
+ dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
if (dev->mthca_flags & MTHCA_FLAG_FMR) {
dev->ib_dev.alloc_fmr = mthca_alloc_fmr;
err = -ENOMEM;
mthca_err(mdev, "Couldn't allocate memory to save HCA "
"PCI header, aborting.\n");
- goto out;
+ goto put_dev;
}
for (i = 0; i < 64; ++i) {
err = -ENODEV;
mthca_err(mdev, "Couldn't save HCA "
"PCI header, aborting.\n");
- goto out;
+ goto free_hca;
}
}
err = -ENOMEM;
mthca_err(mdev, "Couldn't allocate memory to save HCA "
"bridge PCI header, aborting.\n");
- goto out;
+ goto free_hca;
}
for (i = 0; i < 64; ++i) {
err = -ENODEV;
mthca_err(mdev, "Couldn't save HCA bridge "
"PCI header, aborting.\n");
- goto out;
+ goto free_bh;
}
}
bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
err = -ENODEV;
mthca_err(mdev, "Couldn't locate HCA bridge "
"PCI-X capability, aborting.\n");
- goto out;
+ goto free_bh;
}
}
err = -ENOMEM;
mthca_err(mdev, "Couldn't map HCA reset register, "
"aborting.\n");
- goto out;
+ goto free_bh;
}
writel(MTHCA_RESET_VALUE, reset);
err = -ENODEV;
mthca_err(mdev, "Couldn't access HCA after reset, "
"aborting.\n");
- goto out;
+ goto free_bh;
}
if (v != 0xffffffff)
err = -ENODEV;
mthca_err(mdev, "PCI device did not come back after reset, "
"aborting.\n");
- goto out;
+ goto free_bh;
}
good:
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge Upstream "
"split transaction control, aborting.\n");
- goto out;
+ goto free_bh;
}
if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc,
bridge_header[(bridge_pcix_cap + 0xc) / 4])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge Downstream "
"split transaction control, aborting.\n");
- goto out;
+ goto free_bh;
}
/*
* Bridge control register is at 0x3e, so we'll
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge reg %x, "
"aborting.\n", i);
- goto out;
+ goto free_bh;
}
}
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, "
"aborting.\n");
- goto out;
+ goto free_bh;
}
}
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI-X "
"command register, aborting.\n");
- goto out;
+ goto free_bh;
}
}
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI Express "
"Device Control register, aborting.\n");
- goto out;
+ goto free_bh;
}
linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4];
if (pcie_capability_write_word(mdev->pdev, PCI_EXP_LNKCTL,
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI Express "
"Link control register, aborting.\n");
- goto out;
+ goto free_bh;
}
}
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA reg %x, "
"aborting.\n", i);
- goto out;
+ goto free_bh;
}
}
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA COMMAND, "
"aborting.\n");
- goto out;
}
-
-out:
- if (bridge)
- pci_dev_put(bridge);
+free_bh:
kfree(bridge_header);
+free_hca:
kfree(hca_header);
-
+put_dev:
+ pci_dev_put(bridge);
return err;
}
}
-/**
- * show_fw_ver
- */
-static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct nes_ib_device *nesibdev =
- container_of(dev, struct nes_ib_device, ibdev.dev);
- struct nes_vnic *nesvnic = nesibdev->nesvnic;
-
- nes_debug(NES_DBG_INIT, "\n");
- return sprintf(buf, "%u.%u\n",
- (nesvnic->nesdev->nesadapter->firmware_version >> 16),
- (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff));
-}
-
-
/**
* show_hca
*/
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *nes_dev_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id
};
return 0;
}
+static void get_dev_fw_str(struct ib_device *dev, char *str,
+ size_t str_len)
+{
+ struct nes_ib_device *nesibdev =
+ container_of(dev, struct nes_ib_device, ibdev);
+ struct nes_vnic *nesvnic = nesibdev->nesvnic;
+
+ nes_debug(NES_DBG_INIT, "\n");
+ snprintf(str, str_len, "%u.%u",
+ (nesvnic->nesdev->nesadapter->firmware_version >> 16),
+ (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff));
+}
+
/**
* nes_init_ofa_device
*/
nesibdev->ibdev.iwcm->create_listen = nes_create_listen;
nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen;
nesibdev->ibdev.get_port_immutable = nes_port_immutable;
+ nesibdev->ibdev.get_dev_fw_str = get_dev_fw_str;
memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name,
sizeof(nesibdev->ibdev.iwcm->ifname));
return 0;
}
+static void get_dev_fw_str(struct ib_device *device, char *str,
+ size_t str_len)
+{
+ struct ocrdma_dev *dev = get_ocrdma_dev(device);
+
+ snprintf(str, str_len, "%s", &dev->attr.fw_ver[0]);
+}
+
static int ocrdma_register_device(struct ocrdma_dev *dev)
{
strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX);
dev->ibdev.process_mad = ocrdma_process_mad;
dev->ibdev.get_port_immutable = ocrdma_port_immutable;
+ dev->ibdev.get_dev_fw_str = get_dev_fw_str;
if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
dev->ibdev.uverbs_cmd_mask |=
return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
}
-static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct ocrdma_dev *dev = dev_get_drvdata(device);
-
- return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]);
-}
-
static ssize_t show_hca_type(struct device *device,
struct device_attribute *attr, char *buf)
{
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
static struct device_attribute *ocrdma_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type
};
32768 /* 1E */
};
+const struct rvt_operation_params qib_post_parms[RVT_OPERATION_MAX] = {
+[IB_WR_RDMA_WRITE] = {
+ .length = sizeof(struct ib_rdma_wr),
+ .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+[IB_WR_RDMA_READ] = {
+ .length = sizeof(struct ib_rdma_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_ATOMIC,
+},
+
+[IB_WR_ATOMIC_CMP_AND_SWP] = {
+ .length = sizeof(struct ib_atomic_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
+},
+
+[IB_WR_ATOMIC_FETCH_AND_ADD] = {
+ .length = sizeof(struct ib_atomic_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
+},
+
+[IB_WR_RDMA_WRITE_WITH_IMM] = {
+ .length = sizeof(struct ib_rdma_wr),
+ .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+[IB_WR_SEND] = {
+ .length = sizeof(struct ib_send_wr),
+ .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
+ BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+[IB_WR_SEND_WITH_IMM] = {
+ .length = sizeof(struct ib_send_wr),
+ .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
+ BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+};
+
static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map,
gfp_t gfp)
{
}
if (ah_attr->ah_flags & IB_AH_GRH) {
- qib_copy_sge(&qp->r_sge, &ah_attr->grh,
- sizeof(struct ib_grh), 1);
+ struct ib_grh grh;
+ struct ib_global_route grd = ah_attr->grh;
+
+ qib_make_grh(ibp, &grh, &grd, 0, 0);
+ qib_copy_sge(&qp->r_sge, &grh,
+ sizeof(grh), 1);
wc.wc_flags |= IB_WC_GRH;
} else
qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
rdi->dparms.props.max_total_mcast_qp_attach =
rdi->dparms.props.max_mcast_qp_attach *
rdi->dparms.props.max_mcast_grp;
+ /* post send table */
+ dd->verbs_dev.rdi.post_parms = qib_post_parms;
}
/**
extern const u32 ib_qib_rnr_table[];
+extern const struct rvt_operation_params qib_post_parms[];
+
#endif /* QIB_VERBS_H */
return 0;
}
+static void usnic_get_dev_fw_str(struct ib_device *device,
+ char *str,
+ size_t str_len)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(device, struct usnic_ib_dev, ib_dev);
+ struct ethtool_drvinfo info;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ snprintf(str, str_len, "%s", info.fw_version);
+}
+
/* Start of PF discovery section */
static void *usnic_ib_device_add(struct pci_dev *dev)
{
us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq;
us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr;
us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable;
+ us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str;
if (ib_register_device(&us_ibdev->ib_dev, NULL))
#include "usnic_ib_verbs.h"
#include "usnic_log.h"
-static ssize_t usnic_ib_show_fw_ver(struct device *device,
- struct device_attribute *attr,
- char *buf)
-{
- struct usnic_ib_dev *us_ibdev =
- container_of(device, struct usnic_ib_dev, ib_dev.dev);
- struct ethtool_drvinfo info;
-
- mutex_lock(&us_ibdev->usdev_lock);
- us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
- mutex_unlock(&us_ibdev->usdev_lock);
-
- return scnprintf(buf, PAGE_SIZE, "%s\n", info.fw_version);
-}
-
static ssize_t usnic_ib_show_board(struct device *device,
struct device_attribute *attr,
char *buf)
us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
}
-static DEVICE_ATTR(fw_ver, S_IRUGO, usnic_ib_show_fw_ver, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL);
static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL);
static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL);
static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL);
static struct device_attribute *usnic_class_attributes[] = {
- &dev_attr_fw_ver,
&dev_attr_board_id,
&dev_attr_config,
&dev_attr_iface,
obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt/
+obj-$(CONFIG_RDMA_RXE) += rxe/
config INFINIBAND_RDMAVT
tristate "RDMA verbs transport library"
depends on 64BIT
- default m
---help---
This is a common software verbs provider for RDMA networks.
if (rdi->worker)
return 0;
+ spin_lock_init(&rdi->n_cqs_lock);
rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL);
if (!rdi->worker)
return -ENOMEM;
init_completion(&mr->comp);
/* count returning the ptr to user */
atomic_set(&mr->refcount, 1);
+ atomic_set(&mr->lkey_invalid, 0);
mr->pd = pd;
mr->max_segs = count;
return 0;
return &mr->ibmr;
}
+/**
+ * rvt_set_page - page assignment function called by ib_sg_to_pages
+ * @ibmr: memory region
+ * @addr: dma address of mapped page
+ *
+ * Return: 0 on success
+ */
+static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct rvt_mr *mr = to_imr(ibmr);
+ u32 ps = 1 << mr->mr.page_shift;
+ u32 mapped_segs = mr->mr.length >> mr->mr.page_shift;
+ int m, n;
+
+ if (unlikely(mapped_segs == mr->mr.max_segs))
+ return -ENOMEM;
+
+ if (mr->mr.length == 0) {
+ mr->mr.user_base = addr;
+ mr->mr.iova = addr;
+ }
+
+ m = mapped_segs / RVT_SEGSZ;
+ n = mapped_segs % RVT_SEGSZ;
+ mr->mr.map[m]->segs[n].vaddr = (void *)addr;
+ mr->mr.map[m]->segs[n].length = ps;
+ mr->mr.length += ps;
+
+ return 0;
+}
+
+/**
+ * rvt_map_mr_sg - map sg list and set it the memory region
+ * @ibmr: memory region
+ * @sg: dma mapped scatterlist
+ * @sg_nents: number of entries in sg
+ * @sg_offset: offset in bytes into sg
+ *
+ * Return: number of sg elements mapped to the memory region
+ */
+int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset)
+{
+ struct rvt_mr *mr = to_imr(ibmr);
+
+ mr->mr.length = 0;
+ mr->mr.page_shift = PAGE_SHIFT;
+ return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
+ rvt_set_page);
+}
+
+/**
+ * rvt_fast_reg_mr - fast register physical MR
+ * @qp: the queue pair where the work request comes from
+ * @ibmr: the memory region to be registered
+ * @key: updated key for this memory region
+ * @access: access flags for this memory region
+ *
+ * Returns 0 on success.
+ */
+int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
+ int access)
+{
+ struct rvt_mr *mr = to_imr(ibmr);
+
+ if (qp->ibqp.pd != mr->mr.pd)
+ return -EACCES;
+
+ /* not applicable to dma MR or user MR */
+ if (!mr->mr.lkey || mr->umem)
+ return -EINVAL;
+
+ if ((key & 0xFFFFFF00) != (mr->mr.lkey & 0xFFFFFF00))
+ return -EINVAL;
+
+ ibmr->lkey = key;
+ ibmr->rkey = key;
+ mr->mr.lkey = key;
+ mr->mr.access_flags = access;
+ atomic_set(&mr->mr.lkey_invalid, 0);
+
+ return 0;
+}
+EXPORT_SYMBOL(rvt_fast_reg_mr);
+
+/**
+ * rvt_invalidate_rkey - invalidate an MR rkey
+ * @qp: queue pair associated with the invalidate op
+ * @rkey: rkey to invalidate
+ *
+ * Returns 0 on success.
+ */
+int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey)
+{
+ struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
+ struct rvt_lkey_table *rkt = &dev->lkey_table;
+ struct rvt_mregion *mr;
+
+ if (rkey == 0)
+ return -EINVAL;
+
+ rcu_read_lock();
+ mr = rcu_dereference(
+ rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
+ if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+ goto bail;
+
+ atomic_set(&mr->lkey_invalid, 1);
+ rcu_read_unlock();
+ return 0;
+
+bail:
+ rcu_read_unlock();
+ return -EINVAL;
+}
+EXPORT_SYMBOL(rvt_invalidate_rkey);
+
/**
* rvt_alloc_fmr - allocate a fast memory region
* @pd: the protection domain for this memory region
}
mr = rcu_dereference(
rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]);
- if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
+ if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
+ mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
goto bail;
off = sge->addr - mr->user_base;
mr = rcu_dereference(
rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
- if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+ if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
+ mr->lkey != rkey || qp->ibqp.pd != mr->pd))
goto bail;
off = vaddr - mr->iova;
struct ib_mr *rvt_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
+int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset);
struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
struct ib_fmr_attr *fmr_attr);
int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
for (n = 0; n < rvt_max_atomic(rdi); n++) {
struct rvt_ack_entry *e = &qp->s_ack_queue[n];
- if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
- e->rdma_sge.mr) {
+ if (e->rdma_sge.mr) {
rvt_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
qp->r_rq.wq->tail = 0;
}
qp->r_sge.num_sge = 0;
+ atomic_set(&qp->s_reserved_used, 0);
}
/**
struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
void *priv = NULL;
gfp_t gfp;
+ size_t sqsize;
if (!rdi)
return ERR_PTR(-EINVAL);
init_attr->cap.max_recv_wr == 0)
return ERR_PTR(-EINVAL);
}
-
+ sqsize =
+ init_attr->cap.max_send_wr + 1 +
+ rdi->dparms.reserved_operations;
switch (init_attr->qp_type) {
case IB_QPT_SMI:
case IB_QPT_GSI:
sizeof(struct rvt_swqe);
if (gfp == GFP_NOIO)
swq = __vmalloc(
- (init_attr->cap.max_send_wr + 1) * sz,
+ sqsize * sz,
gfp | __GFP_ZERO, PAGE_KERNEL);
else
swq = vzalloc_node(
- (init_attr->cap.max_send_wr + 1) * sz,
+ sqsize * sz,
rdi->dparms.node);
if (!swq)
return ERR_PTR(-ENOMEM);
spin_lock_init(&qp->s_lock);
spin_lock_init(&qp->r_rq.lock);
atomic_set(&qp->refcount, 0);
+ atomic_set(&qp->local_ops_pending, 0);
init_waitqueue_head(&qp->wait);
init_timer(&qp->s_timer);
qp->s_timer.data = (unsigned long)qp;
INIT_LIST_HEAD(&qp->rspwait);
qp->state = IB_QPS_RESET;
qp->s_wq = swq;
- qp->s_size = init_attr->cap.max_send_wr + 1;
+ qp->s_size = sqsize;
qp->s_avail = init_attr->cap.max_send_wr;
qp->s_max_sge = init_attr->cap.max_send_sge;
if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
attr->sq_psn = qp->s_next_psn & rdi->dparms.psn_mask;
attr->dest_qp_num = qp->remote_qpn;
attr->qp_access_flags = qp->qp_access_flags;
- attr->cap.max_send_wr = qp->s_size - 1;
+ attr->cap.max_send_wr = qp->s_size - 1 -
+ rdi->dparms.reserved_operations;
attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
attr->cap.max_send_sge = qp->s_max_sge;
attr->cap.max_recv_sge = qp->r_rq.max_sge;
}
/**
- * qp_get_savail - return number of avail send entries
+ * rvt_qp_valid_operation - validate post send wr request
+ * @qp - the qp
+ * @post-parms - the post send table for the driver
+ * @wr - the work request
+ *
+ * The routine validates the operation based on the
+ * validation table an returns the length of the operation
+ * which can extend beyond the ib_send_bw. Operation
+ * dependent flags key atomic operation validation.
*
+ * There is an exception for UD qps that validates the pd and
+ * overrides the length to include the additional UD specific
+ * length.
+ *
+ * Returns a negative error or the length of the work request
+ * for building the swqe.
+ */
+static inline int rvt_qp_valid_operation(
+ struct rvt_qp *qp,
+ const struct rvt_operation_params *post_parms,
+ struct ib_send_wr *wr)
+{
+ int len;
+
+ if (wr->opcode >= RVT_OPERATION_MAX || !post_parms[wr->opcode].length)
+ return -EINVAL;
+ if (!(post_parms[wr->opcode].qpt_support & BIT(qp->ibqp.qp_type)))
+ return -EINVAL;
+ if ((post_parms[wr->opcode].flags & RVT_OPERATION_PRIV) &&
+ ibpd_to_rvtpd(qp->ibqp.pd)->user)
+ return -EINVAL;
+ if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC_SGE &&
+ (wr->num_sge == 0 ||
+ wr->sg_list[0].length < sizeof(u64) ||
+ wr->sg_list[0].addr & (sizeof(u64) - 1)))
+ return -EINVAL;
+ if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC &&
+ !qp->s_max_rd_atomic)
+ return -EINVAL;
+ len = post_parms[wr->opcode].length;
+ /* UD specific */
+ if (qp->ibqp.qp_type != IB_QPT_UC &&
+ qp->ibqp.qp_type != IB_QPT_RC) {
+ if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
+ return -EINVAL;
+ len = sizeof(struct ib_ud_wr);
+ }
+ return len;
+}
+
+/**
+ * rvt_qp_is_avail - determine queue capacity
* @qp - the qp
+ * @rdi - the rdmavt device
+ * @reserved_op - is reserved operation
*
* This assumes the s_hlock is held but the s_last
* qp variable is uncontrolled.
+ *
+ * For non reserved operations, the qp->s_avail
+ * may be changed.
+ *
+ * The return value is zero or a -ENOMEM.
*/
-static inline u32 qp_get_savail(struct rvt_qp *qp)
+static inline int rvt_qp_is_avail(
+ struct rvt_qp *qp,
+ struct rvt_dev_info *rdi,
+ bool reserved_op)
{
u32 slast;
- u32 ret;
-
+ u32 avail;
+ u32 reserved_used;
+
+ /* see rvt_qp_wqe_unreserve() */
+ smp_mb__before_atomic();
+ reserved_used = atomic_read(&qp->s_reserved_used);
+ if (unlikely(reserved_op)) {
+ /* see rvt_qp_wqe_unreserve() */
+ smp_mb__before_atomic();
+ if (reserved_used >= rdi->dparms.reserved_operations)
+ return -ENOMEM;
+ return 0;
+ }
+ /* non-reserved operations */
+ if (likely(qp->s_avail))
+ return 0;
smp_read_barrier_depends(); /* see rc.c */
slast = ACCESS_ONCE(qp->s_last);
if (qp->s_head >= slast)
- ret = qp->s_size - (qp->s_head - slast);
+ avail = qp->s_size - (qp->s_head - slast);
else
- ret = slast - qp->s_head;
- return ret - 1;
+ avail = slast - qp->s_head;
+
+ /* see rvt_qp_wqe_unreserve() */
+ smp_mb__before_atomic();
+ reserved_used = atomic_read(&qp->s_reserved_used);
+ avail = avail - 1 -
+ (rdi->dparms.reserved_operations - reserved_used);
+ /* insure we don't assign a negative s_avail */
+ if ((s32)avail <= 0)
+ return -ENOMEM;
+ qp->s_avail = avail;
+ if (WARN_ON(qp->s_avail >
+ (qp->s_size - 1 - rdi->dparms.reserved_operations)))
+ rvt_pr_err(rdi,
+ "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u",
+ qp->ibqp.qp_num, qp->s_size, qp->s_avail,
+ qp->s_head, qp->s_tail, qp->s_cur,
+ qp->s_acked, qp->s_last);
+ return 0;
}
/**
struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
u8 log_pmtu;
int ret;
+ size_t cplen;
+ bool reserved_op;
+ int local_ops_delayed = 0;
+
+ BUILD_BUG_ON(IB_QPT_MAX >= (sizeof(u32) * BITS_PER_BYTE));
/* IB spec says that num_sge == 0 is OK. */
if (unlikely(wr->num_sge > qp->s_max_sge))
return -EINVAL;
+ ret = rvt_qp_valid_operation(qp, rdi->post_parms, wr);
+ if (ret < 0)
+ return ret;
+ cplen = ret;
+
/*
- * Don't allow RDMA reads or atomic operations on UC or
- * undefined operations.
- * Make sure buffer is large enough to hold the result for atomics.
+ * Local operations include fast register and local invalidate.
+ * Fast register needs to be processed immediately because the
+ * registered lkey may be used by following work requests and the
+ * lkey needs to be valid at the time those requests are posted.
+ * Local invalidate can be processed immediately if fencing is
+ * not required and no previous local invalidate ops are pending.
+ * Signaled local operations that have been processed immediately
+ * need to have requests with "completion only" flags set posted
+ * to the send queue in order to generate completions.
*/
- if (qp->ibqp.qp_type == IB_QPT_UC) {
- if ((unsigned)wr->opcode >= IB_WR_RDMA_READ)
- return -EINVAL;
- } else if (qp->ibqp.qp_type != IB_QPT_RC) {
- /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
- if (wr->opcode != IB_WR_SEND &&
- wr->opcode != IB_WR_SEND_WITH_IMM)
- return -EINVAL;
- /* Check UD destination address PD */
- if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
+ if ((rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL)) {
+ switch (wr->opcode) {
+ case IB_WR_REG_MR:
+ ret = rvt_fast_reg_mr(qp,
+ reg_wr(wr)->mr,
+ reg_wr(wr)->key,
+ reg_wr(wr)->access);
+ if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
+ return ret;
+ break;
+ case IB_WR_LOCAL_INV:
+ if ((wr->send_flags & IB_SEND_FENCE) ||
+ atomic_read(&qp->local_ops_pending)) {
+ local_ops_delayed = 1;
+ } else {
+ ret = rvt_invalidate_rkey(
+ qp, wr->ex.invalidate_rkey);
+ if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
+ return ret;
+ }
+ break;
+ default:
return -EINVAL;
- } else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
- return -EINVAL;
- } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
- (wr->num_sge == 0 ||
- wr->sg_list[0].length < sizeof(u64) ||
- wr->sg_list[0].addr & (sizeof(u64) - 1))) {
- return -EINVAL;
- } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
- return -EINVAL;
+ }
}
+
+ reserved_op = rdi->post_parms[wr->opcode].flags &
+ RVT_OPERATION_USE_RESERVE;
/* check for avail */
- if (unlikely(!qp->s_avail)) {
- qp->s_avail = qp_get_savail(qp);
- if (WARN_ON(qp->s_avail > (qp->s_size - 1)))
- rvt_pr_err(rdi,
- "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u",
- qp->ibqp.qp_num, qp->s_size, qp->s_avail,
- qp->s_head, qp->s_tail, qp->s_cur,
- qp->s_acked, qp->s_last);
- if (!qp->s_avail)
- return -ENOMEM;
- }
+ ret = rvt_qp_is_avail(qp, rdi, reserved_op);
+ if (ret)
+ return ret;
next = qp->s_head + 1;
if (next >= qp->s_size)
next = 0;
pd = ibpd_to_rvtpd(qp->ibqp.pd);
wqe = rvt_get_swqe_ptr(qp, qp->s_head);
- if (qp->ibqp.qp_type != IB_QPT_UC &&
- qp->ibqp.qp_type != IB_QPT_RC)
- memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
- else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
- wr->opcode == IB_WR_RDMA_WRITE ||
- wr->opcode == IB_WR_RDMA_READ)
- memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
- else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
- wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
- memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
- else
- memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+ /* cplen has length from above */
+ memcpy(&wqe->wr, wr, cplen);
wqe->length = 0;
j = 0;
atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
}
- wqe->ssn = qp->s_ssn++;
- wqe->psn = qp->s_next_psn;
- wqe->lpsn = wqe->psn +
- (wqe->length ? ((wqe->length - 1) >> log_pmtu) : 0);
- qp->s_next_psn = wqe->lpsn + 1;
+ if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) {
+ if (local_ops_delayed)
+ atomic_inc(&qp->local_ops_pending);
+ else
+ wqe->wr.send_flags |= RVT_SEND_COMPLETION_ONLY;
+ wqe->ssn = 0;
+ wqe->psn = 0;
+ wqe->lpsn = 0;
+ } else {
+ wqe->ssn = qp->s_ssn++;
+ wqe->psn = qp->s_next_psn;
+ wqe->lpsn = wqe->psn +
+ (wqe->length ?
+ ((wqe->length - 1) >> log_pmtu) :
+ 0);
+ qp->s_next_psn = wqe->lpsn + 1;
+ }
trace_rvt_post_one_wr(qp, wqe);
+ if (unlikely(reserved_op))
+ rvt_qp_wqe_reserve(qp, wqe);
+ else
+ qp->s_avail--;
smp_wmb(); /* see request builders */
- qp->s_avail--;
qp->s_head = next;
return 0;
REG_USER_MR,
DEREG_MR,
ALLOC_MR,
+ MAP_MR_SG,
ALLOC_FMR,
MAP_PHYS_FMR,
UNMAP_FMR,
post_send),
rvt_post_send))
if (!rdi->driver_f.schedule_send ||
- !rdi->driver_f.do_send)
+ !rdi->driver_f.do_send ||
+ !rdi->post_parms)
return -EINVAL;
break;
rvt_alloc_mr);
break;
+ case MAP_MR_SG:
+ check_driver_override(rdi, offsetof(struct ib_device,
+ map_mr_sg),
+ rvt_map_mr_sg);
+ break;
+
case MAP_PHYS_FMR:
check_driver_override(rdi, offsetof(struct ib_device,
map_phys_fmr),
--- /dev/null
+config RDMA_RXE
+ tristate "Software RDMA over Ethernet (RoCE) driver"
+ depends on INET && PCI && INFINIBAND
+ depends on NET_UDP_TUNNEL
+ ---help---
+ This driver implements the InfiniBand RDMA transport over
+ the Linux network stack. It enables a system with a
+ standard Ethernet adapter to interoperate with a RoCE
+ adapter or with another system running the RXE driver.
+ Documentation on InfiniBand and RoCE can be downloaded at
+ www.infinibandta.org and www.openfabrics.org. (See also
+ siw which is a similar software driver for iWARP.)
+
+ The driver is split into two layers, one interfaces with the
+ Linux RDMA stack and implements a kernel or user space
+ verbs API. The user space verbs API requires a support
+ library named librxe which is loaded by the generic user
+ space verbs API, libibverbs. The other layer interfaces
+ with the Linux network stack at layer 3.
+
+ To configure and work with soft-RoCE driver please use the
+ following wiki page under "configure Soft-RoCE (RXE)" section:
+
+ https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
--- /dev/null
+obj-$(CONFIG_RDMA_RXE) += rdma_rxe.o
+
+rdma_rxe-y := \
+ rxe.o \
+ rxe_comp.o \
+ rxe_req.o \
+ rxe_resp.o \
+ rxe_recv.o \
+ rxe_pool.o \
+ rxe_queue.o \
+ rxe_verbs.o \
+ rxe_av.o \
+ rxe_srq.o \
+ rxe_qp.o \
+ rxe_cq.o \
+ rxe_mr.o \
+ rxe_dma.o \
+ rxe_opcode.o \
+ rxe_mmap.o \
+ rxe_icrc.o \
+ rxe_mcast.o \
+ rxe_task.o \
+ rxe_net.o \
+ rxe_sysfs.o
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib");
+MODULE_DESCRIPTION("Soft RDMA transport");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("0.2");
+
+/* free resources for all ports on a device */
+static void rxe_cleanup_ports(struct rxe_dev *rxe)
+{
+ kfree(rxe->port.pkey_tbl);
+ rxe->port.pkey_tbl = NULL;
+
+}
+
+/* free resources for a rxe device all objects created for this device must
+ * have been destroyed
+ */
+static void rxe_cleanup(struct rxe_dev *rxe)
+{
+ rxe_pool_cleanup(&rxe->uc_pool);
+ rxe_pool_cleanup(&rxe->pd_pool);
+ rxe_pool_cleanup(&rxe->ah_pool);
+ rxe_pool_cleanup(&rxe->srq_pool);
+ rxe_pool_cleanup(&rxe->qp_pool);
+ rxe_pool_cleanup(&rxe->cq_pool);
+ rxe_pool_cleanup(&rxe->mr_pool);
+ rxe_pool_cleanup(&rxe->mw_pool);
+ rxe_pool_cleanup(&rxe->mc_grp_pool);
+ rxe_pool_cleanup(&rxe->mc_elem_pool);
+
+ rxe_cleanup_ports(rxe);
+}
+
+/* called when all references have been dropped */
+void rxe_release(struct kref *kref)
+{
+ struct rxe_dev *rxe = container_of(kref, struct rxe_dev, ref_cnt);
+
+ rxe_cleanup(rxe);
+ ib_dealloc_device(&rxe->ib_dev);
+}
+
+void rxe_dev_put(struct rxe_dev *rxe)
+{
+ kref_put(&rxe->ref_cnt, rxe_release);
+}
+EXPORT_SYMBOL_GPL(rxe_dev_put);
+
+/* initialize rxe device parameters */
+static int rxe_init_device_param(struct rxe_dev *rxe)
+{
+ rxe->max_inline_data = RXE_MAX_INLINE_DATA;
+
+ rxe->attr.fw_ver = RXE_FW_VER;
+ rxe->attr.max_mr_size = RXE_MAX_MR_SIZE;
+ rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP;
+ rxe->attr.vendor_id = RXE_VENDOR_ID;
+ rxe->attr.vendor_part_id = RXE_VENDOR_PART_ID;
+ rxe->attr.hw_ver = RXE_HW_VER;
+ rxe->attr.max_qp = RXE_MAX_QP;
+ rxe->attr.max_qp_wr = RXE_MAX_QP_WR;
+ rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS;
+ rxe->attr.max_sge = RXE_MAX_SGE;
+ rxe->attr.max_sge_rd = RXE_MAX_SGE_RD;
+ rxe->attr.max_cq = RXE_MAX_CQ;
+ rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1;
+ rxe->attr.max_mr = RXE_MAX_MR;
+ rxe->attr.max_pd = RXE_MAX_PD;
+ rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM;
+ rxe->attr.max_ee_rd_atom = RXE_MAX_EE_RD_ATOM;
+ rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM;
+ rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM;
+ rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM;
+ rxe->attr.atomic_cap = RXE_ATOMIC_CAP;
+ rxe->attr.max_ee = RXE_MAX_EE;
+ rxe->attr.max_rdd = RXE_MAX_RDD;
+ rxe->attr.max_mw = RXE_MAX_MW;
+ rxe->attr.max_raw_ipv6_qp = RXE_MAX_RAW_IPV6_QP;
+ rxe->attr.max_raw_ethy_qp = RXE_MAX_RAW_ETHY_QP;
+ rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP;
+ rxe->attr.max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH;
+ rxe->attr.max_total_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH;
+ rxe->attr.max_ah = RXE_MAX_AH;
+ rxe->attr.max_fmr = RXE_MAX_FMR;
+ rxe->attr.max_map_per_fmr = RXE_MAX_MAP_PER_FMR;
+ rxe->attr.max_srq = RXE_MAX_SRQ;
+ rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR;
+ rxe->attr.max_srq_sge = RXE_MAX_SRQ_SGE;
+ rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN;
+ rxe->attr.max_pkeys = RXE_MAX_PKEYS;
+ rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY;
+
+ rxe->max_ucontext = RXE_MAX_UCONTEXT;
+
+ return 0;
+}
+
+/* initialize port attributes */
+static int rxe_init_port_param(struct rxe_port *port)
+{
+ port->attr.state = RXE_PORT_STATE;
+ port->attr.max_mtu = RXE_PORT_MAX_MTU;
+ port->attr.active_mtu = RXE_PORT_ACTIVE_MTU;
+ port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN;
+ port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS;
+ port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ;
+ port->attr.bad_pkey_cntr = RXE_PORT_BAD_PKEY_CNTR;
+ port->attr.qkey_viol_cntr = RXE_PORT_QKEY_VIOL_CNTR;
+ port->attr.pkey_tbl_len = RXE_PORT_PKEY_TBL_LEN;
+ port->attr.lid = RXE_PORT_LID;
+ port->attr.sm_lid = RXE_PORT_SM_LID;
+ port->attr.lmc = RXE_PORT_LMC;
+ port->attr.max_vl_num = RXE_PORT_MAX_VL_NUM;
+ port->attr.sm_sl = RXE_PORT_SM_SL;
+ port->attr.subnet_timeout = RXE_PORT_SUBNET_TIMEOUT;
+ port->attr.init_type_reply = RXE_PORT_INIT_TYPE_REPLY;
+ port->attr.active_width = RXE_PORT_ACTIVE_WIDTH;
+ port->attr.active_speed = RXE_PORT_ACTIVE_SPEED;
+ port->attr.phys_state = RXE_PORT_PHYS_STATE;
+ port->mtu_cap =
+ ib_mtu_enum_to_int(RXE_PORT_ACTIVE_MTU);
+ port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX);
+
+ return 0;
+}
+
+/* initialize port state, note IB convention that HCA ports are always
+ * numbered from 1
+ */
+static int rxe_init_ports(struct rxe_dev *rxe)
+{
+ struct rxe_port *port = &rxe->port;
+
+ rxe_init_port_param(port);
+
+ if (!port->attr.pkey_tbl_len || !port->attr.gid_tbl_len)
+ return -EINVAL;
+
+ port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len,
+ sizeof(*port->pkey_tbl), GFP_KERNEL);
+
+ if (!port->pkey_tbl)
+ return -ENOMEM;
+
+ port->pkey_tbl[0] = 0xffff;
+ port->port_guid = rxe->ifc_ops->port_guid(rxe);
+
+ spin_lock_init(&port->port_lock);
+
+ return 0;
+}
+
+/* init pools of managed objects */
+static int rxe_init_pools(struct rxe_dev *rxe)
+{
+ int err;
+
+ err = rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC,
+ rxe->max_ucontext);
+ if (err)
+ goto err1;
+
+ err = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD,
+ rxe->attr.max_pd);
+ if (err)
+ goto err2;
+
+ err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH,
+ rxe->attr.max_ah);
+ if (err)
+ goto err3;
+
+ err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ,
+ rxe->attr.max_srq);
+ if (err)
+ goto err4;
+
+ err = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP,
+ rxe->attr.max_qp);
+ if (err)
+ goto err5;
+
+ err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ,
+ rxe->attr.max_cq);
+ if (err)
+ goto err6;
+
+ err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR,
+ rxe->attr.max_mr);
+ if (err)
+ goto err7;
+
+ err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW,
+ rxe->attr.max_mw);
+ if (err)
+ goto err8;
+
+ err = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP,
+ rxe->attr.max_mcast_grp);
+ if (err)
+ goto err9;
+
+ err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM,
+ rxe->attr.max_total_mcast_qp_attach);
+ if (err)
+ goto err10;
+
+ return 0;
+
+err10:
+ rxe_pool_cleanup(&rxe->mc_grp_pool);
+err9:
+ rxe_pool_cleanup(&rxe->mw_pool);
+err8:
+ rxe_pool_cleanup(&rxe->mr_pool);
+err7:
+ rxe_pool_cleanup(&rxe->cq_pool);
+err6:
+ rxe_pool_cleanup(&rxe->qp_pool);
+err5:
+ rxe_pool_cleanup(&rxe->srq_pool);
+err4:
+ rxe_pool_cleanup(&rxe->ah_pool);
+err3:
+ rxe_pool_cleanup(&rxe->pd_pool);
+err2:
+ rxe_pool_cleanup(&rxe->uc_pool);
+err1:
+ return err;
+}
+
+/* initialize rxe device state */
+static int rxe_init(struct rxe_dev *rxe)
+{
+ int err;
+
+ /* init default device parameters */
+ rxe_init_device_param(rxe);
+
+ err = rxe_init_ports(rxe);
+ if (err)
+ goto err1;
+
+ err = rxe_init_pools(rxe);
+ if (err)
+ goto err2;
+
+ /* init pending mmap list */
+ spin_lock_init(&rxe->mmap_offset_lock);
+ spin_lock_init(&rxe->pending_lock);
+ INIT_LIST_HEAD(&rxe->pending_mmaps);
+ INIT_LIST_HEAD(&rxe->list);
+
+ mutex_init(&rxe->usdev_lock);
+
+ return 0;
+
+err2:
+ rxe_cleanup_ports(rxe);
+err1:
+ return err;
+}
+
+int rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
+{
+ struct rxe_port *port = &rxe->port;
+ enum ib_mtu mtu;
+
+ mtu = eth_mtu_int_to_enum(ndev_mtu);
+
+ /* Make sure that new MTU in range */
+ mtu = mtu ? min_t(enum ib_mtu, mtu, RXE_PORT_MAX_MTU) : IB_MTU_256;
+
+ port->attr.active_mtu = mtu;
+ port->mtu_cap = ib_mtu_enum_to_int(mtu);
+
+ return 0;
+}
+EXPORT_SYMBOL(rxe_set_mtu);
+
+/* called by ifc layer to create new rxe device.
+ * The caller should allocate memory for rxe by calling ib_alloc_device.
+ */
+int rxe_add(struct rxe_dev *rxe, unsigned int mtu)
+{
+ int err;
+
+ kref_init(&rxe->ref_cnt);
+
+ err = rxe_init(rxe);
+ if (err)
+ goto err1;
+
+ err = rxe_set_mtu(rxe, mtu);
+ if (err)
+ goto err1;
+
+ err = rxe_register_device(rxe);
+ if (err)
+ goto err1;
+
+ return 0;
+
+err1:
+ rxe_dev_put(rxe);
+ return err;
+}
+EXPORT_SYMBOL(rxe_add);
+
+/* called by the ifc layer to remove a device */
+void rxe_remove(struct rxe_dev *rxe)
+{
+ rxe_unregister_device(rxe);
+
+ rxe_dev_put(rxe);
+}
+EXPORT_SYMBOL(rxe_remove);
+
+static int __init rxe_module_init(void)
+{
+ int err;
+
+ /* initialize slab caches for managed objects */
+ err = rxe_cache_init();
+ if (err) {
+ pr_err("rxe: unable to init object pools\n");
+ return err;
+ }
+
+ err = rxe_net_init();
+ if (err) {
+ pr_err("rxe: unable to init\n");
+ rxe_cache_exit();
+ return err;
+ }
+ pr_info("rxe: loaded\n");
+
+ return 0;
+}
+
+static void __exit rxe_module_exit(void)
+{
+ rxe_remove_all();
+ rxe_net_exit();
+ rxe_cache_exit();
+
+ pr_info("rxe: unloaded\n");
+}
+
+module_init(rxe_module_init);
+module_exit(rxe_module_exit);
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_H
+#define RXE_H
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/crc32.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_addr.h>
+
+#include "rxe_net.h"
+#include "rxe_opcode.h"
+#include "rxe_hdr.h"
+#include "rxe_param.h"
+#include "rxe_verbs.h"
+
+#define RXE_UVERBS_ABI_VERSION (1)
+
+#define IB_PHYS_STATE_LINK_UP (5)
+#define IB_PHYS_STATE_LINK_DOWN (3)
+
+#define RXE_ROCE_V2_SPORT (0xc000)
+
+int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
+
+int rxe_add(struct rxe_dev *rxe, unsigned int mtu);
+void rxe_remove(struct rxe_dev *rxe);
+void rxe_remove_all(void);
+
+int rxe_rcv(struct sk_buff *skb);
+
+void rxe_dev_put(struct rxe_dev *rxe);
+struct rxe_dev *net_to_rxe(struct net_device *ndev);
+struct rxe_dev *get_rxe_by_name(const char* name);
+
+void rxe_port_up(struct rxe_dev *rxe);
+void rxe_port_down(struct rxe_dev *rxe);
+
+#endif /* RXE_H */
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr)
+{
+ struct rxe_port *port;
+
+ if (attr->port_num != 1) {
+ pr_info("rxe: invalid port_num = %d\n", attr->port_num);
+ return -EINVAL;
+ }
+
+ port = &rxe->port;
+
+ if (attr->ah_flags & IB_AH_GRH) {
+ if (attr->grh.sgid_index > port->attr.gid_tbl_len) {
+ pr_info("rxe: invalid sgid index = %d\n",
+ attr->grh.sgid_index);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num,
+ struct rxe_av *av, struct ib_ah_attr *attr)
+{
+ memset(av, 0, sizeof(*av));
+ memcpy(&av->grh, &attr->grh, sizeof(attr->grh));
+ av->port_num = port_num;
+ return 0;
+}
+
+int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av,
+ struct ib_ah_attr *attr)
+{
+ memcpy(&attr->grh, &av->grh, sizeof(av->grh));
+ attr->port_num = av->port_num;
+ return 0;
+}
+
+int rxe_av_fill_ip_info(struct rxe_dev *rxe,
+ struct rxe_av *av,
+ struct ib_ah_attr *attr,
+ struct ib_gid_attr *sgid_attr,
+ union ib_gid *sgid)
+{
+ rdma_gid2ip(&av->sgid_addr._sockaddr, sgid);
+ rdma_gid2ip(&av->dgid_addr._sockaddr, &attr->grh.dgid);
+ av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid);
+
+ return 0;
+}
+
+struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt)
+{
+ if (!pkt || !pkt->qp)
+ return NULL;
+
+ if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC)
+ return &pkt->qp->pri_av;
+
+ return (pkt->wqe) ? &pkt->wqe->av : NULL;
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/skbuff.h>
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+#include "rxe_task.h"
+
+enum comp_state {
+ COMPST_GET_ACK,
+ COMPST_GET_WQE,
+ COMPST_COMP_WQE,
+ COMPST_COMP_ACK,
+ COMPST_CHECK_PSN,
+ COMPST_CHECK_ACK,
+ COMPST_READ,
+ COMPST_ATOMIC,
+ COMPST_WRITE_SEND,
+ COMPST_UPDATE_COMP,
+ COMPST_ERROR_RETRY,
+ COMPST_RNR_RETRY,
+ COMPST_ERROR,
+ COMPST_EXIT, /* We have an issue, and we want to rerun the completer */
+ COMPST_DONE, /* The completer finished successflly */
+};
+
+static char *comp_state_name[] = {
+ [COMPST_GET_ACK] = "GET ACK",
+ [COMPST_GET_WQE] = "GET WQE",
+ [COMPST_COMP_WQE] = "COMP WQE",
+ [COMPST_COMP_ACK] = "COMP ACK",
+ [COMPST_CHECK_PSN] = "CHECK PSN",
+ [COMPST_CHECK_ACK] = "CHECK ACK",
+ [COMPST_READ] = "READ",
+ [COMPST_ATOMIC] = "ATOMIC",
+ [COMPST_WRITE_SEND] = "WRITE/SEND",
+ [COMPST_UPDATE_COMP] = "UPDATE COMP",
+ [COMPST_ERROR_RETRY] = "ERROR RETRY",
+ [COMPST_RNR_RETRY] = "RNR RETRY",
+ [COMPST_ERROR] = "ERROR",
+ [COMPST_EXIT] = "EXIT",
+ [COMPST_DONE] = "DONE",
+};
+
+static unsigned long rnrnak_usec[32] = {
+ [IB_RNR_TIMER_655_36] = 655360,
+ [IB_RNR_TIMER_000_01] = 10,
+ [IB_RNR_TIMER_000_02] = 20,
+ [IB_RNR_TIMER_000_03] = 30,
+ [IB_RNR_TIMER_000_04] = 40,
+ [IB_RNR_TIMER_000_06] = 60,
+ [IB_RNR_TIMER_000_08] = 80,
+ [IB_RNR_TIMER_000_12] = 120,
+ [IB_RNR_TIMER_000_16] = 160,
+ [IB_RNR_TIMER_000_24] = 240,
+ [IB_RNR_TIMER_000_32] = 320,
+ [IB_RNR_TIMER_000_48] = 480,
+ [IB_RNR_TIMER_000_64] = 640,
+ [IB_RNR_TIMER_000_96] = 960,
+ [IB_RNR_TIMER_001_28] = 1280,
+ [IB_RNR_TIMER_001_92] = 1920,
+ [IB_RNR_TIMER_002_56] = 2560,
+ [IB_RNR_TIMER_003_84] = 3840,
+ [IB_RNR_TIMER_005_12] = 5120,
+ [IB_RNR_TIMER_007_68] = 7680,
+ [IB_RNR_TIMER_010_24] = 10240,
+ [IB_RNR_TIMER_015_36] = 15360,
+ [IB_RNR_TIMER_020_48] = 20480,
+ [IB_RNR_TIMER_030_72] = 30720,
+ [IB_RNR_TIMER_040_96] = 40960,
+ [IB_RNR_TIMER_061_44] = 61410,
+ [IB_RNR_TIMER_081_92] = 81920,
+ [IB_RNR_TIMER_122_88] = 122880,
+ [IB_RNR_TIMER_163_84] = 163840,
+ [IB_RNR_TIMER_245_76] = 245760,
+ [IB_RNR_TIMER_327_68] = 327680,
+ [IB_RNR_TIMER_491_52] = 491520,
+};
+
+static inline unsigned long rnrnak_jiffies(u8 timeout)
+{
+ return max_t(unsigned long,
+ usecs_to_jiffies(rnrnak_usec[timeout]), 1);
+}
+
+static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
+{
+ switch (opcode) {
+ case IB_WR_RDMA_WRITE: return IB_WC_RDMA_WRITE;
+ case IB_WR_RDMA_WRITE_WITH_IMM: return IB_WC_RDMA_WRITE;
+ case IB_WR_SEND: return IB_WC_SEND;
+ case IB_WR_SEND_WITH_IMM: return IB_WC_SEND;
+ case IB_WR_RDMA_READ: return IB_WC_RDMA_READ;
+ case IB_WR_ATOMIC_CMP_AND_SWP: return IB_WC_COMP_SWAP;
+ case IB_WR_ATOMIC_FETCH_AND_ADD: return IB_WC_FETCH_ADD;
+ case IB_WR_LSO: return IB_WC_LSO;
+ case IB_WR_SEND_WITH_INV: return IB_WC_SEND;
+ case IB_WR_RDMA_READ_WITH_INV: return IB_WC_RDMA_READ;
+ case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV;
+ case IB_WR_REG_MR: return IB_WC_REG_MR;
+
+ default:
+ return 0xff;
+ }
+}
+
+void retransmit_timer(unsigned long data)
+{
+ struct rxe_qp *qp = (struct rxe_qp *)data;
+
+ if (qp->valid) {
+ qp->comp.timeout = 1;
+ rxe_run_task(&qp->comp.task, 1);
+ }
+}
+
+void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
+ struct sk_buff *skb)
+{
+ int must_sched;
+
+ skb_queue_tail(&qp->resp_pkts, skb);
+
+ must_sched = skb_queue_len(&qp->resp_pkts) > 1;
+ rxe_run_task(&qp->comp.task, must_sched);
+}
+
+static inline enum comp_state get_wqe(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt,
+ struct rxe_send_wqe **wqe_p)
+{
+ struct rxe_send_wqe *wqe;
+
+ /* we come here whether or not we found a response packet to see if
+ * there are any posted WQEs
+ */
+ wqe = queue_head(qp->sq.queue);
+ *wqe_p = wqe;
+
+ /* no WQE or requester has not started it yet */
+ if (!wqe || wqe->state == wqe_state_posted)
+ return pkt ? COMPST_DONE : COMPST_EXIT;
+
+ /* WQE does not require an ack */
+ if (wqe->state == wqe_state_done)
+ return COMPST_COMP_WQE;
+
+ /* WQE caused an error */
+ if (wqe->state == wqe_state_error)
+ return COMPST_ERROR;
+
+ /* we have a WQE, if we also have an ack check its PSN */
+ return pkt ? COMPST_CHECK_PSN : COMPST_EXIT;
+}
+
+static inline void reset_retry_counters(struct rxe_qp *qp)
+{
+ qp->comp.retry_cnt = qp->attr.retry_cnt;
+ qp->comp.rnr_retry = qp->attr.rnr_retry;
+}
+
+static inline enum comp_state check_psn(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt,
+ struct rxe_send_wqe *wqe)
+{
+ s32 diff;
+
+ /* check to see if response is past the oldest WQE. if it is, complete
+ * send/write or error read/atomic
+ */
+ diff = psn_compare(pkt->psn, wqe->last_psn);
+ if (diff > 0) {
+ if (wqe->state == wqe_state_pending) {
+ if (wqe->mask & WR_ATOMIC_OR_READ_MASK)
+ return COMPST_ERROR_RETRY;
+
+ reset_retry_counters(qp);
+ return COMPST_COMP_WQE;
+ } else {
+ return COMPST_DONE;
+ }
+ }
+
+ /* compare response packet to expected response */
+ diff = psn_compare(pkt->psn, qp->comp.psn);
+ if (diff < 0) {
+ /* response is most likely a retried packet if it matches an
+ * uncompleted WQE go complete it else ignore it
+ */
+ if (pkt->psn == wqe->last_psn)
+ return COMPST_COMP_ACK;
+ else
+ return COMPST_DONE;
+ } else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) {
+ return COMPST_ERROR_RETRY;
+ } else {
+ return COMPST_CHECK_ACK;
+ }
+}
+
+static inline enum comp_state check_ack(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt,
+ struct rxe_send_wqe *wqe)
+{
+ unsigned int mask = pkt->mask;
+ u8 syn;
+
+ /* Check the sequence only */
+ switch (qp->comp.opcode) {
+ case -1:
+ /* Will catch all *_ONLY cases. */
+ if (!(mask & RXE_START_MASK))
+ return COMPST_ERROR;
+
+ break;
+
+ case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
+ case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
+ if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
+ pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
+ return COMPST_ERROR;
+ }
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ /* Check operation validity. */
+ switch (pkt->opcode) {
+ case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
+ case IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST:
+ case IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY:
+ syn = aeth_syn(pkt);
+
+ if ((syn & AETH_TYPE_MASK) != AETH_ACK)
+ return COMPST_ERROR;
+
+ /* Fall through (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
+ * doesn't have an AETH)
+ */
+ case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
+ if (wqe->wr.opcode != IB_WR_RDMA_READ &&
+ wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {
+ return COMPST_ERROR;
+ }
+ reset_retry_counters(qp);
+ return COMPST_READ;
+
+ case IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE:
+ syn = aeth_syn(pkt);
+
+ if ((syn & AETH_TYPE_MASK) != AETH_ACK)
+ return COMPST_ERROR;
+
+ if (wqe->wr.opcode != IB_WR_ATOMIC_CMP_AND_SWP &&
+ wqe->wr.opcode != IB_WR_ATOMIC_FETCH_AND_ADD)
+ return COMPST_ERROR;
+ reset_retry_counters(qp);
+ return COMPST_ATOMIC;
+
+ case IB_OPCODE_RC_ACKNOWLEDGE:
+ syn = aeth_syn(pkt);
+ switch (syn & AETH_TYPE_MASK) {
+ case AETH_ACK:
+ reset_retry_counters(qp);
+ return COMPST_WRITE_SEND;
+
+ case AETH_RNR_NAK:
+ return COMPST_RNR_RETRY;
+
+ case AETH_NAK:
+ switch (syn) {
+ case AETH_NAK_PSN_SEQ_ERROR:
+ /* a nak implicitly acks all packets with psns
+ * before
+ */
+ if (psn_compare(pkt->psn, qp->comp.psn) > 0) {
+ qp->comp.psn = pkt->psn;
+ if (qp->req.wait_psn) {
+ qp->req.wait_psn = 0;
+ rxe_run_task(&qp->req.task, 1);
+ }
+ }
+ return COMPST_ERROR_RETRY;
+
+ case AETH_NAK_INVALID_REQ:
+ wqe->status = IB_WC_REM_INV_REQ_ERR;
+ return COMPST_ERROR;
+
+ case AETH_NAK_REM_ACC_ERR:
+ wqe->status = IB_WC_REM_ACCESS_ERR;
+ return COMPST_ERROR;
+
+ case AETH_NAK_REM_OP_ERR:
+ wqe->status = IB_WC_REM_OP_ERR;
+ return COMPST_ERROR;
+
+ default:
+ pr_warn("unexpected nak %x\n", syn);
+ wqe->status = IB_WC_REM_OP_ERR;
+ return COMPST_ERROR;
+ }
+
+ default:
+ return COMPST_ERROR;
+ }
+ break;
+
+ default:
+ pr_warn("unexpected opcode\n");
+ }
+
+ return COMPST_ERROR;
+}
+
+static inline enum comp_state do_read(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt,
+ struct rxe_send_wqe *wqe)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ int ret;
+
+ ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE,
+ &wqe->dma, payload_addr(pkt),
+ payload_size(pkt), to_mem_obj, NULL);
+ if (ret)
+ return COMPST_ERROR;
+
+ if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK))
+ return COMPST_COMP_ACK;
+ else
+ return COMPST_UPDATE_COMP;
+}
+
+static inline enum comp_state do_atomic(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt,
+ struct rxe_send_wqe *wqe)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ int ret;
+
+ u64 atomic_orig = atmack_orig(pkt);
+
+ ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE,
+ &wqe->dma, &atomic_orig,
+ sizeof(u64), to_mem_obj, NULL);
+ if (ret)
+ return COMPST_ERROR;
+ else
+ return COMPST_COMP_ACK;
+}
+
+static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+ struct rxe_cqe *cqe)
+{
+ memset(cqe, 0, sizeof(*cqe));
+
+ if (!qp->is_user) {
+ struct ib_wc *wc = &cqe->ibwc;
+
+ wc->wr_id = wqe->wr.wr_id;
+ wc->status = wqe->status;
+ wc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+ wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
+ wc->wc_flags = IB_WC_WITH_IMM;
+ wc->byte_len = wqe->dma.length;
+ wc->qp = &qp->ibqp;
+ } else {
+ struct ib_uverbs_wc *uwc = &cqe->uibwc;
+
+ uwc->wr_id = wqe->wr.wr_id;
+ uwc->status = wqe->status;
+ uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+ wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
+ uwc->wc_flags = IB_WC_WITH_IMM;
+ uwc->byte_len = wqe->dma.length;
+ uwc->qp_num = qp->ibqp.qp_num;
+ }
+}
+
+static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+ struct rxe_cqe cqe;
+
+ if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||
+ (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+ (qp->req.state == QP_STATE_ERROR)) {
+ make_send_cqe(qp, wqe, &cqe);
+ rxe_cq_post(qp->scq, &cqe, 0);
+ }
+
+ advance_consumer(qp->sq.queue);
+
+ /*
+ * we completed something so let req run again
+ * if it is trying to fence
+ */
+ if (qp->req.wait_fence) {
+ qp->req.wait_fence = 0;
+ rxe_run_task(&qp->req.task, 1);
+ }
+}
+
+static inline enum comp_state complete_ack(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt,
+ struct rxe_send_wqe *wqe)
+{
+ unsigned long flags;
+
+ if (wqe->has_rd_atomic) {
+ wqe->has_rd_atomic = 0;
+ atomic_inc(&qp->req.rd_atomic);
+ if (qp->req.need_rd_atomic) {
+ qp->comp.timeout_retry = 0;
+ qp->req.need_rd_atomic = 0;
+ rxe_run_task(&qp->req.task, 1);
+ }
+ }
+
+ if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
+ /* state_lock used by requester & completer */
+ spin_lock_irqsave(&qp->state_lock, flags);
+ if ((qp->req.state == QP_STATE_DRAIN) &&
+ (qp->comp.psn == qp->req.psn)) {
+ qp->req.state = QP_STATE_DRAINED;
+ spin_unlock_irqrestore(&qp->state_lock, flags);
+
+ if (qp->ibqp.event_handler) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_SQ_DRAINED;
+ qp->ibqp.event_handler(&ev,
+ qp->ibqp.qp_context);
+ }
+ } else {
+ spin_unlock_irqrestore(&qp->state_lock, flags);
+ }
+ }
+
+ do_complete(qp, wqe);
+
+ if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
+ return COMPST_UPDATE_COMP;
+ else
+ return COMPST_DONE;
+}
+
+static inline enum comp_state complete_wqe(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt,
+ struct rxe_send_wqe *wqe)
+{
+ qp->comp.opcode = -1;
+
+ if (pkt) {
+ if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
+ qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+
+ if (qp->req.wait_psn) {
+ qp->req.wait_psn = 0;
+ rxe_run_task(&qp->req.task, 1);
+ }
+ }
+
+ do_complete(qp, wqe);
+
+ return COMPST_GET_WQE;
+}
+
+int rxe_completer(void *arg)
+{
+ struct rxe_qp *qp = (struct rxe_qp *)arg;
+ struct rxe_send_wqe *wqe = wqe;
+ struct sk_buff *skb = NULL;
+ struct rxe_pkt_info *pkt = NULL;
+ enum comp_state state;
+
+ if (!qp->valid) {
+ while ((skb = skb_dequeue(&qp->resp_pkts))) {
+ rxe_drop_ref(qp);
+ kfree_skb(skb);
+ }
+ skb = NULL;
+ pkt = NULL;
+
+ while (queue_head(qp->sq.queue))
+ advance_consumer(qp->sq.queue);
+
+ goto exit;
+ }
+
+ if (qp->req.state == QP_STATE_ERROR) {
+ while ((skb = skb_dequeue(&qp->resp_pkts))) {
+ rxe_drop_ref(qp);
+ kfree_skb(skb);
+ }
+ skb = NULL;
+ pkt = NULL;
+
+ while ((wqe = queue_head(qp->sq.queue))) {
+ wqe->status = IB_WC_WR_FLUSH_ERR;
+ do_complete(qp, wqe);
+ }
+
+ goto exit;
+ }
+
+ if (qp->req.state == QP_STATE_RESET) {
+ while ((skb = skb_dequeue(&qp->resp_pkts))) {
+ rxe_drop_ref(qp);
+ kfree_skb(skb);
+ }
+ skb = NULL;
+ pkt = NULL;
+
+ while (queue_head(qp->sq.queue))
+ advance_consumer(qp->sq.queue);
+
+ goto exit;
+ }
+
+ if (qp->comp.timeout) {
+ qp->comp.timeout_retry = 1;
+ qp->comp.timeout = 0;
+ } else {
+ qp->comp.timeout_retry = 0;
+ }
+
+ if (qp->req.need_retry)
+ goto exit;
+
+ state = COMPST_GET_ACK;
+
+ while (1) {
+ pr_debug("state = %s\n", comp_state_name[state]);
+ switch (state) {
+ case COMPST_GET_ACK:
+ skb = skb_dequeue(&qp->resp_pkts);
+ if (skb) {
+ pkt = SKB_TO_PKT(skb);
+ qp->comp.timeout_retry = 0;
+ }
+ state = COMPST_GET_WQE;
+ break;
+
+ case COMPST_GET_WQE:
+ state = get_wqe(qp, pkt, &wqe);
+ break;
+
+ case COMPST_CHECK_PSN:
+ state = check_psn(qp, pkt, wqe);
+ break;
+
+ case COMPST_CHECK_ACK:
+ state = check_ack(qp, pkt, wqe);
+ break;
+
+ case COMPST_READ:
+ state = do_read(qp, pkt, wqe);
+ break;
+
+ case COMPST_ATOMIC:
+ state = do_atomic(qp, pkt, wqe);
+ break;
+
+ case COMPST_WRITE_SEND:
+ if (wqe->state == wqe_state_pending &&
+ wqe->last_psn == pkt->psn)
+ state = COMPST_COMP_ACK;
+ else
+ state = COMPST_UPDATE_COMP;
+ break;
+
+ case COMPST_COMP_ACK:
+ state = complete_ack(qp, pkt, wqe);
+ break;
+
+ case COMPST_COMP_WQE:
+ state = complete_wqe(qp, pkt, wqe);
+ break;
+
+ case COMPST_UPDATE_COMP:
+ if (pkt->mask & RXE_END_MASK)
+ qp->comp.opcode = -1;
+ else
+ qp->comp.opcode = pkt->opcode;
+
+ if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
+ qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+
+ if (qp->req.wait_psn) {
+ qp->req.wait_psn = 0;
+ rxe_run_task(&qp->req.task, 1);
+ }
+
+ state = COMPST_DONE;
+ break;
+
+ case COMPST_DONE:
+ if (pkt) {
+ rxe_drop_ref(pkt->qp);
+ kfree_skb(skb);
+ }
+ goto done;
+
+ case COMPST_EXIT:
+ if (qp->comp.timeout_retry && wqe) {
+ state = COMPST_ERROR_RETRY;
+ break;
+ }
+
+ /* re reset the timeout counter if
+ * (1) QP is type RC
+ * (2) the QP is alive
+ * (3) there is a packet sent by the requester that
+ * might be acked (we still might get spurious
+ * timeouts but try to keep them as few as possible)
+ * (4) the timeout parameter is set
+ */
+ if ((qp_type(qp) == IB_QPT_RC) &&
+ (qp->req.state == QP_STATE_READY) &&
+ (psn_compare(qp->req.psn, qp->comp.psn) > 0) &&
+ qp->qp_timeout_jiffies)
+ mod_timer(&qp->retrans_timer,
+ jiffies + qp->qp_timeout_jiffies);
+ goto exit;
+
+ case COMPST_ERROR_RETRY:
+ /* we come here if the retry timer fired and we did
+ * not receive a response packet. try to retry the send
+ * queue if that makes sense and the limits have not
+ * been exceeded. remember that some timeouts are
+ * spurious since we do not reset the timer but kick
+ * it down the road or let it expire
+ */
+
+ /* there is nothing to retry in this case */
+ if (!wqe || (wqe->state == wqe_state_posted))
+ goto exit;
+
+ if (qp->comp.retry_cnt > 0) {
+ if (qp->comp.retry_cnt != 7)
+ qp->comp.retry_cnt--;
+
+ /* no point in retrying if we have already
+ * seen the last ack that the requester could
+ * have caused
+ */
+ if (psn_compare(qp->req.psn,
+ qp->comp.psn) > 0) {
+ /* tell the requester to retry the
+ * send send queue next time around
+ */
+ qp->req.need_retry = 1;
+ rxe_run_task(&qp->req.task, 1);
+ }
+ goto exit;
+ } else {
+ wqe->status = IB_WC_RETRY_EXC_ERR;
+ state = COMPST_ERROR;
+ }
+ break;
+
+ case COMPST_RNR_RETRY:
+ if (qp->comp.rnr_retry > 0) {
+ if (qp->comp.rnr_retry != 7)
+ qp->comp.rnr_retry--;
+
+ qp->req.need_retry = 1;
+ pr_debug("set rnr nak timer\n");
+ mod_timer(&qp->rnr_nak_timer,
+ jiffies + rnrnak_jiffies(aeth_syn(pkt)
+ & ~AETH_TYPE_MASK));
+ goto exit;
+ } else {
+ wqe->status = IB_WC_RNR_RETRY_EXC_ERR;
+ state = COMPST_ERROR;
+ }
+ break;
+
+ case COMPST_ERROR:
+ do_complete(qp, wqe);
+ rxe_qp_error(qp);
+ goto exit;
+ }
+ }
+
+exit:
+ /* we come here if we are done with processing and want the task to
+ * exit from the loop calling us
+ */
+ return -EAGAIN;
+
+done:
+ /* we come here if we have processed a packet we want the task to call
+ * us again to see if there is anything else to do
+ */
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+
+int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
+ int cqe, int comp_vector, struct ib_udata *udata)
+{
+ int count;
+
+ if (cqe <= 0) {
+ pr_warn("cqe(%d) <= 0\n", cqe);
+ goto err1;
+ }
+
+ if (cqe > rxe->attr.max_cqe) {
+ pr_warn("cqe(%d) > max_cqe(%d)\n",
+ cqe, rxe->attr.max_cqe);
+ goto err1;
+ }
+
+ if (cq) {
+ count = queue_count(cq->queue);
+ if (cqe < count) {
+ pr_warn("cqe(%d) < current # elements in queue (%d)",
+ cqe, count);
+ goto err1;
+ }
+ }
+
+ return 0;
+
+err1:
+ return -EINVAL;
+}
+
+static void rxe_send_complete(unsigned long data)
+{
+ struct rxe_cq *cq = (struct rxe_cq *)data;
+
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
+int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
+ int comp_vector, struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ int err;
+
+ cq->queue = rxe_queue_init(rxe, &cqe,
+ sizeof(struct rxe_cqe));
+ if (!cq->queue) {
+ pr_warn("unable to create cq\n");
+ return -ENOMEM;
+ }
+
+ err = do_mmap_info(rxe, udata, false, context, cq->queue->buf,
+ cq->queue->buf_size, &cq->queue->ip);
+ if (err) {
+ kvfree(cq->queue->buf);
+ kfree(cq->queue);
+ return err;
+ }
+
+ if (udata)
+ cq->is_user = 1;
+
+ tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq);
+
+ spin_lock_init(&cq->cq_lock);
+ cq->ibcq.cqe = cqe;
+ return 0;
+}
+
+int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, struct ib_udata *udata)
+{
+ int err;
+
+ err = rxe_queue_resize(cq->queue, (unsigned int *)&cqe,
+ sizeof(struct rxe_cqe),
+ cq->queue->ip ? cq->queue->ip->context : NULL,
+ udata, NULL, &cq->cq_lock);
+ if (!err)
+ cq->ibcq.cqe = cqe;
+
+ return err;
+}
+
+int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
+{
+ struct ib_event ev;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+
+ if (unlikely(queue_full(cq->queue))) {
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+ if (cq->ibcq.event_handler) {
+ ev.device = cq->ibcq.device;
+ ev.element.cq = &cq->ibcq;
+ ev.event = IB_EVENT_CQ_ERR;
+ cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
+ }
+
+ return -EBUSY;
+ }
+
+ memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe));
+
+ /* make sure all changes to the CQ are written before we update the
+ * producer pointer
+ */
+ smp_wmb();
+
+ advance_producer(cq->queue);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ if ((cq->notify == IB_CQ_NEXT_COMP) ||
+ (cq->notify == IB_CQ_SOLICITED && solicited)) {
+ cq->notify = 0;
+ tasklet_schedule(&cq->comp_task);
+ }
+
+ return 0;
+}
+
+void rxe_cq_cleanup(void *arg)
+{
+ struct rxe_cq *cq = arg;
+
+ if (cq->queue)
+ rxe_queue_cleanup(cq->queue);
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+#define DMA_BAD_ADDER ((u64)0)
+
+static int rxe_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+ return dma_addr == DMA_BAD_ADDER;
+}
+
+static u64 rxe_dma_map_single(struct ib_device *dev,
+ void *cpu_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ WARN_ON(!valid_dma_direction(direction));
+ return (uintptr_t)cpu_addr;
+}
+
+static void rxe_dma_unmap_single(struct ib_device *dev,
+ u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ WARN_ON(!valid_dma_direction(direction));
+}
+
+static u64 rxe_dma_map_page(struct ib_device *dev,
+ struct page *page,
+ unsigned long offset,
+ size_t size, enum dma_data_direction direction)
+{
+ u64 addr;
+
+ WARN_ON(!valid_dma_direction(direction));
+
+ if (offset + size > PAGE_SIZE) {
+ addr = DMA_BAD_ADDER;
+ goto done;
+ }
+
+ addr = (uintptr_t)page_address(page);
+ if (addr)
+ addr += offset;
+
+done:
+ return addr;
+}
+
+static void rxe_dma_unmap_page(struct ib_device *dev,
+ u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ WARN_ON(!valid_dma_direction(direction));
+}
+
+static int rxe_map_sg(struct ib_device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction direction)
+{
+ struct scatterlist *sg;
+ u64 addr;
+ int i;
+ int ret = nents;
+
+ WARN_ON(!valid_dma_direction(direction));
+
+ for_each_sg(sgl, sg, nents, i) {
+ addr = (uintptr_t)page_address(sg_page(sg));
+ if (!addr) {
+ ret = 0;
+ break;
+ }
+ sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
+ }
+
+ return ret;
+}
+
+static void rxe_unmap_sg(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ WARN_ON(!valid_dma_direction(direction));
+}
+
+static void rxe_sync_single_for_cpu(struct ib_device *dev,
+ u64 addr,
+ size_t size, enum dma_data_direction dir)
+{
+}
+
+static void rxe_sync_single_for_device(struct ib_device *dev,
+ u64 addr,
+ size_t size, enum dma_data_direction dir)
+{
+}
+
+static void *rxe_dma_alloc_coherent(struct ib_device *dev, size_t size,
+ u64 *dma_handle, gfp_t flag)
+{
+ struct page *p;
+ void *addr = NULL;
+
+ p = alloc_pages(flag, get_order(size));
+ if (p)
+ addr = page_address(p);
+
+ if (dma_handle)
+ *dma_handle = (uintptr_t)addr;
+
+ return addr;
+}
+
+static void rxe_dma_free_coherent(struct ib_device *dev, size_t size,
+ void *cpu_addr, u64 dma_handle)
+{
+ free_pages((unsigned long)cpu_addr, get_order(size));
+}
+
+struct ib_dma_mapping_ops rxe_dma_mapping_ops = {
+ .mapping_error = rxe_mapping_error,
+ .map_single = rxe_dma_map_single,
+ .unmap_single = rxe_dma_unmap_single,
+ .map_page = rxe_dma_map_page,
+ .unmap_page = rxe_dma_unmap_page,
+ .map_sg = rxe_map_sg,
+ .unmap_sg = rxe_unmap_sg,
+ .sync_single_for_cpu = rxe_sync_single_for_cpu,
+ .sync_single_for_device = rxe_sync_single_for_device,
+ .alloc_coherent = rxe_dma_alloc_coherent,
+ .free_coherent = rxe_dma_free_coherent
+};
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_HDR_H
+#define RXE_HDR_H
+
+/* extracted information about a packet carried in an sk_buff struct fits in
+ * the skbuff cb array. Must be at most 48 bytes. stored in control block of
+ * sk_buff for received packets.
+ */
+struct rxe_pkt_info {
+ struct rxe_dev *rxe; /* device that owns packet */
+ struct rxe_qp *qp; /* qp that owns packet */
+ struct rxe_send_wqe *wqe; /* send wqe */
+ u8 *hdr; /* points to bth */
+ u32 mask; /* useful info about pkt */
+ u32 psn; /* bth psn of packet */
+ u16 pkey_index; /* partition of pkt */
+ u16 paylen; /* length of bth - icrc */
+ u8 port_num; /* port pkt received on */
+ u8 opcode; /* bth opcode of packet */
+ u8 offset; /* bth offset from pkt->hdr */
+};
+
+/* Macros should be used only for received skb */
+#define SKB_TO_PKT(skb) ((struct rxe_pkt_info *)(skb)->cb)
+#define PKT_TO_SKB(pkt) container_of((void *)(pkt), struct sk_buff, cb)
+
+/*
+ * IBA header types and methods
+ *
+ * Some of these are for reference and completeness only since
+ * rxe does not currently support RD transport
+ * most of this could be moved into IB core. ib_pack.h has
+ * part of this but is incomplete
+ *
+ * Header specific routines to insert/extract values to/from headers
+ * the routines that are named __hhh_(set_)fff() take a pointer to a
+ * hhh header and get(set) the fff field. The routines named
+ * hhh_(set_)fff take a packet info struct and find the
+ * header and field based on the opcode in the packet.
+ * Conversion to/from network byte order from cpu order is also done.
+ */
+
+#define RXE_ICRC_SIZE (4)
+#define RXE_MAX_HDR_LENGTH (80)
+
+/******************************************************************************
+ * Base Transport Header
+ ******************************************************************************/
+struct rxe_bth {
+ u8 opcode;
+ u8 flags;
+ __be16 pkey;
+ __be32 qpn;
+ __be32 apsn;
+};
+
+#define BTH_TVER (0)
+#define BTH_DEF_PKEY (0xffff)
+
+#define BTH_SE_MASK (0x80)
+#define BTH_MIG_MASK (0x40)
+#define BTH_PAD_MASK (0x30)
+#define BTH_TVER_MASK (0x0f)
+#define BTH_FECN_MASK (0x80000000)
+#define BTH_BECN_MASK (0x40000000)
+#define BTH_RESV6A_MASK (0x3f000000)
+#define BTH_QPN_MASK (0x00ffffff)
+#define BTH_ACK_MASK (0x80000000)
+#define BTH_RESV7_MASK (0x7f000000)
+#define BTH_PSN_MASK (0x00ffffff)
+
+static inline u8 __bth_opcode(void *arg)
+{
+ struct rxe_bth *bth = arg;
+
+ return bth->opcode;
+}
+
+static inline void __bth_set_opcode(void *arg, u8 opcode)
+{
+ struct rxe_bth *bth = arg;
+
+ bth->opcode = opcode;
+}
+
+static inline u8 __bth_se(void *arg)
+{
+ struct rxe_bth *bth = arg;
+
+ return 0 != (BTH_SE_MASK & bth->flags);
+}
+
+static inline void __bth_set_se(void *arg, int se)
+{
+ struct rxe_bth *bth = arg;
+
+ if (se)
+ bth->flags |= BTH_SE_MASK;
+ else
+ bth->flags &= ~BTH_SE_MASK;
+}
+
+static inline u8 __bth_mig(void *arg)
+{
+ struct rxe_bth *bth = arg;
+
+ return 0 != (BTH_MIG_MASK & bth->flags);
+}
+
+static inline void __bth_set_mig(void *arg, u8 mig)
+{
+ struct rxe_bth *bth = arg;
+
+ if (mig)
+ bth->flags |= BTH_MIG_MASK;
+ else
+ bth->flags &= ~BTH_MIG_MASK;
+}
+
+static inline u8 __bth_pad(void *arg)
+{
+ struct rxe_bth *bth = arg;
+
+ return (BTH_PAD_MASK & bth->flags) >> 4;
+}
+
+static inline void __bth_set_pad(void *arg, u8 pad)
+{
+ struct rxe_bth *bth = arg;
+
+ bth->flags = (BTH_PAD_MASK & (pad << 4)) |
+ (~BTH_PAD_MASK & bth->flags);
+}
+
+static inline u8 __bth_tver(void *arg)
+{
+ struct rxe_bth *bth = arg;
+
+ return BTH_TVER_MASK & bth->flags;
+}
+
+static inline void __bth_set_tver(void *arg, u8 tver)
+{
+ struct rxe_bth *bth = arg;
+
+ bth->flags = (BTH_TVER_MASK & tver) |
+ (~BTH_TVER_MASK & bth->flags);
+}
+
+static inline u16 __bth_pkey(void *arg)
+{
+ struct rxe_bth *bth = arg;
+
+ return be16_to_cpu(bth->pkey);
+}
+
+static inline void __bth_set_pkey(void *arg, u16 pkey)
+{
+ struct rxe_bth *bth = arg;
+
+ bth->pkey = cpu_to_be16(pkey);
+}
+
+static inline u32 __bth_qpn(void *arg)
+{
+ struct rxe_bth *bth = arg;
+
+ return BTH_QPN_MASK & be32_to_cpu(bth->qpn);
+}
+
+static inline void __bth_set_qpn(void *arg, u32 qpn)
+{
+ struct rxe_bth *bth = arg;
+ u32 resvqpn = be32_to_cpu(bth->qpn);
+
+ bth->qpn = cpu_to_be32((BTH_QPN_MASK & qpn) |
+ (~BTH_QPN_MASK & resvqpn));
+}
+
+static inline int __bth_fecn(void *arg)
+{
+ struct rxe_bth *bth = arg;
+
+ return 0 != (cpu_to_be32(BTH_FECN_MASK) & bth->qpn);
+}
+
+static inline void __bth_set_fecn(void *arg, int fecn)
+{
+ struct rxe_bth *bth = arg;
+
+ if (fecn)
+ bth->qpn |= cpu_to_be32(BTH_FECN_MASK);
+ else
+ bth->qpn &= ~cpu_to_be32(BTH_FECN_MASK);
+}
+
+static inline int __bth_becn(void *arg)
+{
+ struct rxe_bth *bth = arg;
+
+ return 0 != (cpu_to_be32(BTH_BECN_MASK) & bth->qpn);
+}
+
+static inline void __bth_set_becn(void *arg, int becn)
+{
+ struct rxe_bth *bth = arg;
+
+ if (becn)
+ bth->qpn |= cpu_to_be32(BTH_BECN_MASK);
+ else
+ bth->qpn &= ~cpu_to_be32(BTH_BECN_MASK);
+}
+
+static inline u8 __bth_resv6a(void *arg)
+{
+ struct rxe_bth *bth = arg;
+
+ return (BTH_RESV6A_MASK & be32_to_cpu(bth->qpn)) >> 24;
+}
+
+static inline void __bth_set_resv6a(void *arg)
+{
+ struct rxe_bth *bth = arg;
+
+ bth->qpn = cpu_to_be32(~BTH_RESV6A_MASK);
+}
+
+static inline int __bth_ack(void *arg)
+{
+ struct rxe_bth *bth = arg;
+
+ return 0 != (cpu_to_be32(BTH_ACK_MASK) & bth->apsn);
+}
+
+static inline void __bth_set_ack(void *arg, int ack)
+{
+ struct rxe_bth *bth = arg;
+
+ if (ack)
+ bth->apsn |= cpu_to_be32(BTH_ACK_MASK);
+ else
+ bth->apsn &= ~cpu_to_be32(BTH_ACK_MASK);
+}
+
+static inline void __bth_set_resv7(void *arg)
+{
+ struct rxe_bth *bth = arg;
+
+ bth->apsn &= ~cpu_to_be32(BTH_RESV7_MASK);
+}
+
+static inline u32 __bth_psn(void *arg)
+{
+ struct rxe_bth *bth = arg;
+
+ return BTH_PSN_MASK & be32_to_cpu(bth->apsn);
+}
+
+static inline void __bth_set_psn(void *arg, u32 psn)
+{
+ struct rxe_bth *bth = arg;
+ u32 apsn = be32_to_cpu(bth->apsn);
+
+ bth->apsn = cpu_to_be32((BTH_PSN_MASK & psn) |
+ (~BTH_PSN_MASK & apsn));
+}
+
+static inline u8 bth_opcode(struct rxe_pkt_info *pkt)
+{
+ return __bth_opcode(pkt->hdr + pkt->offset);
+}
+
+static inline void bth_set_opcode(struct rxe_pkt_info *pkt, u8 opcode)
+{
+ __bth_set_opcode(pkt->hdr + pkt->offset, opcode);
+}
+
+static inline u8 bth_se(struct rxe_pkt_info *pkt)
+{
+ return __bth_se(pkt->hdr + pkt->offset);
+}
+
+static inline void bth_set_se(struct rxe_pkt_info *pkt, int se)
+{
+ __bth_set_se(pkt->hdr + pkt->offset, se);
+}
+
+static inline u8 bth_mig(struct rxe_pkt_info *pkt)
+{
+ return __bth_mig(pkt->hdr + pkt->offset);
+}
+
+static inline void bth_set_mig(struct rxe_pkt_info *pkt, u8 mig)
+{
+ __bth_set_mig(pkt->hdr + pkt->offset, mig);
+}
+
+static inline u8 bth_pad(struct rxe_pkt_info *pkt)
+{
+ return __bth_pad(pkt->hdr + pkt->offset);
+}
+
+static inline void bth_set_pad(struct rxe_pkt_info *pkt, u8 pad)
+{
+ __bth_set_pad(pkt->hdr + pkt->offset, pad);
+}
+
+static inline u8 bth_tver(struct rxe_pkt_info *pkt)
+{
+ return __bth_tver(pkt->hdr + pkt->offset);
+}
+
+static inline void bth_set_tver(struct rxe_pkt_info *pkt, u8 tver)
+{
+ __bth_set_tver(pkt->hdr + pkt->offset, tver);
+}
+
+static inline u16 bth_pkey(struct rxe_pkt_info *pkt)
+{
+ return __bth_pkey(pkt->hdr + pkt->offset);
+}
+
+static inline void bth_set_pkey(struct rxe_pkt_info *pkt, u16 pkey)
+{
+ __bth_set_pkey(pkt->hdr + pkt->offset, pkey);
+}
+
+static inline u32 bth_qpn(struct rxe_pkt_info *pkt)
+{
+ return __bth_qpn(pkt->hdr + pkt->offset);
+}
+
+static inline void bth_set_qpn(struct rxe_pkt_info *pkt, u32 qpn)
+{
+ __bth_set_qpn(pkt->hdr + pkt->offset, qpn);
+}
+
+static inline int bth_fecn(struct rxe_pkt_info *pkt)
+{
+ return __bth_fecn(pkt->hdr + pkt->offset);
+}
+
+static inline void bth_set_fecn(struct rxe_pkt_info *pkt, int fecn)
+{
+ __bth_set_fecn(pkt->hdr + pkt->offset, fecn);
+}
+
+static inline int bth_becn(struct rxe_pkt_info *pkt)
+{
+ return __bth_becn(pkt->hdr + pkt->offset);
+}
+
+static inline void bth_set_becn(struct rxe_pkt_info *pkt, int becn)
+{
+ __bth_set_becn(pkt->hdr + pkt->offset, becn);
+}
+
+static inline u8 bth_resv6a(struct rxe_pkt_info *pkt)
+{
+ return __bth_resv6a(pkt->hdr + pkt->offset);
+}
+
+static inline void bth_set_resv6a(struct rxe_pkt_info *pkt)
+{
+ __bth_set_resv6a(pkt->hdr + pkt->offset);
+}
+
+static inline int bth_ack(struct rxe_pkt_info *pkt)
+{
+ return __bth_ack(pkt->hdr + pkt->offset);
+}
+
+static inline void bth_set_ack(struct rxe_pkt_info *pkt, int ack)
+{
+ __bth_set_ack(pkt->hdr + pkt->offset, ack);
+}
+
+static inline void bth_set_resv7(struct rxe_pkt_info *pkt)
+{
+ __bth_set_resv7(pkt->hdr + pkt->offset);
+}
+
+static inline u32 bth_psn(struct rxe_pkt_info *pkt)
+{
+ return __bth_psn(pkt->hdr + pkt->offset);
+}
+
+static inline void bth_set_psn(struct rxe_pkt_info *pkt, u32 psn)
+{
+ __bth_set_psn(pkt->hdr + pkt->offset, psn);
+}
+
+static inline void bth_init(struct rxe_pkt_info *pkt, u8 opcode, int se,
+ int mig, int pad, u16 pkey, u32 qpn, int ack_req,
+ u32 psn)
+{
+ struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr + pkt->offset);
+
+ bth->opcode = opcode;
+ bth->flags = (pad << 4) & BTH_PAD_MASK;
+ if (se)
+ bth->flags |= BTH_SE_MASK;
+ if (mig)
+ bth->flags |= BTH_MIG_MASK;
+ bth->pkey = cpu_to_be16(pkey);
+ bth->qpn = cpu_to_be32(qpn & BTH_QPN_MASK);
+ psn &= BTH_PSN_MASK;
+ if (ack_req)
+ psn |= BTH_ACK_MASK;
+ bth->apsn = cpu_to_be32(psn);
+}
+
+/******************************************************************************
+ * Reliable Datagram Extended Transport Header
+ ******************************************************************************/
+struct rxe_rdeth {
+ __be32 een;
+};
+
+#define RDETH_EEN_MASK (0x00ffffff)
+
+static inline u8 __rdeth_een(void *arg)
+{
+ struct rxe_rdeth *rdeth = arg;
+
+ return RDETH_EEN_MASK & be32_to_cpu(rdeth->een);
+}
+
+static inline void __rdeth_set_een(void *arg, u32 een)
+{
+ struct rxe_rdeth *rdeth = arg;
+
+ rdeth->een = cpu_to_be32(RDETH_EEN_MASK & een);
+}
+
+static inline u8 rdeth_een(struct rxe_pkt_info *pkt)
+{
+ return __rdeth_een(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_RDETH]);
+}
+
+static inline void rdeth_set_een(struct rxe_pkt_info *pkt, u32 een)
+{
+ __rdeth_set_een(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_RDETH], een);
+}
+
+/******************************************************************************
+ * Datagram Extended Transport Header
+ ******************************************************************************/
+struct rxe_deth {
+ __be32 qkey;
+ __be32 sqp;
+};
+
+#define GSI_QKEY (0x80010000)
+#define DETH_SQP_MASK (0x00ffffff)
+
+static inline u32 __deth_qkey(void *arg)
+{
+ struct rxe_deth *deth = arg;
+
+ return be32_to_cpu(deth->qkey);
+}
+
+static inline void __deth_set_qkey(void *arg, u32 qkey)
+{
+ struct rxe_deth *deth = arg;
+
+ deth->qkey = cpu_to_be32(qkey);
+}
+
+static inline u32 __deth_sqp(void *arg)
+{
+ struct rxe_deth *deth = arg;
+
+ return DETH_SQP_MASK & be32_to_cpu(deth->sqp);
+}
+
+static inline void __deth_set_sqp(void *arg, u32 sqp)
+{
+ struct rxe_deth *deth = arg;
+
+ deth->sqp = cpu_to_be32(DETH_SQP_MASK & sqp);
+}
+
+static inline u32 deth_qkey(struct rxe_pkt_info *pkt)
+{
+ return __deth_qkey(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_DETH]);
+}
+
+static inline void deth_set_qkey(struct rxe_pkt_info *pkt, u32 qkey)
+{
+ __deth_set_qkey(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey);
+}
+
+static inline u32 deth_sqp(struct rxe_pkt_info *pkt)
+{
+ return __deth_sqp(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_DETH]);
+}
+
+static inline void deth_set_sqp(struct rxe_pkt_info *pkt, u32 sqp)
+{
+ __deth_set_sqp(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp);
+}
+
+/******************************************************************************
+ * RDMA Extended Transport Header
+ ******************************************************************************/
+struct rxe_reth {
+ __be64 va;
+ __be32 rkey;
+ __be32 len;
+};
+
+static inline u64 __reth_va(void *arg)
+{
+ struct rxe_reth *reth = arg;
+
+ return be64_to_cpu(reth->va);
+}
+
+static inline void __reth_set_va(void *arg, u64 va)
+{
+ struct rxe_reth *reth = arg;
+
+ reth->va = cpu_to_be64(va);
+}
+
+static inline u32 __reth_rkey(void *arg)
+{
+ struct rxe_reth *reth = arg;
+
+ return be32_to_cpu(reth->rkey);
+}
+
+static inline void __reth_set_rkey(void *arg, u32 rkey)
+{
+ struct rxe_reth *reth = arg;
+
+ reth->rkey = cpu_to_be32(rkey);
+}
+
+static inline u32 __reth_len(void *arg)
+{
+ struct rxe_reth *reth = arg;
+
+ return be32_to_cpu(reth->len);
+}
+
+static inline void __reth_set_len(void *arg, u32 len)
+{
+ struct rxe_reth *reth = arg;
+
+ reth->len = cpu_to_be32(len);
+}
+
+static inline u64 reth_va(struct rxe_pkt_info *pkt)
+{
+ return __reth_va(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_RETH]);
+}
+
+static inline void reth_set_va(struct rxe_pkt_info *pkt, u64 va)
+{
+ __reth_set_va(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_RETH], va);
+}
+
+static inline u32 reth_rkey(struct rxe_pkt_info *pkt)
+{
+ return __reth_rkey(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_RETH]);
+}
+
+static inline void reth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)
+{
+ __reth_set_rkey(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey);
+}
+
+static inline u32 reth_len(struct rxe_pkt_info *pkt)
+{
+ return __reth_len(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_RETH]);
+}
+
+static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len)
+{
+ __reth_set_len(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_RETH], len);
+}
+
+/******************************************************************************
+ * Atomic Extended Transport Header
+ ******************************************************************************/
+struct rxe_atmeth {
+ __be64 va;
+ __be32 rkey;
+ __be64 swap_add;
+ __be64 comp;
+} __attribute__((__packed__));
+
+static inline u64 __atmeth_va(void *arg)
+{
+ struct rxe_atmeth *atmeth = arg;
+
+ return be64_to_cpu(atmeth->va);
+}
+
+static inline void __atmeth_set_va(void *arg, u64 va)
+{
+ struct rxe_atmeth *atmeth = arg;
+
+ atmeth->va = cpu_to_be64(va);
+}
+
+static inline u32 __atmeth_rkey(void *arg)
+{
+ struct rxe_atmeth *atmeth = arg;
+
+ return be32_to_cpu(atmeth->rkey);
+}
+
+static inline void __atmeth_set_rkey(void *arg, u32 rkey)
+{
+ struct rxe_atmeth *atmeth = arg;
+
+ atmeth->rkey = cpu_to_be32(rkey);
+}
+
+static inline u64 __atmeth_swap_add(void *arg)
+{
+ struct rxe_atmeth *atmeth = arg;
+
+ return be64_to_cpu(atmeth->swap_add);
+}
+
+static inline void __atmeth_set_swap_add(void *arg, u64 swap_add)
+{
+ struct rxe_atmeth *atmeth = arg;
+
+ atmeth->swap_add = cpu_to_be64(swap_add);
+}
+
+static inline u64 __atmeth_comp(void *arg)
+{
+ struct rxe_atmeth *atmeth = arg;
+
+ return be64_to_cpu(atmeth->comp);
+}
+
+static inline void __atmeth_set_comp(void *arg, u64 comp)
+{
+ struct rxe_atmeth *atmeth = arg;
+
+ atmeth->comp = cpu_to_be64(comp);
+}
+
+static inline u64 atmeth_va(struct rxe_pkt_info *pkt)
+{
+ return __atmeth_va(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
+}
+
+static inline void atmeth_set_va(struct rxe_pkt_info *pkt, u64 va)
+{
+ __atmeth_set_va(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va);
+}
+
+static inline u32 atmeth_rkey(struct rxe_pkt_info *pkt)
+{
+ return __atmeth_rkey(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
+}
+
+static inline void atmeth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)
+{
+ __atmeth_set_rkey(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey);
+}
+
+static inline u64 atmeth_swap_add(struct rxe_pkt_info *pkt)
+{
+ return __atmeth_swap_add(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
+}
+
+static inline void atmeth_set_swap_add(struct rxe_pkt_info *pkt, u64 swap_add)
+{
+ __atmeth_set_swap_add(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add);
+}
+
+static inline u64 atmeth_comp(struct rxe_pkt_info *pkt)
+{
+ return __atmeth_comp(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]);
+}
+
+static inline void atmeth_set_comp(struct rxe_pkt_info *pkt, u64 comp)
+{
+ __atmeth_set_comp(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp);
+}
+
+/******************************************************************************
+ * Ack Extended Transport Header
+ ******************************************************************************/
+struct rxe_aeth {
+ __be32 smsn;
+};
+
+#define AETH_SYN_MASK (0xff000000)
+#define AETH_MSN_MASK (0x00ffffff)
+
+enum aeth_syndrome {
+ AETH_TYPE_MASK = 0xe0,
+ AETH_ACK = 0x00,
+ AETH_RNR_NAK = 0x20,
+ AETH_RSVD = 0x40,
+ AETH_NAK = 0x60,
+ AETH_ACK_UNLIMITED = 0x1f,
+ AETH_NAK_PSN_SEQ_ERROR = 0x60,
+ AETH_NAK_INVALID_REQ = 0x61,
+ AETH_NAK_REM_ACC_ERR = 0x62,
+ AETH_NAK_REM_OP_ERR = 0x63,
+ AETH_NAK_INV_RD_REQ = 0x64,
+};
+
+static inline u8 __aeth_syn(void *arg)
+{
+ struct rxe_aeth *aeth = arg;
+
+ return (AETH_SYN_MASK & be32_to_cpu(aeth->smsn)) >> 24;
+}
+
+static inline void __aeth_set_syn(void *arg, u8 syn)
+{
+ struct rxe_aeth *aeth = arg;
+ u32 smsn = be32_to_cpu(aeth->smsn);
+
+ aeth->smsn = cpu_to_be32((AETH_SYN_MASK & (syn << 24)) |
+ (~AETH_SYN_MASK & smsn));
+}
+
+static inline u32 __aeth_msn(void *arg)
+{
+ struct rxe_aeth *aeth = arg;
+
+ return AETH_MSN_MASK & be32_to_cpu(aeth->smsn);
+}
+
+static inline void __aeth_set_msn(void *arg, u32 msn)
+{
+ struct rxe_aeth *aeth = arg;
+ u32 smsn = be32_to_cpu(aeth->smsn);
+
+ aeth->smsn = cpu_to_be32((AETH_MSN_MASK & msn) |
+ (~AETH_MSN_MASK & smsn));
+}
+
+static inline u8 aeth_syn(struct rxe_pkt_info *pkt)
+{
+ return __aeth_syn(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_AETH]);
+}
+
+static inline void aeth_set_syn(struct rxe_pkt_info *pkt, u8 syn)
+{
+ __aeth_set_syn(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_AETH], syn);
+}
+
+static inline u32 aeth_msn(struct rxe_pkt_info *pkt)
+{
+ return __aeth_msn(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_AETH]);
+}
+
+static inline void aeth_set_msn(struct rxe_pkt_info *pkt, u32 msn)
+{
+ __aeth_set_msn(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_AETH], msn);
+}
+
+/******************************************************************************
+ * Atomic Ack Extended Transport Header
+ ******************************************************************************/
+struct rxe_atmack {
+ __be64 orig;
+};
+
+static inline u64 __atmack_orig(void *arg)
+{
+ struct rxe_atmack *atmack = arg;
+
+ return be64_to_cpu(atmack->orig);
+}
+
+static inline void __atmack_set_orig(void *arg, u64 orig)
+{
+ struct rxe_atmack *atmack = arg;
+
+ atmack->orig = cpu_to_be64(orig);
+}
+
+static inline u64 atmack_orig(struct rxe_pkt_info *pkt)
+{
+ return __atmack_orig(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_ATMACK]);
+}
+
+static inline void atmack_set_orig(struct rxe_pkt_info *pkt, u64 orig)
+{
+ __atmack_set_orig(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig);
+}
+
+/******************************************************************************
+ * Immediate Extended Transport Header
+ ******************************************************************************/
+struct rxe_immdt {
+ __be32 imm;
+};
+
+static inline __be32 __immdt_imm(void *arg)
+{
+ struct rxe_immdt *immdt = arg;
+
+ return immdt->imm;
+}
+
+static inline void __immdt_set_imm(void *arg, __be32 imm)
+{
+ struct rxe_immdt *immdt = arg;
+
+ immdt->imm = imm;
+}
+
+static inline __be32 immdt_imm(struct rxe_pkt_info *pkt)
+{
+ return __immdt_imm(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_IMMDT]);
+}
+
+static inline void immdt_set_imm(struct rxe_pkt_info *pkt, __be32 imm)
+{
+ __immdt_set_imm(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm);
+}
+
+/******************************************************************************
+ * Invalidate Extended Transport Header
+ ******************************************************************************/
+struct rxe_ieth {
+ __be32 rkey;
+};
+
+static inline u32 __ieth_rkey(void *arg)
+{
+ struct rxe_ieth *ieth = arg;
+
+ return be32_to_cpu(ieth->rkey);
+}
+
+static inline void __ieth_set_rkey(void *arg, u32 rkey)
+{
+ struct rxe_ieth *ieth = arg;
+
+ ieth->rkey = cpu_to_be32(rkey);
+}
+
+static inline u32 ieth_rkey(struct rxe_pkt_info *pkt)
+{
+ return __ieth_rkey(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_IETH]);
+}
+
+static inline void ieth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey)
+{
+ __ieth_set_rkey(pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey);
+}
+
+enum rxe_hdr_length {
+ RXE_BTH_BYTES = sizeof(struct rxe_bth),
+ RXE_DETH_BYTES = sizeof(struct rxe_deth),
+ RXE_IMMDT_BYTES = sizeof(struct rxe_immdt),
+ RXE_RETH_BYTES = sizeof(struct rxe_reth),
+ RXE_AETH_BYTES = sizeof(struct rxe_aeth),
+ RXE_ATMACK_BYTES = sizeof(struct rxe_atmack),
+ RXE_ATMETH_BYTES = sizeof(struct rxe_atmeth),
+ RXE_IETH_BYTES = sizeof(struct rxe_ieth),
+ RXE_RDETH_BYTES = sizeof(struct rxe_rdeth),
+};
+
+static inline size_t header_size(struct rxe_pkt_info *pkt)
+{
+ return pkt->offset + rxe_opcode[pkt->opcode].length;
+}
+
+static inline void *payload_addr(struct rxe_pkt_info *pkt)
+{
+ return pkt->hdr + pkt->offset
+ + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD];
+}
+
+static inline size_t payload_size(struct rxe_pkt_info *pkt)
+{
+ return pkt->paylen - rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD]
+ - bth_pad(pkt) - RXE_ICRC_SIZE;
+}
+
+#endif /* RXE_HDR_H */
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+/* Compute a partial ICRC for all the IB transport headers. */
+u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb)
+{
+ unsigned int bth_offset = 0;
+ struct iphdr *ip4h = NULL;
+ struct ipv6hdr *ip6h = NULL;
+ struct udphdr *udph;
+ struct rxe_bth *bth;
+ int crc;
+ int length;
+ int hdr_size = sizeof(struct udphdr) +
+ (skb->protocol == htons(ETH_P_IP) ?
+ sizeof(struct iphdr) : sizeof(struct ipv6hdr));
+ /* pseudo header buffer size is calculate using ipv6 header size since
+ * it is bigger than ipv4
+ */
+ u8 pshdr[sizeof(struct udphdr) +
+ sizeof(struct ipv6hdr) +
+ RXE_BTH_BYTES];
+
+ /* This seed is the result of computing a CRC with a seed of
+ * 0xfffffff and 8 bytes of 0xff representing a masked LRH.
+ */
+ crc = 0xdebb20e3;
+
+ if (skb->protocol == htons(ETH_P_IP)) { /* IPv4 */
+ memcpy(pshdr, ip_hdr(skb), hdr_size);
+ ip4h = (struct iphdr *)pshdr;
+ udph = (struct udphdr *)(ip4h + 1);
+
+ ip4h->ttl = 0xff;
+ ip4h->check = CSUM_MANGLED_0;
+ ip4h->tos = 0xff;
+ } else { /* IPv6 */
+ memcpy(pshdr, ipv6_hdr(skb), hdr_size);
+ ip6h = (struct ipv6hdr *)pshdr;
+ udph = (struct udphdr *)(ip6h + 1);
+
+ memset(ip6h->flow_lbl, 0xff, sizeof(ip6h->flow_lbl));
+ ip6h->priority = 0xf;
+ ip6h->hop_limit = 0xff;
+ }
+ udph->check = CSUM_MANGLED_0;
+
+ bth_offset += hdr_size;
+
+ memcpy(&pshdr[bth_offset], pkt->hdr, RXE_BTH_BYTES);
+ bth = (struct rxe_bth *)&pshdr[bth_offset];
+
+ /* exclude bth.resv8a */
+ bth->qpn |= cpu_to_be32(~BTH_QPN_MASK);
+
+ length = hdr_size + RXE_BTH_BYTES;
+ crc = crc32_le(crc, pshdr, length);
+
+ /* And finish to compute the CRC on the remainder of the headers. */
+ crc = crc32_le(crc, pkt->hdr + RXE_BTH_BYTES,
+ rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES);
+ return crc;
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_LOC_H
+#define RXE_LOC_H
+
+/* rxe_av.c */
+
+int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr);
+
+int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num,
+ struct rxe_av *av, struct ib_ah_attr *attr);
+
+int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av,
+ struct ib_ah_attr *attr);
+
+int rxe_av_fill_ip_info(struct rxe_dev *rxe,
+ struct rxe_av *av,
+ struct ib_ah_attr *attr,
+ struct ib_gid_attr *sgid_attr,
+ union ib_gid *sgid);
+
+struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt);
+
+/* rxe_cq.c */
+int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
+ int cqe, int comp_vector, struct ib_udata *udata);
+
+int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
+ int comp_vector, struct ib_ucontext *context,
+ struct ib_udata *udata);
+
+int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, struct ib_udata *udata);
+
+int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
+
+void rxe_cq_cleanup(void *arg);
+
+/* rxe_mcast.c */
+int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
+ struct rxe_mc_grp **grp_p);
+
+int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
+ struct rxe_mc_grp *grp);
+
+int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
+ union ib_gid *mgid);
+
+void rxe_drop_all_mcast_groups(struct rxe_qp *qp);
+
+void rxe_mc_cleanup(void *arg);
+
+/* rxe_mmap.c */
+struct rxe_mmap_info {
+ struct list_head pending_mmaps;
+ struct ib_ucontext *context;
+ struct kref ref;
+ void *obj;
+
+ struct mminfo info;
+};
+
+void rxe_mmap_release(struct kref *ref);
+
+struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *dev,
+ u32 size,
+ struct ib_ucontext *context,
+ void *obj);
+
+int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+/* rxe_mr.c */
+enum copy_direction {
+ to_mem_obj,
+ from_mem_obj,
+};
+
+int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
+ int access, struct rxe_mem *mem);
+
+int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
+ u64 length, u64 iova, int access, struct ib_udata *udata,
+ struct rxe_mem *mr);
+
+int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
+ int max_pages, struct rxe_mem *mem);
+
+int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr,
+ int length, enum copy_direction dir, u32 *crcp);
+
+int copy_data(struct rxe_dev *rxe, struct rxe_pd *pd, int access,
+ struct rxe_dma_info *dma, void *addr, int length,
+ enum copy_direction dir, u32 *crcp);
+
+void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length);
+
+enum lookup_type {
+ lookup_local,
+ lookup_remote,
+};
+
+struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
+ enum lookup_type type);
+
+int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length);
+
+int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
+ u64 *page, int num_pages, u64 iova);
+
+void rxe_mem_cleanup(void *arg);
+
+int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
+
+/* rxe_qp.c */
+int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init);
+
+int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
+ struct ib_qp_init_attr *init, struct ib_udata *udata,
+ struct ib_pd *ibpd);
+
+int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init);
+
+int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
+ struct ib_qp_attr *attr, int mask);
+
+int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr,
+ int mask, struct ib_udata *udata);
+
+int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask);
+
+void rxe_qp_error(struct rxe_qp *qp);
+
+void rxe_qp_destroy(struct rxe_qp *qp);
+
+void rxe_qp_cleanup(void *arg);
+
+static inline int qp_num(struct rxe_qp *qp)
+{
+ return qp->ibqp.qp_num;
+}
+
+static inline enum ib_qp_type qp_type(struct rxe_qp *qp)
+{
+ return qp->ibqp.qp_type;
+}
+
+static inline enum ib_qp_state qp_state(struct rxe_qp *qp)
+{
+ return qp->attr.qp_state;
+}
+
+static inline int qp_mtu(struct rxe_qp *qp)
+{
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
+ return qp->attr.path_mtu;
+ else
+ return RXE_PORT_MAX_MTU;
+}
+
+static inline int rcv_wqe_size(int max_sge)
+{
+ return sizeof(struct rxe_recv_wqe) +
+ max_sge * sizeof(struct ib_sge);
+}
+
+void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res);
+
+static inline void rxe_advance_resp_resource(struct rxe_qp *qp)
+{
+ qp->resp.res_head++;
+ if (unlikely(qp->resp.res_head == qp->attr.max_rd_atomic))
+ qp->resp.res_head = 0;
+}
+
+void retransmit_timer(unsigned long data);
+void rnr_nak_timer(unsigned long data);
+
+void dump_qp(struct rxe_qp *qp);
+
+/* rxe_srq.c */
+#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT)
+
+int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
+ struct ib_srq_attr *attr, enum ib_srq_attr_mask mask);
+
+int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
+ struct ib_srq_init_attr *init,
+ struct ib_ucontext *context, struct ib_udata *udata);
+
+int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
+ struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
+ struct ib_udata *udata);
+
+extern struct ib_dma_mapping_ops rxe_dma_mapping_ops;
+
+void rxe_release(struct kref *kref);
+
+int rxe_completer(void *arg);
+int rxe_requester(void *arg);
+int rxe_responder(void *arg);
+
+u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb);
+
+void rxe_resp_queue_pkt(struct rxe_dev *rxe,
+ struct rxe_qp *qp, struct sk_buff *skb);
+
+void rxe_comp_queue_pkt(struct rxe_dev *rxe,
+ struct rxe_qp *qp, struct sk_buff *skb);
+
+static inline unsigned wr_opcode_mask(int opcode, struct rxe_qp *qp)
+{
+ return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type];
+}
+
+static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt, struct sk_buff *skb)
+{
+ int err;
+ int is_request = pkt->mask & RXE_REQ_MASK;
+
+ if ((is_request && (qp->req.state != QP_STATE_READY)) ||
+ (!is_request && (qp->resp.state != QP_STATE_READY))) {
+ pr_info("Packet dropped. QP is not in ready state\n");
+ goto drop;
+ }
+
+ if (pkt->mask & RXE_LOOPBACK_MASK) {
+ memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
+ err = rxe->ifc_ops->loopback(skb);
+ } else {
+ err = rxe->ifc_ops->send(rxe, pkt, skb);
+ }
+
+ if (err) {
+ rxe->xmit_errors++;
+ return err;
+ }
+
+ atomic_inc(&qp->skb_out);
+
+ if ((qp_type(qp) != IB_QPT_RC) &&
+ (pkt->mask & RXE_END_MASK)) {
+ pkt->wqe->state = wqe_state_done;
+ rxe_run_task(&qp->comp.task, 1);
+ }
+
+ goto done;
+
+drop:
+ kfree_skb(skb);
+ err = 0;
+done:
+ return err;
+}
+
+#endif /* RXE_LOC_H */
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
+ struct rxe_mc_grp **grp_p)
+{
+ int err;
+ struct rxe_mc_grp *grp;
+
+ if (rxe->attr.max_mcast_qp_attach == 0) {
+ err = -EINVAL;
+ goto err1;
+ }
+
+ grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
+ if (grp)
+ goto done;
+
+ grp = rxe_alloc(&rxe->mc_grp_pool);
+ if (!grp) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ INIT_LIST_HEAD(&grp->qp_list);
+ spin_lock_init(&grp->mcg_lock);
+ grp->rxe = rxe;
+
+ rxe_add_key(grp, mgid);
+
+ err = rxe->ifc_ops->mcast_add(rxe, mgid);
+ if (err)
+ goto err2;
+
+done:
+ *grp_p = grp;
+ return 0;
+
+err2:
+ rxe_drop_ref(grp);
+err1:
+ return err;
+}
+
+int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
+ struct rxe_mc_grp *grp)
+{
+ int err;
+ struct rxe_mc_elem *elem;
+
+ /* check to see of the qp is already a member of the group */
+ spin_lock_bh(&qp->grp_lock);
+ spin_lock_bh(&grp->mcg_lock);
+ list_for_each_entry(elem, &grp->qp_list, qp_list) {
+ if (elem->qp == qp) {
+ err = 0;
+ goto out;
+ }
+ }
+
+ if (grp->num_qp >= rxe->attr.max_mcast_qp_attach) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ elem = rxe_alloc(&rxe->mc_elem_pool);
+ if (!elem) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* each qp holds a ref on the grp */
+ rxe_add_ref(grp);
+
+ grp->num_qp++;
+ elem->qp = qp;
+ elem->grp = grp;
+
+ list_add(&elem->qp_list, &grp->qp_list);
+ list_add(&elem->grp_list, &qp->grp_list);
+
+ err = 0;
+out:
+ spin_unlock_bh(&grp->mcg_lock);
+ spin_unlock_bh(&qp->grp_lock);
+ return err;
+}
+
+int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
+ union ib_gid *mgid)
+{
+ struct rxe_mc_grp *grp;
+ struct rxe_mc_elem *elem, *tmp;
+
+ grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
+ if (!grp)
+ goto err1;
+
+ spin_lock_bh(&qp->grp_lock);
+ spin_lock_bh(&grp->mcg_lock);
+
+ list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) {
+ if (elem->qp == qp) {
+ list_del(&elem->qp_list);
+ list_del(&elem->grp_list);
+ grp->num_qp--;
+
+ spin_unlock_bh(&grp->mcg_lock);
+ spin_unlock_bh(&qp->grp_lock);
+ rxe_drop_ref(elem);
+ rxe_drop_ref(grp); /* ref held by QP */
+ rxe_drop_ref(grp); /* ref from get_key */
+ return 0;
+ }
+ }
+
+ spin_unlock_bh(&grp->mcg_lock);
+ spin_unlock_bh(&qp->grp_lock);
+ rxe_drop_ref(grp); /* ref from get_key */
+err1:
+ return -EINVAL;
+}
+
+void rxe_drop_all_mcast_groups(struct rxe_qp *qp)
+{
+ struct rxe_mc_grp *grp;
+ struct rxe_mc_elem *elem;
+
+ while (1) {
+ spin_lock_bh(&qp->grp_lock);
+ if (list_empty(&qp->grp_list)) {
+ spin_unlock_bh(&qp->grp_lock);
+ break;
+ }
+ elem = list_first_entry(&qp->grp_list, struct rxe_mc_elem,
+ grp_list);
+ list_del(&elem->grp_list);
+ spin_unlock_bh(&qp->grp_lock);
+
+ grp = elem->grp;
+ spin_lock_bh(&grp->mcg_lock);
+ list_del(&elem->qp_list);
+ grp->num_qp--;
+ spin_unlock_bh(&grp->mcg_lock);
+ rxe_drop_ref(grp);
+ rxe_drop_ref(elem);
+ }
+}
+
+void rxe_mc_cleanup(void *arg)
+{
+ struct rxe_mc_grp *grp = arg;
+ struct rxe_dev *rxe = grp->rxe;
+
+ rxe_drop_key(grp);
+ rxe->ifc_ops->mcast_delete(rxe, &grp->mgid);
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <asm/pgtable.h>
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+
+void rxe_mmap_release(struct kref *ref)
+{
+ struct rxe_mmap_info *ip = container_of(ref,
+ struct rxe_mmap_info, ref);
+ struct rxe_dev *rxe = to_rdev(ip->context->device);
+
+ spin_lock_bh(&rxe->pending_lock);
+
+ if (!list_empty(&ip->pending_mmaps))
+ list_del(&ip->pending_mmaps);
+
+ spin_unlock_bh(&rxe->pending_lock);
+
+ vfree(ip->obj); /* buf */
+ kfree(ip);
+}
+
+/*
+ * open and close keep track of how many times the memory region is mapped,
+ * to avoid releasing it.
+ */
+static void rxe_vma_open(struct vm_area_struct *vma)
+{
+ struct rxe_mmap_info *ip = vma->vm_private_data;
+
+ kref_get(&ip->ref);
+}
+
+static void rxe_vma_close(struct vm_area_struct *vma)
+{
+ struct rxe_mmap_info *ip = vma->vm_private_data;
+
+ kref_put(&ip->ref, rxe_mmap_release);
+}
+
+static struct vm_operations_struct rxe_vm_ops = {
+ .open = rxe_vma_open,
+ .close = rxe_vma_close,
+};
+
+/**
+ * rxe_mmap - create a new mmap region
+ * @context: the IB user context of the process making the mmap() call
+ * @vma: the VMA to be initialized
+ * Return zero if the mmap is OK. Otherwise, return an errno.
+ */
+int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ struct rxe_dev *rxe = to_rdev(context->device);
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long size = vma->vm_end - vma->vm_start;
+ struct rxe_mmap_info *ip, *pp;
+ int ret;
+
+ /*
+ * Search the device's list of objects waiting for a mmap call.
+ * Normally, this list is very short since a call to create a
+ * CQ, QP, or SRQ is soon followed by a call to mmap().
+ */
+ spin_lock_bh(&rxe->pending_lock);
+ list_for_each_entry_safe(ip, pp, &rxe->pending_mmaps, pending_mmaps) {
+ if (context != ip->context || (__u64)offset != ip->info.offset)
+ continue;
+
+ /* Don't allow a mmap larger than the object. */
+ if (size > ip->info.size) {
+ pr_err("mmap region is larger than the object!\n");
+ spin_unlock_bh(&rxe->pending_lock);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ goto found_it;
+ }
+ pr_warn("unable to find pending mmap info\n");
+ spin_unlock_bh(&rxe->pending_lock);
+ ret = -EINVAL;
+ goto done;
+
+found_it:
+ list_del_init(&ip->pending_mmaps);
+ spin_unlock_bh(&rxe->pending_lock);
+
+ ret = remap_vmalloc_range(vma, ip->obj, 0);
+ if (ret) {
+ pr_err("rxe: err %d from remap_vmalloc_range\n", ret);
+ goto done;
+ }
+
+ vma->vm_ops = &rxe_vm_ops;
+ vma->vm_private_data = ip;
+ rxe_vma_open(vma);
+done:
+ return ret;
+}
+
+/*
+ * Allocate information for rxe_mmap
+ */
+struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe,
+ u32 size,
+ struct ib_ucontext *context,
+ void *obj)
+{
+ struct rxe_mmap_info *ip;
+
+ ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+ if (!ip)
+ return NULL;
+
+ size = PAGE_ALIGN(size);
+
+ spin_lock_bh(&rxe->mmap_offset_lock);
+
+ if (rxe->mmap_offset == 0)
+ rxe->mmap_offset = PAGE_SIZE;
+
+ ip->info.offset = rxe->mmap_offset;
+ rxe->mmap_offset += size;
+
+ spin_unlock_bh(&rxe->mmap_offset_lock);
+
+ INIT_LIST_HEAD(&ip->pending_mmaps);
+ ip->info.size = size;
+ ip->context = context;
+ ip->obj = obj;
+ kref_init(&ip->ref);
+
+ return ip;
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+/*
+ * lfsr (linear feedback shift register) with period 255
+ */
+static u8 rxe_get_key(void)
+{
+ static unsigned key = 1;
+
+ key = key << 1;
+
+ key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
+ ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));
+
+ key &= 0xff;
+
+ return key;
+}
+
+int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
+{
+ switch (mem->type) {
+ case RXE_MEM_TYPE_DMA:
+ return 0;
+
+ case RXE_MEM_TYPE_MR:
+ case RXE_MEM_TYPE_FMR:
+ return ((iova < mem->iova) ||
+ ((iova + length) > (mem->iova + mem->length))) ?
+ -EFAULT : 0;
+
+ default:
+ return -EFAULT;
+ }
+}
+
+#define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ \
+ | IB_ACCESS_REMOTE_WRITE \
+ | IB_ACCESS_REMOTE_ATOMIC)
+
+static void rxe_mem_init(int access, struct rxe_mem *mem)
+{
+ u32 lkey = mem->pelem.index << 8 | rxe_get_key();
+ u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
+
+ if (mem->pelem.pool->type == RXE_TYPE_MR) {
+ mem->ibmr.lkey = lkey;
+ mem->ibmr.rkey = rkey;
+ }
+
+ mem->lkey = lkey;
+ mem->rkey = rkey;
+ mem->state = RXE_MEM_STATE_INVALID;
+ mem->type = RXE_MEM_TYPE_NONE;
+ mem->map_shift = ilog2(RXE_BUF_PER_MAP);
+}
+
+void rxe_mem_cleanup(void *arg)
+{
+ struct rxe_mem *mem = arg;
+ int i;
+
+ if (mem->umem)
+ ib_umem_release(mem->umem);
+
+ if (mem->map) {
+ for (i = 0; i < mem->num_map; i++)
+ kfree(mem->map[i]);
+
+ kfree(mem->map);
+ }
+}
+
+static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf)
+{
+ int i;
+ int num_map;
+ struct rxe_map **map = mem->map;
+
+ num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
+
+ mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
+ if (!mem->map)
+ goto err1;
+
+ for (i = 0; i < num_map; i++) {
+ mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
+ if (!mem->map[i])
+ goto err2;
+ }
+
+ WARN_ON(!is_power_of_2(RXE_BUF_PER_MAP));
+
+ mem->map_shift = ilog2(RXE_BUF_PER_MAP);
+ mem->map_mask = RXE_BUF_PER_MAP - 1;
+
+ mem->num_buf = num_buf;
+ mem->num_map = num_map;
+ mem->max_buf = num_map * RXE_BUF_PER_MAP;
+
+ return 0;
+
+err2:
+ for (i--; i >= 0; i--)
+ kfree(mem->map[i]);
+
+ kfree(mem->map);
+err1:
+ return -ENOMEM;
+}
+
+int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
+ int access, struct rxe_mem *mem)
+{
+ rxe_mem_init(access, mem);
+
+ mem->pd = pd;
+ mem->access = access;
+ mem->state = RXE_MEM_STATE_VALID;
+ mem->type = RXE_MEM_TYPE_DMA;
+
+ return 0;
+}
+
+int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
+ u64 length, u64 iova, int access, struct ib_udata *udata,
+ struct rxe_mem *mem)
+{
+ int entry;
+ struct rxe_map **map;
+ struct rxe_phys_buf *buf = NULL;
+ struct ib_umem *umem;
+ struct scatterlist *sg;
+ int num_buf;
+ void *vaddr;
+ int err;
+
+ umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0);
+ if (IS_ERR(umem)) {
+ pr_warn("err %d from rxe_umem_get\n",
+ (int)PTR_ERR(umem));
+ err = -EINVAL;
+ goto err1;
+ }
+
+ mem->umem = umem;
+ num_buf = umem->nmap;
+
+ rxe_mem_init(access, mem);
+
+ err = rxe_mem_alloc(rxe, mem, num_buf);
+ if (err) {
+ pr_warn("err %d from rxe_mem_alloc\n", err);
+ ib_umem_release(umem);
+ goto err1;
+ }
+
+ WARN_ON(!is_power_of_2(umem->page_size));
+
+ mem->page_shift = ilog2(umem->page_size);
+ mem->page_mask = umem->page_size - 1;
+
+ num_buf = 0;
+ map = mem->map;
+ if (length > 0) {
+ buf = map[0]->buf;
+
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ vaddr = page_address(sg_page(sg));
+ if (!vaddr) {
+ pr_warn("null vaddr\n");
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ buf->addr = (uintptr_t)vaddr;
+ buf->size = umem->page_size;
+ num_buf++;
+ buf++;
+
+ if (num_buf >= RXE_BUF_PER_MAP) {
+ map++;
+ buf = map[0]->buf;
+ num_buf = 0;
+ }
+ }
+ }
+
+ mem->pd = pd;
+ mem->umem = umem;
+ mem->access = access;
+ mem->length = length;
+ mem->iova = iova;
+ mem->va = start;
+ mem->offset = ib_umem_offset(umem);
+ mem->state = RXE_MEM_STATE_VALID;
+ mem->type = RXE_MEM_TYPE_MR;
+
+ return 0;
+
+err1:
+ return err;
+}
+
+int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
+ int max_pages, struct rxe_mem *mem)
+{
+ int err;
+
+ rxe_mem_init(0, mem);
+
+ /* In fastreg, we also set the rkey */
+ mem->ibmr.rkey = mem->ibmr.lkey;
+
+ err = rxe_mem_alloc(rxe, mem, max_pages);
+ if (err)
+ goto err1;
+
+ mem->pd = pd;
+ mem->max_buf = max_pages;
+ mem->state = RXE_MEM_STATE_FREE;
+ mem->type = RXE_MEM_TYPE_MR;
+
+ return 0;
+
+err1:
+ return err;
+}
+
+static void lookup_iova(
+ struct rxe_mem *mem,
+ u64 iova,
+ int *m_out,
+ int *n_out,
+ size_t *offset_out)
+{
+ size_t offset = iova - mem->iova + mem->offset;
+ int map_index;
+ int buf_index;
+ u64 length;
+
+ if (likely(mem->page_shift)) {
+ *offset_out = offset & mem->page_mask;
+ offset >>= mem->page_shift;
+ *n_out = offset & mem->map_mask;
+ *m_out = offset >> mem->map_shift;
+ } else {
+ map_index = 0;
+ buf_index = 0;
+
+ length = mem->map[map_index]->buf[buf_index].size;
+
+ while (offset >= length) {
+ offset -= length;
+ buf_index++;
+
+ if (buf_index == RXE_BUF_PER_MAP) {
+ map_index++;
+ buf_index = 0;
+ }
+ length = mem->map[map_index]->buf[buf_index].size;
+ }
+
+ *m_out = map_index;
+ *n_out = buf_index;
+ *offset_out = offset;
+ }
+}
+
+void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length)
+{
+ size_t offset;
+ int m, n;
+ void *addr;
+
+ if (mem->state != RXE_MEM_STATE_VALID) {
+ pr_warn("mem not in valid state\n");
+ addr = NULL;
+ goto out;
+ }
+
+ if (!mem->map) {
+ addr = (void *)(uintptr_t)iova;
+ goto out;
+ }
+
+ if (mem_check_range(mem, iova, length)) {
+ pr_warn("range violation\n");
+ addr = NULL;
+ goto out;
+ }
+
+ lookup_iova(mem, iova, &m, &n, &offset);
+
+ if (offset + length > mem->map[m]->buf[n].size) {
+ pr_warn("crosses page boundary\n");
+ addr = NULL;
+ goto out;
+ }
+
+ addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset;
+
+out:
+ return addr;
+}
+
+/* copy data from a range (vaddr, vaddr+length-1) to or from
+ * a mem object starting at iova. Compute incremental value of
+ * crc32 if crcp is not zero. caller must hold a reference to mem
+ */
+int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
+ enum copy_direction dir, u32 *crcp)
+{
+ int err;
+ int bytes;
+ u8 *va;
+ struct rxe_map **map;
+ struct rxe_phys_buf *buf;
+ int m;
+ int i;
+ size_t offset;
+ u32 crc = crcp ? (*crcp) : 0;
+
+ if (mem->type == RXE_MEM_TYPE_DMA) {
+ u8 *src, *dest;
+
+ src = (dir == to_mem_obj) ?
+ addr : ((void *)(uintptr_t)iova);
+
+ dest = (dir == to_mem_obj) ?
+ ((void *)(uintptr_t)iova) : addr;
+
+ if (crcp)
+ *crcp = crc32_le(*crcp, src, length);
+
+ memcpy(dest, src, length);
+
+ return 0;
+ }
+
+ WARN_ON(!mem->map);
+
+ err = mem_check_range(mem, iova, length);
+ if (err) {
+ err = -EFAULT;
+ goto err1;
+ }
+
+ lookup_iova(mem, iova, &m, &i, &offset);
+
+ map = mem->map + m;
+ buf = map[0]->buf + i;
+
+ while (length > 0) {
+ u8 *src, *dest;
+
+ va = (u8 *)(uintptr_t)buf->addr + offset;
+ src = (dir == to_mem_obj) ? addr : va;
+ dest = (dir == to_mem_obj) ? va : addr;
+
+ bytes = buf->size - offset;
+
+ if (bytes > length)
+ bytes = length;
+
+ if (crcp)
+ crc = crc32_le(crc, src, bytes);
+
+ memcpy(dest, src, bytes);
+
+ length -= bytes;
+ addr += bytes;
+
+ offset = 0;
+ buf++;
+ i++;
+
+ if (i == RXE_BUF_PER_MAP) {
+ i = 0;
+ map++;
+ buf = map[0]->buf;
+ }
+ }
+
+ if (crcp)
+ *crcp = crc;
+
+ return 0;
+
+err1:
+ return err;
+}
+
+/* copy data in or out of a wqe, i.e. sg list
+ * under the control of a dma descriptor
+ */
+int copy_data(
+ struct rxe_dev *rxe,
+ struct rxe_pd *pd,
+ int access,
+ struct rxe_dma_info *dma,
+ void *addr,
+ int length,
+ enum copy_direction dir,
+ u32 *crcp)
+{
+ int bytes;
+ struct rxe_sge *sge = &dma->sge[dma->cur_sge];
+ int offset = dma->sge_offset;
+ int resid = dma->resid;
+ struct rxe_mem *mem = NULL;
+ u64 iova;
+ int err;
+
+ if (length == 0)
+ return 0;
+
+ if (length > resid) {
+ err = -EINVAL;
+ goto err2;
+ }
+
+ if (sge->length && (offset < sge->length)) {
+ mem = lookup_mem(pd, access, sge->lkey, lookup_local);
+ if (!mem) {
+ err = -EINVAL;
+ goto err1;
+ }
+ }
+
+ while (length > 0) {
+ bytes = length;
+
+ if (offset >= sge->length) {
+ if (mem) {
+ rxe_drop_ref(mem);
+ mem = NULL;
+ }
+ sge++;
+ dma->cur_sge++;
+ offset = 0;
+
+ if (dma->cur_sge >= dma->num_sge) {
+ err = -ENOSPC;
+ goto err2;
+ }
+
+ if (sge->length) {
+ mem = lookup_mem(pd, access, sge->lkey,
+ lookup_local);
+ if (!mem) {
+ err = -EINVAL;
+ goto err1;
+ }
+ } else {
+ continue;
+ }
+ }
+
+ if (bytes > sge->length - offset)
+ bytes = sge->length - offset;
+
+ if (bytes > 0) {
+ iova = sge->addr + offset;
+
+ err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp);
+ if (err)
+ goto err2;
+
+ offset += bytes;
+ resid -= bytes;
+ length -= bytes;
+ addr += bytes;
+ }
+ }
+
+ dma->sge_offset = offset;
+ dma->resid = resid;
+
+ if (mem)
+ rxe_drop_ref(mem);
+
+ return 0;
+
+err2:
+ if (mem)
+ rxe_drop_ref(mem);
+err1:
+ return err;
+}
+
+int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
+{
+ struct rxe_sge *sge = &dma->sge[dma->cur_sge];
+ int offset = dma->sge_offset;
+ int resid = dma->resid;
+
+ while (length) {
+ unsigned int bytes;
+
+ if (offset >= sge->length) {
+ sge++;
+ dma->cur_sge++;
+ offset = 0;
+ if (dma->cur_sge >= dma->num_sge)
+ return -ENOSPC;
+ }
+
+ bytes = length;
+
+ if (bytes > sge->length - offset)
+ bytes = sge->length - offset;
+
+ offset += bytes;
+ resid -= bytes;
+ length -= bytes;
+ }
+
+ dma->sge_offset = offset;
+ dma->resid = resid;
+
+ return 0;
+}
+
+/* (1) find the mem (mr or mw) corresponding to lkey/rkey
+ * depending on lookup_type
+ * (2) verify that the (qp) pd matches the mem pd
+ * (3) verify that the mem can support the requested access
+ * (4) verify that mem state is valid
+ */
+struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
+ enum lookup_type type)
+{
+ struct rxe_mem *mem;
+ struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
+ int index = key >> 8;
+
+ if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) {
+ mem = rxe_pool_get_index(&rxe->mr_pool, index);
+ if (!mem)
+ goto err1;
+ } else {
+ goto err1;
+ }
+
+ if ((type == lookup_local && mem->lkey != key) ||
+ (type == lookup_remote && mem->rkey != key))
+ goto err2;
+
+ if (mem->pd != pd)
+ goto err2;
+
+ if (access && !(access & mem->access))
+ goto err2;
+
+ if (mem->state != RXE_MEM_STATE_VALID)
+ goto err2;
+
+ return mem;
+
+err2:
+ rxe_drop_ref(mem);
+err1:
+ return NULL;
+}
+
+int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
+ u64 *page, int num_pages, u64 iova)
+{
+ int i;
+ int num_buf;
+ int err;
+ struct rxe_map **map;
+ struct rxe_phys_buf *buf;
+ int page_size;
+
+ if (num_pages > mem->max_buf) {
+ err = -EINVAL;
+ goto err1;
+ }
+
+ num_buf = 0;
+ page_size = 1 << mem->page_shift;
+ map = mem->map;
+ buf = map[0]->buf;
+
+ for (i = 0; i < num_pages; i++) {
+ buf->addr = *page++;
+ buf->size = page_size;
+ buf++;
+ num_buf++;
+
+ if (num_buf == RXE_BUF_PER_MAP) {
+ map++;
+ buf = map[0]->buf;
+ num_buf = 0;
+ }
+ }
+
+ mem->iova = iova;
+ mem->va = iova;
+ mem->length = num_pages << mem->page_shift;
+ mem->state = RXE_MEM_STATE_VALID;
+
+ return 0;
+
+err1:
+ return err;
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/netdevice.h>
+#include <linux/if.h>
+#include <linux/if_vlan.h>
+#include <net/udp_tunnel.h>
+#include <net/sch_generic.h>
+#include <linux/netfilter.h>
+#include <rdma/ib_addr.h>
+
+#include "rxe.h"
+#include "rxe_net.h"
+#include "rxe_loc.h"
+
+static LIST_HEAD(rxe_dev_list);
+static spinlock_t dev_list_lock; /* spinlock for device list */
+
+struct rxe_dev *net_to_rxe(struct net_device *ndev)
+{
+ struct rxe_dev *rxe;
+ struct rxe_dev *found = NULL;
+
+ spin_lock_bh(&dev_list_lock);
+ list_for_each_entry(rxe, &rxe_dev_list, list) {
+ if (rxe->ndev == ndev) {
+ found = rxe;
+ break;
+ }
+ }
+ spin_unlock_bh(&dev_list_lock);
+
+ return found;
+}
+
+struct rxe_dev *get_rxe_by_name(const char* name)
+{
+ struct rxe_dev *rxe;
+ struct rxe_dev *found = NULL;
+
+ spin_lock_bh(&dev_list_lock);
+ list_for_each_entry(rxe, &rxe_dev_list, list) {
+ if (!strcmp(name, rxe->ib_dev.name)) {
+ found = rxe;
+ break;
+ }
+ }
+ spin_unlock_bh(&dev_list_lock);
+ return found;
+}
+
+
+struct rxe_recv_sockets recv_sockets;
+
+static __be64 rxe_mac_to_eui64(struct net_device *ndev)
+{
+ unsigned char *mac_addr = ndev->dev_addr;
+ __be64 eui64;
+ unsigned char *dst = (unsigned char *)&eui64;
+
+ dst[0] = mac_addr[0] ^ 2;
+ dst[1] = mac_addr[1];
+ dst[2] = mac_addr[2];
+ dst[3] = 0xff;
+ dst[4] = 0xfe;
+ dst[5] = mac_addr[3];
+ dst[6] = mac_addr[4];
+ dst[7] = mac_addr[5];
+
+ return eui64;
+}
+
+static __be64 node_guid(struct rxe_dev *rxe)
+{
+ return rxe_mac_to_eui64(rxe->ndev);
+}
+
+static __be64 port_guid(struct rxe_dev *rxe)
+{
+ return rxe_mac_to_eui64(rxe->ndev);
+}
+
+static struct device *dma_device(struct rxe_dev *rxe)
+{
+ struct net_device *ndev;
+
+ ndev = rxe->ndev;
+
+ if (ndev->priv_flags & IFF_802_1Q_VLAN)
+ ndev = vlan_dev_real_dev(ndev);
+
+ return ndev->dev.parent;
+}
+
+static int mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
+{
+ int err;
+ unsigned char ll_addr[ETH_ALEN];
+
+ ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
+ err = dev_mc_add(rxe->ndev, ll_addr);
+
+ return err;
+}
+
+static int mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
+{
+ int err;
+ unsigned char ll_addr[ETH_ALEN];
+
+ ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
+ err = dev_mc_del(rxe->ndev, ll_addr);
+
+ return err;
+}
+
+static struct dst_entry *rxe_find_route4(struct net_device *ndev,
+ struct in_addr *saddr,
+ struct in_addr *daddr)
+{
+ struct rtable *rt;
+ struct flowi4 fl = { { 0 } };
+
+ memset(&fl, 0, sizeof(fl));
+ fl.flowi4_oif = ndev->ifindex;
+ memcpy(&fl.saddr, saddr, sizeof(*saddr));
+ memcpy(&fl.daddr, daddr, sizeof(*daddr));
+ fl.flowi4_proto = IPPROTO_UDP;
+
+ rt = ip_route_output_key(&init_net, &fl);
+ if (IS_ERR(rt)) {
+ pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr);
+ return NULL;
+ }
+
+ return &rt->dst;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static struct dst_entry *rxe_find_route6(struct net_device *ndev,
+ struct in6_addr *saddr,
+ struct in6_addr *daddr)
+{
+ struct dst_entry *ndst;
+ struct flowi6 fl6 = { { 0 } };
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = ndev->ifindex;
+ memcpy(&fl6.saddr, saddr, sizeof(*saddr));
+ memcpy(&fl6.daddr, daddr, sizeof(*daddr));
+ fl6.flowi6_proto = IPPROTO_UDP;
+
+ if (unlikely(ipv6_stub->ipv6_dst_lookup(sock_net(recv_sockets.sk6->sk),
+ recv_sockets.sk6->sk, &ndst, &fl6))) {
+ pr_err_ratelimited("no route to %pI6\n", daddr);
+ goto put;
+ }
+
+ if (unlikely(ndst->error)) {
+ pr_err("no route to %pI6\n", daddr);
+ goto put;
+ }
+
+ return ndst;
+put:
+ dst_release(ndst);
+ return NULL;
+}
+
+#else
+
+static struct dst_entry *rxe_find_route6(struct net_device *ndev,
+ struct in6_addr *saddr,
+ struct in6_addr *daddr)
+{
+ return NULL;
+}
+
+#endif
+
+static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+{
+ struct udphdr *udph;
+ struct net_device *ndev = skb->dev;
+ struct rxe_dev *rxe = net_to_rxe(ndev);
+ struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+
+ if (!rxe)
+ goto drop;
+
+ if (skb_linearize(skb)) {
+ pr_err("skb_linearize failed\n");
+ goto drop;
+ }
+
+ udph = udp_hdr(skb);
+ pkt->rxe = rxe;
+ pkt->port_num = 1;
+ pkt->hdr = (u8 *)(udph + 1);
+ pkt->mask = RXE_GRH_MASK;
+ pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph);
+
+ return rxe_rcv(skb);
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
+ bool ipv6)
+{
+ int err;
+ struct socket *sock;
+ struct udp_port_cfg udp_cfg;
+ struct udp_tunnel_sock_cfg tnl_cfg;
+
+ memset(&udp_cfg, 0, sizeof(udp_cfg));
+
+ if (ipv6) {
+ udp_cfg.family = AF_INET6;
+ udp_cfg.ipv6_v6only = 1;
+ } else {
+ udp_cfg.family = AF_INET;
+ }
+
+ udp_cfg.local_udp_port = port;
+
+ /* Create UDP socket */
+ err = udp_sock_create(net, &udp_cfg, &sock);
+ if (err < 0) {
+ pr_err("failed to create udp socket. err = %d\n", err);
+ return ERR_PTR(err);
+ }
+
+ tnl_cfg.sk_user_data = NULL;
+ tnl_cfg.encap_type = 1;
+ tnl_cfg.encap_rcv = rxe_udp_encap_recv;
+ tnl_cfg.encap_destroy = NULL;
+
+ /* Setup UDP tunnel */
+ setup_udp_tunnel_sock(net, sock, &tnl_cfg);
+
+ return sock;
+}
+
+static void rxe_release_udp_tunnel(struct socket *sk)
+{
+ udp_tunnel_sock_release(sk);
+}
+
+static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port,
+ __be16 dst_port)
+{
+ struct udphdr *udph;
+
+ __skb_push(skb, sizeof(*udph));
+ skb_reset_transport_header(skb);
+ udph = udp_hdr(skb);
+
+ udph->dest = dst_port;
+ udph->source = src_port;
+ udph->len = htons(skb->len);
+ udph->check = 0;
+}
+
+static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb,
+ __be32 saddr, __be32 daddr, __u8 proto,
+ __u8 tos, __u8 ttl, __be16 df, bool xnet)
+{
+ struct iphdr *iph;
+
+ skb_scrub_packet(skb, xnet);
+
+ skb_clear_hash(skb);
+ skb_dst_set(skb, dst);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+
+ iph = ip_hdr(skb);
+
+ iph->version = IPVERSION;
+ iph->ihl = sizeof(struct iphdr) >> 2;
+ iph->frag_off = df;
+ iph->protocol = proto;
+ iph->tos = tos;
+ iph->daddr = daddr;
+ iph->saddr = saddr;
+ iph->ttl = ttl;
+ __ip_select_ident(dev_net(dst->dev), iph,
+ skb_shinfo(skb)->gso_segs ?: 1);
+ iph->tot_len = htons(skb->len);
+ ip_send_check(iph);
+}
+
+static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
+ struct in6_addr *saddr, struct in6_addr *daddr,
+ __u8 proto, __u8 prio, __u8 ttl)
+{
+ struct ipv6hdr *ip6h;
+
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
+ | IPSKB_REROUTED);
+ skb_dst_set(skb, dst);
+
+ __skb_push(skb, sizeof(*ip6h));
+ skb_reset_network_header(skb);
+ ip6h = ipv6_hdr(skb);
+ ip6_flow_hdr(ip6h, prio, htonl(0));
+ ip6h->payload_len = htons(skb->len);
+ ip6h->nexthdr = proto;
+ ip6h->hop_limit = ttl;
+ ip6h->daddr = *daddr;
+ ip6h->saddr = *saddr;
+ ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
+}
+
+static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av)
+{
+ struct dst_entry *dst;
+ bool xnet = false;
+ __be16 df = htons(IP_DF);
+ struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;
+ struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;
+ struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+
+ dst = rxe_find_route4(rxe->ndev, saddr, daddr);
+ if (!dst) {
+ pr_err("Host not reachable\n");
+ return -EHOSTUNREACH;
+ }
+
+ if (!memcmp(saddr, daddr, sizeof(*daddr)))
+ pkt->mask |= RXE_LOOPBACK_MASK;
+
+ prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
+ htons(ROCE_V2_UDP_DPORT));
+
+ prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,
+ av->grh.traffic_class, av->grh.hop_limit, df, xnet);
+ return 0;
+}
+
+static int prepare6(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av)
+{
+ struct dst_entry *dst;
+ struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
+ struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
+ struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+
+ dst = rxe_find_route6(rxe->ndev, saddr, daddr);
+ if (!dst) {
+ pr_err("Host not reachable\n");
+ return -EHOSTUNREACH;
+ }
+
+ if (!memcmp(saddr, daddr, sizeof(*daddr)))
+ pkt->mask |= RXE_LOOPBACK_MASK;
+
+ prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
+ htons(ROCE_V2_UDP_DPORT));
+
+ prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP,
+ av->grh.traffic_class,
+ av->grh.hop_limit);
+ return 0;
+}
+
+static int prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb, u32 *crc)
+{
+ int err = 0;
+ struct rxe_av *av = rxe_get_av(pkt);
+
+ if (av->network_type == RDMA_NETWORK_IPV4)
+ err = prepare4(rxe, skb, av);
+ else if (av->network_type == RDMA_NETWORK_IPV6)
+ err = prepare6(rxe, skb, av);
+
+ *crc = rxe_icrc_hdr(pkt, skb);
+
+ return err;
+}
+
+static void rxe_skb_tx_dtor(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+ struct rxe_qp *qp = sk->sk_user_data;
+ int skb_out = atomic_dec_return(&qp->skb_out);
+
+ if (unlikely(qp->need_req_skb &&
+ skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
+ rxe_run_task(&qp->req.task, 1);
+}
+
+static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb)
+{
+ struct sk_buff *nskb;
+ struct rxe_av *av;
+ int err;
+
+ av = rxe_get_av(pkt);
+
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return -ENOMEM;
+
+ nskb->destructor = rxe_skb_tx_dtor;
+ nskb->sk = pkt->qp->sk->sk;
+
+ if (av->network_type == RDMA_NETWORK_IPV4) {
+ err = ip_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb);
+ } else if (av->network_type == RDMA_NETWORK_IPV6) {
+ err = ip6_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb);
+ } else {
+ pr_err("Unknown layer 3 protocol: %d\n", av->network_type);
+ kfree_skb(nskb);
+ return -EINVAL;
+ }
+
+ if (unlikely(net_xmit_eval(err))) {
+ pr_debug("error sending packet: %d\n", err);
+ return -EAGAIN;
+ }
+
+ kfree_skb(skb);
+
+ return 0;
+}
+
+static int loopback(struct sk_buff *skb)
+{
+ return rxe_rcv(skb);
+}
+
+static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
+{
+ return rxe->port.port_guid == av->grh.dgid.global.interface_id;
+}
+
+static struct sk_buff *init_packet(struct rxe_dev *rxe, struct rxe_av *av,
+ int paylen, struct rxe_pkt_info *pkt)
+{
+ unsigned int hdr_len;
+ struct sk_buff *skb;
+
+ if (av->network_type == RDMA_NETWORK_IPV4)
+ hdr_len = ETH_HLEN + sizeof(struct udphdr) +
+ sizeof(struct iphdr);
+ else
+ hdr_len = ETH_HLEN + sizeof(struct udphdr) +
+ sizeof(struct ipv6hdr);
+
+ skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(rxe->ndev),
+ GFP_ATOMIC);
+ if (unlikely(!skb))
+ return NULL;
+
+ skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev));
+
+ skb->dev = rxe->ndev;
+ if (av->network_type == RDMA_NETWORK_IPV4)
+ skb->protocol = htons(ETH_P_IP);
+ else
+ skb->protocol = htons(ETH_P_IPV6);
+
+ pkt->rxe = rxe;
+ pkt->port_num = 1;
+ pkt->hdr = skb_put(skb, paylen);
+ pkt->mask |= RXE_GRH_MASK;
+
+ memset(pkt->hdr, 0, paylen);
+
+ return skb;
+}
+
+/*
+ * this is required by rxe_cfg to match rxe devices in
+ * /sys/class/infiniband up with their underlying ethernet devices
+ */
+static char *parent_name(struct rxe_dev *rxe, unsigned int port_num)
+{
+ return rxe->ndev->name;
+}
+
+static enum rdma_link_layer link_layer(struct rxe_dev *rxe,
+ unsigned int port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+static struct rxe_ifc_ops ifc_ops = {
+ .node_guid = node_guid,
+ .port_guid = port_guid,
+ .dma_device = dma_device,
+ .mcast_add = mcast_add,
+ .mcast_delete = mcast_delete,
+ .prepare = prepare,
+ .send = send,
+ .loopback = loopback,
+ .init_packet = init_packet,
+ .parent_name = parent_name,
+ .link_layer = link_layer,
+};
+
+struct rxe_dev *rxe_net_add(struct net_device *ndev)
+{
+ int err;
+ struct rxe_dev *rxe = NULL;
+
+ rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe));
+ if (!rxe)
+ return NULL;
+
+ rxe->ifc_ops = &ifc_ops;
+ rxe->ndev = ndev;
+
+ err = rxe_add(rxe, ndev->mtu);
+ if (err) {
+ ib_dealloc_device(&rxe->ib_dev);
+ return NULL;
+ }
+
+ spin_lock_bh(&dev_list_lock);
+ list_add_tail(&rxe_dev_list, &rxe->list);
+ spin_unlock_bh(&dev_list_lock);
+ return rxe;
+}
+
+void rxe_remove_all(void)
+{
+ spin_lock_bh(&dev_list_lock);
+ while (!list_empty(&rxe_dev_list)) {
+ struct rxe_dev *rxe =
+ list_first_entry(&rxe_dev_list, struct rxe_dev, list);
+
+ list_del(&rxe->list);
+ spin_unlock_bh(&dev_list_lock);
+ rxe_remove(rxe);
+ spin_lock_bh(&dev_list_lock);
+ }
+ spin_unlock_bh(&dev_list_lock);
+}
+EXPORT_SYMBOL(rxe_remove_all);
+
+static void rxe_port_event(struct rxe_dev *rxe,
+ enum ib_event_type event)
+{
+ struct ib_event ev;
+
+ ev.device = &rxe->ib_dev;
+ ev.element.port_num = 1;
+ ev.event = event;
+
+ ib_dispatch_event(&ev);
+}
+
+/* Caller must hold net_info_lock */
+void rxe_port_up(struct rxe_dev *rxe)
+{
+ struct rxe_port *port;
+
+ port = &rxe->port;
+ port->attr.state = IB_PORT_ACTIVE;
+ port->attr.phys_state = IB_PHYS_STATE_LINK_UP;
+
+ rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
+ pr_info("rxe: set %s active\n", rxe->ib_dev.name);
+ return;
+}
+
+/* Caller must hold net_info_lock */
+void rxe_port_down(struct rxe_dev *rxe)
+{
+ struct rxe_port *port;
+
+ port = &rxe->port;
+ port->attr.state = IB_PORT_DOWN;
+ port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN;
+
+ rxe_port_event(rxe, IB_EVENT_PORT_ERR);
+ pr_info("rxe: set %s down\n", rxe->ib_dev.name);
+ return;
+}
+
+static int rxe_notify(struct notifier_block *not_blk,
+ unsigned long event,
+ void *arg)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(arg);
+ struct rxe_dev *rxe = net_to_rxe(ndev);
+
+ if (!rxe)
+ goto out;
+
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ list_del(&rxe->list);
+ rxe_remove(rxe);
+ break;
+ case NETDEV_UP:
+ rxe_port_up(rxe);
+ break;
+ case NETDEV_DOWN:
+ rxe_port_down(rxe);
+ break;
+ case NETDEV_CHANGEMTU:
+ pr_info("rxe: %s changed mtu to %d\n", ndev->name, ndev->mtu);
+ rxe_set_mtu(rxe, ndev->mtu);
+ break;
+ case NETDEV_REBOOT:
+ case NETDEV_CHANGE:
+ case NETDEV_GOING_DOWN:
+ case NETDEV_CHANGEADDR:
+ case NETDEV_CHANGENAME:
+ case NETDEV_FEAT_CHANGE:
+ default:
+ pr_info("rxe: ignoring netdev event = %ld for %s\n",
+ event, ndev->name);
+ break;
+ }
+out:
+ return NOTIFY_OK;
+}
+
+static struct notifier_block rxe_net_notifier = {
+ .notifier_call = rxe_notify,
+};
+
+int rxe_net_init(void)
+{
+ int err;
+
+ spin_lock_init(&dev_list_lock);
+
+ recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
+ htons(ROCE_V2_UDP_DPORT), true);
+ if (IS_ERR(recv_sockets.sk6)) {
+ recv_sockets.sk6 = NULL;
+ pr_err("rxe: Failed to create IPv6 UDP tunnel\n");
+ return -1;
+ }
+
+ recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
+ htons(ROCE_V2_UDP_DPORT), false);
+ if (IS_ERR(recv_sockets.sk4)) {
+ rxe_release_udp_tunnel(recv_sockets.sk6);
+ recv_sockets.sk4 = NULL;
+ recv_sockets.sk6 = NULL;
+ pr_err("rxe: Failed to create IPv4 UDP tunnel\n");
+ return -1;
+ }
+
+ err = register_netdevice_notifier(&rxe_net_notifier);
+ if (err) {
+ rxe_release_udp_tunnel(recv_sockets.sk6);
+ rxe_release_udp_tunnel(recv_sockets.sk4);
+ pr_err("rxe: Failed to rigister netdev notifier\n");
+ }
+
+ return err;
+}
+
+void rxe_net_exit(void)
+{
+ if (recv_sockets.sk6)
+ rxe_release_udp_tunnel(recv_sockets.sk6);
+
+ if (recv_sockets.sk4)
+ rxe_release_udp_tunnel(recv_sockets.sk4);
+
+ unregister_netdevice_notifier(&rxe_net_notifier);
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_NET_H
+#define RXE_NET_H
+
+#include <net/sock.h>
+#include <net/if_inet6.h>
+#include <linux/module.h>
+
+struct rxe_recv_sockets {
+ struct socket *sk4;
+ struct socket *sk6;
+};
+
+extern struct rxe_recv_sockets recv_sockets;
+
+struct rxe_dev *rxe_net_add(struct net_device *ndev);
+
+int rxe_net_init(void);
+void rxe_net_exit(void);
+
+#endif /* RXE_NET_H */
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_pack.h>
+#include "rxe_opcode.h"
+#include "rxe_hdr.h"
+
+/* useful information about work request opcodes and pkt opcodes in
+ * table form
+ */
+struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
+ [IB_WR_RDMA_WRITE] = {
+ .name = "IB_WR_RDMA_WRITE",
+ .mask = {
+ [IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK,
+ [IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK,
+ },
+ },
+ [IB_WR_RDMA_WRITE_WITH_IMM] = {
+ .name = "IB_WR_RDMA_WRITE_WITH_IMM",
+ .mask = {
+ [IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK,
+ [IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK,
+ },
+ },
+ [IB_WR_SEND] = {
+ .name = "IB_WR_SEND",
+ .mask = {
+ [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK,
+ [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK,
+ [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
+ [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
+ [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK,
+ },
+ },
+ [IB_WR_SEND_WITH_IMM] = {
+ .name = "IB_WR_SEND_WITH_IMM",
+ .mask = {
+ [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK,
+ [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK,
+ [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
+ [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
+ [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK,
+ },
+ },
+ [IB_WR_RDMA_READ] = {
+ .name = "IB_WR_RDMA_READ",
+ .mask = {
+ [IB_QPT_RC] = WR_READ_MASK,
+ },
+ },
+ [IB_WR_ATOMIC_CMP_AND_SWP] = {
+ .name = "IB_WR_ATOMIC_CMP_AND_SWP",
+ .mask = {
+ [IB_QPT_RC] = WR_ATOMIC_MASK,
+ },
+ },
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = {
+ .name = "IB_WR_ATOMIC_FETCH_AND_ADD",
+ .mask = {
+ [IB_QPT_RC] = WR_ATOMIC_MASK,
+ },
+ },
+ [IB_WR_LSO] = {
+ .name = "IB_WR_LSO",
+ .mask = {
+ /* not supported */
+ },
+ },
+ [IB_WR_SEND_WITH_INV] = {
+ .name = "IB_WR_SEND_WITH_INV",
+ .mask = {
+ [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
+ [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
+ [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK,
+ },
+ },
+ [IB_WR_RDMA_READ_WITH_INV] = {
+ .name = "IB_WR_RDMA_READ_WITH_INV",
+ .mask = {
+ [IB_QPT_RC] = WR_READ_MASK,
+ },
+ },
+ [IB_WR_LOCAL_INV] = {
+ .name = "IB_WR_LOCAL_INV",
+ .mask = {
+ [IB_QPT_RC] = WR_REG_MASK,
+ },
+ },
+ [IB_WR_REG_MR] = {
+ .name = "IB_WR_REG_MR",
+ .mask = {
+ [IB_QPT_RC] = WR_REG_MASK,
+ },
+ },
+};
+
+struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
+ [IB_OPCODE_RC_SEND_FIRST] = {
+ .name = "IB_OPCODE_RC_SEND_FIRST",
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK
+ | RXE_SEND_MASK | RXE_START_MASK,
+ .length = RXE_BTH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_SEND_MIDDLE] = {
+ .name = "IB_OPCODE_RC_SEND_MIDDLE]",
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK
+ | RXE_MIDDLE_MASK,
+ .length = RXE_BTH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_SEND_LAST] = {
+ .name = "IB_OPCODE_RC_SEND_LAST",
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
+ | RXE_SEND_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = {
+ .name = "IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE",
+ .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_IMMDT] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_IMMDT_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_SEND_ONLY] = {
+ .name = "IB_OPCODE_RC_SEND_ONLY",
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
+ | RXE_RWR_MASK | RXE_SEND_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = {
+ .name = "IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE",
+ .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_IMMDT] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_IMMDT_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_RDMA_WRITE_FIRST] = {
+ .name = "IB_OPCODE_RC_RDMA_WRITE_FIRST",
+ .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_WRITE_MASK | RXE_START_MASK,
+ .length = RXE_BTH_BYTES + RXE_RETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = {
+ .name = "IB_OPCODE_RC_RDMA_WRITE_MIDDLE",
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
+ | RXE_MIDDLE_MASK,
+ .length = RXE_BTH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_RDMA_WRITE_LAST] = {
+ .name = "IB_OPCODE_RC_RDMA_WRITE_LAST",
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
+ | RXE_END_MASK,
+ .length = RXE_BTH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = {
+ .name = "IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE",
+ .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK
+ | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_IMMDT] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_IMMDT_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_RDMA_WRITE_ONLY] = {
+ .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY",
+ .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_WRITE_MASK | RXE_START_MASK
+ | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_RETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = {
+ .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE",
+ .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
+ | RXE_REQ_MASK | RXE_WRITE_MASK
+ | RXE_COMP_MASK | RXE_RWR_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RETH] = RXE_BTH_BYTES,
+ [RXE_IMMDT] = RXE_BTH_BYTES
+ + RXE_RETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RETH_BYTES
+ + RXE_IMMDT_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_RDMA_READ_REQUEST] = {
+ .name = "IB_OPCODE_RC_RDMA_READ_REQUEST",
+ .mask = RXE_RETH_MASK | RXE_REQ_MASK | RXE_READ_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_RETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = {
+ .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST",
+ .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
+ | RXE_START_MASK,
+ .length = RXE_BTH_BYTES + RXE_AETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_AETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_AETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = {
+ .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE",
+ .mask = RXE_PAYLOAD_MASK | RXE_ACK_MASK | RXE_MIDDLE_MASK,
+ .length = RXE_BTH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = {
+ .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST",
+ .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
+ | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_AETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_AETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_AETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = {
+ .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY",
+ .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_AETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_AETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_AETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_ACKNOWLEDGE] = {
+ .name = "IB_OPCODE_RC_ACKNOWLEDGE",
+ .mask = RXE_AETH_MASK | RXE_ACK_MASK | RXE_START_MASK
+ | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_AETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_AETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_AETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = {
+ .name = "IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE",
+ .mask = RXE_AETH_MASK | RXE_ATMACK_MASK | RXE_ACK_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_AETH] = RXE_BTH_BYTES,
+ [RXE_ATMACK] = RXE_BTH_BYTES
+ + RXE_AETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_ATMACK_BYTES + RXE_AETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_COMPARE_SWAP] = {
+ .name = "IB_OPCODE_RC_COMPARE_SWAP",
+ .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_ATMETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_ATMETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_FETCH_ADD] = {
+ .name = "IB_OPCODE_RC_FETCH_ADD",
+ .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_ATMETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_ATMETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = {
+ .name = "IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE",
+ .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_IETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_IETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = {
+ .name = "IB_OPCODE_RC_SEND_ONLY_INV",
+ .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
+ | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_IETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_IETH_BYTES,
+ }
+ },
+
+ /* UC */
+ [IB_OPCODE_UC_SEND_FIRST] = {
+ .name = "IB_OPCODE_UC_SEND_FIRST",
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK
+ | RXE_SEND_MASK | RXE_START_MASK,
+ .length = RXE_BTH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES,
+ }
+ },
+ [IB_OPCODE_UC_SEND_MIDDLE] = {
+ .name = "IB_OPCODE_UC_SEND_MIDDLE",
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK
+ | RXE_MIDDLE_MASK,
+ .length = RXE_BTH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES,
+ }
+ },
+ [IB_OPCODE_UC_SEND_LAST] = {
+ .name = "IB_OPCODE_UC_SEND_LAST",
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
+ | RXE_SEND_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES,
+ }
+ },
+ [IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = {
+ .name = "IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE",
+ .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_IMMDT] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_IMMDT_BYTES,
+ }
+ },
+ [IB_OPCODE_UC_SEND_ONLY] = {
+ .name = "IB_OPCODE_UC_SEND_ONLY",
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK
+ | RXE_RWR_MASK | RXE_SEND_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES,
+ }
+ },
+ [IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = {
+ .name = "IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE",
+ .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_IMMDT] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_IMMDT_BYTES,
+ }
+ },
+ [IB_OPCODE_UC_RDMA_WRITE_FIRST] = {
+ .name = "IB_OPCODE_UC_RDMA_WRITE_FIRST",
+ .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_WRITE_MASK | RXE_START_MASK,
+ .length = RXE_BTH_BYTES + RXE_RETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RETH_BYTES,
+ }
+ },
+ [IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = {
+ .name = "IB_OPCODE_UC_RDMA_WRITE_MIDDLE",
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
+ | RXE_MIDDLE_MASK,
+ .length = RXE_BTH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES,
+ }
+ },
+ [IB_OPCODE_UC_RDMA_WRITE_LAST] = {
+ .name = "IB_OPCODE_UC_RDMA_WRITE_LAST",
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK
+ | RXE_END_MASK,
+ .length = RXE_BTH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES,
+ }
+ },
+ [IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = {
+ .name = "IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE",
+ .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK
+ | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_IMMDT] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_IMMDT_BYTES,
+ }
+ },
+ [IB_OPCODE_UC_RDMA_WRITE_ONLY] = {
+ .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY",
+ .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_WRITE_MASK | RXE_START_MASK
+ | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_RETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RETH_BYTES,
+ }
+ },
+ [IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = {
+ .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE",
+ .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
+ | RXE_REQ_MASK | RXE_WRITE_MASK
+ | RXE_COMP_MASK | RXE_RWR_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RETH] = RXE_BTH_BYTES,
+ [RXE_IMMDT] = RXE_BTH_BYTES
+ + RXE_RETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RETH_BYTES
+ + RXE_IMMDT_BYTES,
+ }
+ },
+
+ /* RD */
+ [IB_OPCODE_RD_SEND_FIRST] = {
+ .name = "IB_OPCODE_RD_SEND_FIRST",
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
+ | RXE_REQ_MASK | RXE_RWR_MASK | RXE_SEND_MASK
+ | RXE_START_MASK,
+ .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_SEND_MIDDLE] = {
+ .name = "IB_OPCODE_RD_SEND_MIDDLE",
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
+ | RXE_REQ_MASK | RXE_SEND_MASK
+ | RXE_MIDDLE_MASK,
+ .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_SEND_LAST] = {
+ .name = "IB_OPCODE_RD_SEND_LAST",
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
+ | RXE_REQ_MASK | RXE_COMP_MASK | RXE_SEND_MASK
+ | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE] = {
+ .name = "IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE",
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK
+ | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_COMP_MASK | RXE_SEND_MASK
+ | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES
+ + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_IMMDT] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES
+ + RXE_IMMDT_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_SEND_ONLY] = {
+ .name = "IB_OPCODE_RD_SEND_ONLY",
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
+ | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK
+ | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE] = {
+ .name = "IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE",
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK
+ | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES
+ + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_IMMDT] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES
+ + RXE_IMMDT_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_RDMA_WRITE_FIRST] = {
+ .name = "IB_OPCODE_RD_RDMA_WRITE_FIRST",
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
+ | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_WRITE_MASK | RXE_START_MASK,
+ .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES
+ + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_RETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES
+ + RXE_RETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_RDMA_WRITE_MIDDLE] = {
+ .name = "IB_OPCODE_RD_RDMA_WRITE_MIDDLE",
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
+ | RXE_REQ_MASK | RXE_WRITE_MASK
+ | RXE_MIDDLE_MASK,
+ .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_RDMA_WRITE_LAST] = {
+ .name = "IB_OPCODE_RD_RDMA_WRITE_LAST",
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK
+ | RXE_REQ_MASK | RXE_WRITE_MASK
+ | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE] = {
+ .name = "IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE",
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK
+ | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK
+ | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES
+ + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_IMMDT] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES
+ + RXE_IMMDT_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_RDMA_WRITE_ONLY] = {
+ .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY",
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
+ | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_WRITE_MASK | RXE_START_MASK
+ | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES
+ + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_RETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES
+ + RXE_RETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = {
+ .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE",
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
+ | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
+ | RXE_REQ_MASK | RXE_WRITE_MASK
+ | RXE_COMP_MASK | RXE_RWR_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES
+ + RXE_DETH_BYTES + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_RETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES,
+ [RXE_IMMDT] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES
+ + RXE_RETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES
+ + RXE_RETH_BYTES
+ + RXE_IMMDT_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_RDMA_READ_REQUEST] = {
+ .name = "IB_OPCODE_RD_RDMA_READ_REQUEST",
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK
+ | RXE_REQ_MASK | RXE_READ_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES
+ + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_RETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RETH_BYTES
+ + RXE_DETH_BYTES
+ + RXE_RDETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST] = {
+ .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST",
+ .mask = RXE_RDETH_MASK | RXE_AETH_MASK
+ | RXE_PAYLOAD_MASK | RXE_ACK_MASK
+ | RXE_START_MASK,
+ .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_AETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_AETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE] = {
+ .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE",
+ .mask = RXE_RDETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK
+ | RXE_MIDDLE_MASK,
+ .length = RXE_BTH_BYTES + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST] = {
+ .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST",
+ .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK
+ | RXE_ACK_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_AETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_AETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY] = {
+ .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY",
+ .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK
+ | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_AETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_AETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_ACKNOWLEDGE] = {
+ .name = "IB_OPCODE_RD_ACKNOWLEDGE",
+ .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ACK_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_AETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE] = {
+ .name = "IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE",
+ .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ATMACK_MASK
+ | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES
+ + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_AETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_ATMACK] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_AETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_COMPARE_SWAP] = {
+ .name = "RD_COMPARE_SWAP",
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK
+ | RXE_REQ_MASK | RXE_ATOMIC_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES
+ + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_ATMETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ + RXE_ATMETH_BYTES
+ + RXE_DETH_BYTES +
+ + RXE_RDETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RD_FETCH_ADD] = {
+ .name = "IB_OPCODE_RD_FETCH_ADD",
+ .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK
+ | RXE_REQ_MASK | RXE_ATOMIC_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES
+ + RXE_RDETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RDETH] = RXE_BTH_BYTES,
+ [RXE_DETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES,
+ [RXE_ATMETH] = RXE_BTH_BYTES
+ + RXE_RDETH_BYTES
+ + RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES +
+ + RXE_ATMETH_BYTES
+ + RXE_DETH_BYTES +
+ + RXE_RDETH_BYTES,
+ }
+ },
+
+ /* UD */
+ [IB_OPCODE_UD_SEND_ONLY] = {
+ .name = "IB_OPCODE_UD_SEND_ONLY",
+ .mask = RXE_DETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
+ | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
+ | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_DETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_DETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_DETH_BYTES,
+ }
+ },
+ [IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = {
+ .name = "IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE",
+ .mask = RXE_DETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK
+ | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK
+ | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_DETH] = RXE_BTH_BYTES,
+ [RXE_IMMDT] = RXE_BTH_BYTES
+ + RXE_DETH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES
+ + RXE_DETH_BYTES
+ + RXE_IMMDT_BYTES,
+ }
+ },
+
+};
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_OPCODE_H
+#define RXE_OPCODE_H
+
+/*
+ * contains header bit mask definitions and header lengths
+ * declaration of the rxe_opcode_info struct and
+ * rxe_wr_opcode_info struct
+ */
+
+enum rxe_wr_mask {
+ WR_INLINE_MASK = BIT(0),
+ WR_ATOMIC_MASK = BIT(1),
+ WR_SEND_MASK = BIT(2),
+ WR_READ_MASK = BIT(3),
+ WR_WRITE_MASK = BIT(4),
+ WR_LOCAL_MASK = BIT(5),
+ WR_REG_MASK = BIT(6),
+
+ WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK,
+ WR_READ_WRITE_OR_SEND_MASK = WR_READ_OR_WRITE_MASK | WR_SEND_MASK,
+ WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK,
+ WR_ATOMIC_OR_READ_MASK = WR_ATOMIC_MASK | WR_READ_MASK,
+};
+
+#define WR_MAX_QPT (8)
+
+struct rxe_wr_opcode_info {
+ char *name;
+ enum rxe_wr_mask mask[WR_MAX_QPT];
+};
+
+extern struct rxe_wr_opcode_info rxe_wr_opcode_info[];
+
+enum rxe_hdr_type {
+ RXE_LRH,
+ RXE_GRH,
+ RXE_BTH,
+ RXE_RETH,
+ RXE_AETH,
+ RXE_ATMETH,
+ RXE_ATMACK,
+ RXE_IETH,
+ RXE_RDETH,
+ RXE_DETH,
+ RXE_IMMDT,
+ RXE_PAYLOAD,
+ NUM_HDR_TYPES
+};
+
+enum rxe_hdr_mask {
+ RXE_LRH_MASK = BIT(RXE_LRH),
+ RXE_GRH_MASK = BIT(RXE_GRH),
+ RXE_BTH_MASK = BIT(RXE_BTH),
+ RXE_IMMDT_MASK = BIT(RXE_IMMDT),
+ RXE_RETH_MASK = BIT(RXE_RETH),
+ RXE_AETH_MASK = BIT(RXE_AETH),
+ RXE_ATMETH_MASK = BIT(RXE_ATMETH),
+ RXE_ATMACK_MASK = BIT(RXE_ATMACK),
+ RXE_IETH_MASK = BIT(RXE_IETH),
+ RXE_RDETH_MASK = BIT(RXE_RDETH),
+ RXE_DETH_MASK = BIT(RXE_DETH),
+ RXE_PAYLOAD_MASK = BIT(RXE_PAYLOAD),
+
+ RXE_REQ_MASK = BIT(NUM_HDR_TYPES + 0),
+ RXE_ACK_MASK = BIT(NUM_HDR_TYPES + 1),
+ RXE_SEND_MASK = BIT(NUM_HDR_TYPES + 2),
+ RXE_WRITE_MASK = BIT(NUM_HDR_TYPES + 3),
+ RXE_READ_MASK = BIT(NUM_HDR_TYPES + 4),
+ RXE_ATOMIC_MASK = BIT(NUM_HDR_TYPES + 5),
+
+ RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 6),
+ RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 7),
+
+ RXE_START_MASK = BIT(NUM_HDR_TYPES + 8),
+ RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 9),
+ RXE_END_MASK = BIT(NUM_HDR_TYPES + 10),
+
+ RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12),
+
+ RXE_READ_OR_ATOMIC = (RXE_READ_MASK | RXE_ATOMIC_MASK),
+ RXE_WRITE_OR_SEND = (RXE_WRITE_MASK | RXE_SEND_MASK),
+};
+
+#define OPCODE_NONE (-1)
+#define RXE_NUM_OPCODE 256
+
+struct rxe_opcode_info {
+ char *name;
+ enum rxe_hdr_mask mask;
+ int length;
+ int offset[NUM_HDR_TYPES];
+};
+
+extern struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE];
+
+#endif /* RXE_OPCODE_H */
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_PARAM_H
+#define RXE_PARAM_H
+
+static inline enum ib_mtu rxe_mtu_int_to_enum(int mtu)
+{
+ if (mtu < 256)
+ return 0;
+ else if (mtu < 512)
+ return IB_MTU_256;
+ else if (mtu < 1024)
+ return IB_MTU_512;
+ else if (mtu < 2048)
+ return IB_MTU_1024;
+ else if (mtu < 4096)
+ return IB_MTU_2048;
+ else
+ return IB_MTU_4096;
+}
+
+/* Find the IB mtu for a given network MTU. */
+static inline enum ib_mtu eth_mtu_int_to_enum(int mtu)
+{
+ mtu -= RXE_MAX_HDR_LENGTH;
+
+ return rxe_mtu_int_to_enum(mtu);
+}
+
+/* default/initial rxe device parameter settings */
+enum rxe_device_param {
+ RXE_FW_VER = 0,
+ RXE_MAX_MR_SIZE = -1ull,
+ RXE_PAGE_SIZE_CAP = 0xfffff000,
+ RXE_VENDOR_ID = 0,
+ RXE_VENDOR_PART_ID = 0,
+ RXE_HW_VER = 0,
+ RXE_MAX_QP = 0x10000,
+ RXE_MAX_QP_WR = 0x4000,
+ RXE_MAX_INLINE_DATA = 400,
+ RXE_DEVICE_CAP_FLAGS = IB_DEVICE_BAD_PKEY_CNTR
+ | IB_DEVICE_BAD_QKEY_CNTR
+ | IB_DEVICE_AUTO_PATH_MIG
+ | IB_DEVICE_CHANGE_PHY_PORT
+ | IB_DEVICE_UD_AV_PORT_ENFORCE
+ | IB_DEVICE_PORT_ACTIVE_EVENT
+ | IB_DEVICE_SYS_IMAGE_GUID
+ | IB_DEVICE_RC_RNR_NAK_GEN
+ | IB_DEVICE_SRQ_RESIZE
+ | IB_DEVICE_MEM_MGT_EXTENSIONS,
+ RXE_MAX_SGE = 32,
+ RXE_MAX_SGE_RD = 32,
+ RXE_MAX_CQ = 16384,
+ RXE_MAX_LOG_CQE = 13,
+ RXE_MAX_MR = 2 * 1024,
+ RXE_MAX_PD = 0x7ffc,
+ RXE_MAX_QP_RD_ATOM = 128,
+ RXE_MAX_EE_RD_ATOM = 0,
+ RXE_MAX_RES_RD_ATOM = 0x3f000,
+ RXE_MAX_QP_INIT_RD_ATOM = 128,
+ RXE_MAX_EE_INIT_RD_ATOM = 0,
+ RXE_ATOMIC_CAP = 1,
+ RXE_MAX_EE = 0,
+ RXE_MAX_RDD = 0,
+ RXE_MAX_MW = 0,
+ RXE_MAX_RAW_IPV6_QP = 0,
+ RXE_MAX_RAW_ETHY_QP = 0,
+ RXE_MAX_MCAST_GRP = 8192,
+ RXE_MAX_MCAST_QP_ATTACH = 56,
+ RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000,
+ RXE_MAX_AH = 100,
+ RXE_MAX_FMR = 0,
+ RXE_MAX_MAP_PER_FMR = 0,
+ RXE_MAX_SRQ = 960,
+ RXE_MAX_SRQ_WR = 0x4000,
+ RXE_MIN_SRQ_WR = 1,
+ RXE_MAX_SRQ_SGE = 27,
+ RXE_MIN_SRQ_SGE = 1,
+ RXE_MAX_FMR_PAGE_LIST_LEN = 512,
+ RXE_MAX_PKEYS = 64,
+ RXE_LOCAL_CA_ACK_DELAY = 15,
+
+ RXE_MAX_UCONTEXT = 512,
+
+ RXE_NUM_PORT = 1,
+ RXE_NUM_COMP_VECTORS = 1,
+
+ RXE_MIN_QP_INDEX = 16,
+ RXE_MAX_QP_INDEX = 0x00020000,
+
+ RXE_MIN_SRQ_INDEX = 0x00020001,
+ RXE_MAX_SRQ_INDEX = 0x00040000,
+
+ RXE_MIN_MR_INDEX = 0x00000001,
+ RXE_MAX_MR_INDEX = 0x00040000,
+ RXE_MIN_MW_INDEX = 0x00040001,
+ RXE_MAX_MW_INDEX = 0x00060000,
+ RXE_MAX_PKT_PER_ACK = 64,
+
+ RXE_MAX_UNACKED_PSNS = 128,
+
+ /* Max inflight SKBs per queue pair */
+ RXE_INFLIGHT_SKBS_PER_QP_HIGH = 64,
+ RXE_INFLIGHT_SKBS_PER_QP_LOW = 16,
+
+ /* Delay before calling arbiter timer */
+ RXE_NSEC_ARB_TIMER_DELAY = 200,
+};
+
+/* default/initial rxe port parameters */
+enum rxe_port_param {
+ RXE_PORT_STATE = IB_PORT_DOWN,
+ RXE_PORT_MAX_MTU = IB_MTU_4096,
+ RXE_PORT_ACTIVE_MTU = IB_MTU_256,
+ RXE_PORT_GID_TBL_LEN = 1024,
+ RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP,
+ RXE_PORT_MAX_MSG_SZ = 0x800000,
+ RXE_PORT_BAD_PKEY_CNTR = 0,
+ RXE_PORT_QKEY_VIOL_CNTR = 0,
+ RXE_PORT_LID = 0,
+ RXE_PORT_SM_LID = 0,
+ RXE_PORT_SM_SL = 0,
+ RXE_PORT_LMC = 0,
+ RXE_PORT_MAX_VL_NUM = 1,
+ RXE_PORT_SUBNET_TIMEOUT = 0,
+ RXE_PORT_INIT_TYPE_REPLY = 0,
+ RXE_PORT_ACTIVE_WIDTH = IB_WIDTH_1X,
+ RXE_PORT_ACTIVE_SPEED = 1,
+ RXE_PORT_PKEY_TBL_LEN = 64,
+ RXE_PORT_PHYS_STATE = 2,
+ RXE_PORT_SUBNET_PREFIX = 0xfe80000000000000ULL,
+};
+
+/* default/initial port info parameters */
+enum rxe_port_info_param {
+ RXE_PORT_INFO_VL_CAP = 4, /* 1-8 */
+ RXE_PORT_INFO_MTU_CAP = 5, /* 4096 */
+ RXE_PORT_INFO_OPER_VL = 1, /* 1 */
+};
+
+#endif /* RXE_PARAM_H */
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+/* info about object pools
+ * note that mr and mw share a single index space
+ * so that one can map an lkey to the correct type of object
+ */
+struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
+ [RXE_TYPE_UC] = {
+ .name = "rxe-uc",
+ .size = sizeof(struct rxe_ucontext),
+ },
+ [RXE_TYPE_PD] = {
+ .name = "rxe-pd",
+ .size = sizeof(struct rxe_pd),
+ },
+ [RXE_TYPE_AH] = {
+ .name = "rxe-ah",
+ .size = sizeof(struct rxe_ah),
+ .flags = RXE_POOL_ATOMIC,
+ },
+ [RXE_TYPE_SRQ] = {
+ .name = "rxe-srq",
+ .size = sizeof(struct rxe_srq),
+ .flags = RXE_POOL_INDEX,
+ .min_index = RXE_MIN_SRQ_INDEX,
+ .max_index = RXE_MAX_SRQ_INDEX,
+ },
+ [RXE_TYPE_QP] = {
+ .name = "rxe-qp",
+ .size = sizeof(struct rxe_qp),
+ .cleanup = rxe_qp_cleanup,
+ .flags = RXE_POOL_INDEX,
+ .min_index = RXE_MIN_QP_INDEX,
+ .max_index = RXE_MAX_QP_INDEX,
+ },
+ [RXE_TYPE_CQ] = {
+ .name = "rxe-cq",
+ .size = sizeof(struct rxe_cq),
+ .cleanup = rxe_cq_cleanup,
+ },
+ [RXE_TYPE_MR] = {
+ .name = "rxe-mr",
+ .size = sizeof(struct rxe_mem),
+ .cleanup = rxe_mem_cleanup,
+ .flags = RXE_POOL_INDEX,
+ .max_index = RXE_MAX_MR_INDEX,
+ .min_index = RXE_MIN_MR_INDEX,
+ },
+ [RXE_TYPE_MW] = {
+ .name = "rxe-mw",
+ .size = sizeof(struct rxe_mem),
+ .flags = RXE_POOL_INDEX,
+ .max_index = RXE_MAX_MW_INDEX,
+ .min_index = RXE_MIN_MW_INDEX,
+ },
+ [RXE_TYPE_MC_GRP] = {
+ .name = "rxe-mc_grp",
+ .size = sizeof(struct rxe_mc_grp),
+ .cleanup = rxe_mc_cleanup,
+ .flags = RXE_POOL_KEY,
+ .key_offset = offsetof(struct rxe_mc_grp, mgid),
+ .key_size = sizeof(union ib_gid),
+ },
+ [RXE_TYPE_MC_ELEM] = {
+ .name = "rxe-mc_elem",
+ .size = sizeof(struct rxe_mc_elem),
+ .flags = RXE_POOL_ATOMIC,
+ },
+};
+
+static inline char *pool_name(struct rxe_pool *pool)
+{
+ return rxe_type_info[pool->type].name;
+}
+
+static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)
+{
+ return rxe_type_info[pool->type].cache;
+}
+
+static inline enum rxe_elem_type rxe_type(void *arg)
+{
+ struct rxe_pool_entry *elem = arg;
+
+ return elem->pool->type;
+}
+
+int rxe_cache_init(void)
+{
+ int err;
+ int i;
+ size_t size;
+ struct rxe_type_info *type;
+
+ for (i = 0; i < RXE_NUM_TYPES; i++) {
+ type = &rxe_type_info[i];
+ size = ALIGN(type->size, RXE_POOL_ALIGN);
+ type->cache = kmem_cache_create(type->name, size,
+ RXE_POOL_ALIGN,
+ RXE_POOL_CACHE_FLAGS, NULL);
+ if (!type->cache) {
+ pr_err("Unable to init kmem cache for %s\n",
+ type->name);
+ err = -ENOMEM;
+ goto err1;
+ }
+ }
+
+ return 0;
+
+err1:
+ while (--i >= 0) {
+ kmem_cache_destroy(type->cache);
+ type->cache = NULL;
+ }
+
+ return err;
+}
+
+void rxe_cache_exit(void)
+{
+ int i;
+ struct rxe_type_info *type;
+
+ for (i = 0; i < RXE_NUM_TYPES; i++) {
+ type = &rxe_type_info[i];
+ kmem_cache_destroy(type->cache);
+ type->cache = NULL;
+ }
+}
+
+static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
+{
+ int err = 0;
+ size_t size;
+
+ if ((max - min + 1) < pool->max_elem) {
+ pr_warn("not enough indices for max_elem\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ pool->max_index = max;
+ pool->min_index = min;
+
+ size = BITS_TO_LONGS(max - min + 1) * sizeof(long);
+ pool->table = kmalloc(size, GFP_KERNEL);
+ if (!pool->table) {
+ pr_warn("no memory for bit table\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ pool->table_size = size;
+ bitmap_zero(pool->table, max - min + 1);
+
+out:
+ return err;
+}
+
+int rxe_pool_init(
+ struct rxe_dev *rxe,
+ struct rxe_pool *pool,
+ enum rxe_elem_type type,
+ unsigned max_elem)
+{
+ int err = 0;
+ size_t size = rxe_type_info[type].size;
+
+ memset(pool, 0, sizeof(*pool));
+
+ pool->rxe = rxe;
+ pool->type = type;
+ pool->max_elem = max_elem;
+ pool->elem_size = ALIGN(size, RXE_POOL_ALIGN);
+ pool->flags = rxe_type_info[type].flags;
+ pool->tree = RB_ROOT;
+ pool->cleanup = rxe_type_info[type].cleanup;
+
+ atomic_set(&pool->num_elem, 0);
+
+ kref_init(&pool->ref_cnt);
+
+ spin_lock_init(&pool->pool_lock);
+
+ if (rxe_type_info[type].flags & RXE_POOL_INDEX) {
+ err = rxe_pool_init_index(pool,
+ rxe_type_info[type].max_index,
+ rxe_type_info[type].min_index);
+ if (err)
+ goto out;
+ }
+
+ if (rxe_type_info[type].flags & RXE_POOL_KEY) {
+ pool->key_offset = rxe_type_info[type].key_offset;
+ pool->key_size = rxe_type_info[type].key_size;
+ }
+
+ pool->state = rxe_pool_valid;
+
+out:
+ return err;
+}
+
+static void rxe_pool_release(struct kref *kref)
+{
+ struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
+
+ pool->state = rxe_pool_invalid;
+ kfree(pool->table);
+}
+
+static void rxe_pool_put(struct rxe_pool *pool)
+{
+ kref_put(&pool->ref_cnt, rxe_pool_release);
+}
+
+int rxe_pool_cleanup(struct rxe_pool *pool)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+ pool->state = rxe_pool_invalid;
+ if (atomic_read(&pool->num_elem) > 0)
+ pr_warn("%s pool destroyed with unfree'd elem\n",
+ pool_name(pool));
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+ rxe_pool_put(pool);
+
+ return 0;
+}
+
+static u32 alloc_index(struct rxe_pool *pool)
+{
+ u32 index;
+ u32 range = pool->max_index - pool->min_index + 1;
+
+ index = find_next_zero_bit(pool->table, range, pool->last);
+ if (index >= range)
+ index = find_first_zero_bit(pool->table, range);
+
+ set_bit(index, pool->table);
+ pool->last = index;
+ return index + pool->min_index;
+}
+
+static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
+{
+ struct rb_node **link = &pool->tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct rxe_pool_entry *elem;
+
+ while (*link) {
+ parent = *link;
+ elem = rb_entry(parent, struct rxe_pool_entry, node);
+
+ if (elem->index == new->index) {
+ pr_warn("element already exists!\n");
+ goto out;
+ }
+
+ if (elem->index > new->index)
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
+ }
+
+ rb_link_node(&new->node, parent, link);
+ rb_insert_color(&new->node, &pool->tree);
+out:
+ return;
+}
+
+static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
+{
+ struct rb_node **link = &pool->tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct rxe_pool_entry *elem;
+ int cmp;
+
+ while (*link) {
+ parent = *link;
+ elem = rb_entry(parent, struct rxe_pool_entry, node);
+
+ cmp = memcmp((u8 *)elem + pool->key_offset,
+ (u8 *)new + pool->key_offset, pool->key_size);
+
+ if (cmp == 0) {
+ pr_warn("key already exists!\n");
+ goto out;
+ }
+
+ if (cmp > 0)
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
+ }
+
+ rb_link_node(&new->node, parent, link);
+ rb_insert_color(&new->node, &pool->tree);
+out:
+ return;
+}
+
+void rxe_add_key(void *arg, void *key)
+{
+ struct rxe_pool_entry *elem = arg;
+ struct rxe_pool *pool = elem->pool;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+ memcpy((u8 *)elem + pool->key_offset, key, pool->key_size);
+ insert_key(pool, elem);
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+}
+
+void rxe_drop_key(void *arg)
+{
+ struct rxe_pool_entry *elem = arg;
+ struct rxe_pool *pool = elem->pool;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+ rb_erase(&elem->node, &pool->tree);
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+}
+
+void rxe_add_index(void *arg)
+{
+ struct rxe_pool_entry *elem = arg;
+ struct rxe_pool *pool = elem->pool;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+ elem->index = alloc_index(pool);
+ insert_index(pool, elem);
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+}
+
+void rxe_drop_index(void *arg)
+{
+ struct rxe_pool_entry *elem = arg;
+ struct rxe_pool *pool = elem->pool;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+ clear_bit(elem->index - pool->min_index, pool->table);
+ rb_erase(&elem->node, &pool->tree);
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+}
+
+void *rxe_alloc(struct rxe_pool *pool)
+{
+ struct rxe_pool_entry *elem;
+ unsigned long flags;
+
+ might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+ if (pool->state != rxe_pool_valid) {
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+ return NULL;
+ }
+ kref_get(&pool->ref_cnt);
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+ kref_get(&pool->rxe->ref_cnt);
+
+ if (atomic_inc_return(&pool->num_elem) > pool->max_elem) {
+ atomic_dec(&pool->num_elem);
+ rxe_dev_put(pool->rxe);
+ rxe_pool_put(pool);
+ return NULL;
+ }
+
+ elem = kmem_cache_zalloc(pool_cache(pool),
+ (pool->flags & RXE_POOL_ATOMIC) ?
+ GFP_ATOMIC : GFP_KERNEL);
+
+ elem->pool = pool;
+ kref_init(&elem->ref_cnt);
+
+ return elem;
+}
+
+void rxe_elem_release(struct kref *kref)
+{
+ struct rxe_pool_entry *elem =
+ container_of(kref, struct rxe_pool_entry, ref_cnt);
+ struct rxe_pool *pool = elem->pool;
+
+ if (pool->cleanup)
+ pool->cleanup(elem);
+
+ kmem_cache_free(pool_cache(pool), elem);
+ atomic_dec(&pool->num_elem);
+ rxe_dev_put(pool->rxe);
+ rxe_pool_put(pool);
+}
+
+void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
+{
+ struct rb_node *node = NULL;
+ struct rxe_pool_entry *elem = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+
+ if (pool->state != rxe_pool_valid)
+ goto out;
+
+ node = pool->tree.rb_node;
+
+ while (node) {
+ elem = rb_entry(node, struct rxe_pool_entry, node);
+
+ if (elem->index > index)
+ node = node->rb_left;
+ else if (elem->index < index)
+ node = node->rb_right;
+ else
+ break;
+ }
+
+ if (node)
+ kref_get(&elem->ref_cnt);
+
+out:
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+ return node ? (void *)elem : NULL;
+}
+
+void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
+{
+ struct rb_node *node = NULL;
+ struct rxe_pool_entry *elem = NULL;
+ int cmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+
+ if (pool->state != rxe_pool_valid)
+ goto out;
+
+ node = pool->tree.rb_node;
+
+ while (node) {
+ elem = rb_entry(node, struct rxe_pool_entry, node);
+
+ cmp = memcmp((u8 *)elem + pool->key_offset,
+ key, pool->key_size);
+
+ if (cmp > 0)
+ node = node->rb_left;
+ else if (cmp < 0)
+ node = node->rb_right;
+ else
+ break;
+ }
+
+ if (node)
+ kref_get(&elem->ref_cnt);
+
+out:
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+ return node ? ((void *)elem) : NULL;
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_POOL_H
+#define RXE_POOL_H
+
+#define RXE_POOL_ALIGN (16)
+#define RXE_POOL_CACHE_FLAGS (0)
+
+enum rxe_pool_flags {
+ RXE_POOL_ATOMIC = BIT(0),
+ RXE_POOL_INDEX = BIT(1),
+ RXE_POOL_KEY = BIT(2),
+};
+
+enum rxe_elem_type {
+ RXE_TYPE_UC,
+ RXE_TYPE_PD,
+ RXE_TYPE_AH,
+ RXE_TYPE_SRQ,
+ RXE_TYPE_QP,
+ RXE_TYPE_CQ,
+ RXE_TYPE_MR,
+ RXE_TYPE_MW,
+ RXE_TYPE_MC_GRP,
+ RXE_TYPE_MC_ELEM,
+ RXE_NUM_TYPES, /* keep me last */
+};
+
+struct rxe_type_info {
+ char *name;
+ size_t size;
+ void (*cleanup)(void *obj);
+ enum rxe_pool_flags flags;
+ u32 max_index;
+ u32 min_index;
+ size_t key_offset;
+ size_t key_size;
+ struct kmem_cache *cache;
+};
+
+extern struct rxe_type_info rxe_type_info[];
+
+enum rxe_pool_state {
+ rxe_pool_invalid,
+ rxe_pool_valid,
+};
+
+struct rxe_pool_entry {
+ struct rxe_pool *pool;
+ struct kref ref_cnt;
+ struct list_head list;
+
+ /* only used if indexed or keyed */
+ struct rb_node node;
+ u32 index;
+};
+
+struct rxe_pool {
+ struct rxe_dev *rxe;
+ spinlock_t pool_lock; /* pool spinlock */
+ size_t elem_size;
+ struct kref ref_cnt;
+ void (*cleanup)(void *obj);
+ enum rxe_pool_state state;
+ enum rxe_pool_flags flags;
+ enum rxe_elem_type type;
+
+ unsigned int max_elem;
+ atomic_t num_elem;
+
+ /* only used if indexed or keyed */
+ struct rb_root tree;
+ unsigned long *table;
+ size_t table_size;
+ u32 max_index;
+ u32 min_index;
+ u32 last;
+ size_t key_offset;
+ size_t key_size;
+};
+
+/* initialize slab caches for managed objects */
+int rxe_cache_init(void);
+
+/* cleanup slab caches for managed objects */
+void rxe_cache_exit(void);
+
+/* initialize a pool of objects with given limit on
+ * number of elements. gets parameters from rxe_type_info
+ * pool elements will be allocated out of a slab cache
+ */
+int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
+ enum rxe_elem_type type, u32 max_elem);
+
+/* free resources from object pool */
+int rxe_pool_cleanup(struct rxe_pool *pool);
+
+/* allocate an object from pool */
+void *rxe_alloc(struct rxe_pool *pool);
+
+/* assign an index to an indexed object and insert object into
+ * pool's rb tree
+ */
+void rxe_add_index(void *elem);
+
+/* drop an index and remove object from rb tree */
+void rxe_drop_index(void *elem);
+
+/* assign a key to a keyed object and insert object into
+ * pool's rb tree
+ */
+void rxe_add_key(void *elem, void *key);
+
+/* remove elem from rb tree */
+void rxe_drop_key(void *elem);
+
+/* lookup an indexed object from index. takes a reference on object */
+void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);
+
+/* lookup keyed object from key. takes a reference on the object */
+void *rxe_pool_get_key(struct rxe_pool *pool, void *key);
+
+/* cleanup an object when all references are dropped */
+void rxe_elem_release(struct kref *kref);
+
+/* take a reference on an object */
+#define rxe_add_ref(elem) kref_get(&(elem)->pelem.ref_cnt)
+
+/* drop a reference on an object */
+#define rxe_drop_ref(elem) kref_put(&(elem)->pelem.ref_cnt, rxe_elem_release)
+
+#endif /* RXE_POOL_H */
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/skbuff.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+#include "rxe_task.h"
+
+char *rxe_qp_state_name[] = {
+ [QP_STATE_RESET] = "RESET",
+ [QP_STATE_INIT] = "INIT",
+ [QP_STATE_READY] = "READY",
+ [QP_STATE_DRAIN] = "DRAIN",
+ [QP_STATE_DRAINED] = "DRAINED",
+ [QP_STATE_ERROR] = "ERROR",
+};
+
+static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
+ int has_srq)
+{
+ if (cap->max_send_wr > rxe->attr.max_qp_wr) {
+ pr_warn("invalid send wr = %d > %d\n",
+ cap->max_send_wr, rxe->attr.max_qp_wr);
+ goto err1;
+ }
+
+ if (cap->max_send_sge > rxe->attr.max_sge) {
+ pr_warn("invalid send sge = %d > %d\n",
+ cap->max_send_sge, rxe->attr.max_sge);
+ goto err1;
+ }
+
+ if (!has_srq) {
+ if (cap->max_recv_wr > rxe->attr.max_qp_wr) {
+ pr_warn("invalid recv wr = %d > %d\n",
+ cap->max_recv_wr, rxe->attr.max_qp_wr);
+ goto err1;
+ }
+
+ if (cap->max_recv_sge > rxe->attr.max_sge) {
+ pr_warn("invalid recv sge = %d > %d\n",
+ cap->max_recv_sge, rxe->attr.max_sge);
+ goto err1;
+ }
+ }
+
+ if (cap->max_inline_data > rxe->max_inline_data) {
+ pr_warn("invalid max inline data = %d > %d\n",
+ cap->max_inline_data, rxe->max_inline_data);
+ goto err1;
+ }
+
+ return 0;
+
+err1:
+ return -EINVAL;
+}
+
+int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
+{
+ struct ib_qp_cap *cap = &init->cap;
+ struct rxe_port *port;
+ int port_num = init->port_num;
+
+ if (!init->recv_cq || !init->send_cq) {
+ pr_warn("missing cq\n");
+ goto err1;
+ }
+
+ if (rxe_qp_chk_cap(rxe, cap, !!init->srq))
+ goto err1;
+
+ if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) {
+ if (port_num != 1) {
+ pr_warn("invalid port = %d\n", port_num);
+ goto err1;
+ }
+
+ port = &rxe->port;
+
+ if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) {
+ pr_warn("SMI QP exists for port %d\n", port_num);
+ goto err1;
+ }
+
+ if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) {
+ pr_warn("GSI QP exists for port %d\n", port_num);
+ goto err1;
+ }
+ }
+
+ return 0;
+
+err1:
+ return -EINVAL;
+}
+
+static int alloc_rd_atomic_resources(struct rxe_qp *qp, unsigned int n)
+{
+ qp->resp.res_head = 0;
+ qp->resp.res_tail = 0;
+ qp->resp.resources = kcalloc(n, sizeof(struct resp_res), GFP_KERNEL);
+
+ if (!qp->resp.resources)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void free_rd_atomic_resources(struct rxe_qp *qp)
+{
+ if (qp->resp.resources) {
+ int i;
+
+ for (i = 0; i < qp->attr.max_rd_atomic; i++) {
+ struct resp_res *res = &qp->resp.resources[i];
+
+ free_rd_atomic_resource(qp, res);
+ }
+ kfree(qp->resp.resources);
+ qp->resp.resources = NULL;
+ }
+}
+
+void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res)
+{
+ if (res->type == RXE_ATOMIC_MASK) {
+ rxe_drop_ref(qp);
+ kfree_skb(res->atomic.skb);
+ } else if (res->type == RXE_READ_MASK) {
+ if (res->read.mr)
+ rxe_drop_ref(res->read.mr);
+ }
+ res->type = 0;
+}
+
+static void cleanup_rd_atomic_resources(struct rxe_qp *qp)
+{
+ int i;
+ struct resp_res *res;
+
+ if (qp->resp.resources) {
+ for (i = 0; i < qp->attr.max_rd_atomic; i++) {
+ res = &qp->resp.resources[i];
+ free_rd_atomic_resource(qp, res);
+ }
+ }
+}
+
+static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
+ struct ib_qp_init_attr *init)
+{
+ struct rxe_port *port;
+ u32 qpn;
+
+ qp->sq_sig_type = init->sq_sig_type;
+ qp->attr.path_mtu = 1;
+ qp->mtu = ib_mtu_enum_to_int(qp->attr.path_mtu);
+
+ qpn = qp->pelem.index;
+ port = &rxe->port;
+
+ switch (init->qp_type) {
+ case IB_QPT_SMI:
+ qp->ibqp.qp_num = 0;
+ port->qp_smi_index = qpn;
+ qp->attr.port_num = init->port_num;
+ break;
+
+ case IB_QPT_GSI:
+ qp->ibqp.qp_num = 1;
+ port->qp_gsi_index = qpn;
+ qp->attr.port_num = init->port_num;
+ break;
+
+ default:
+ qp->ibqp.qp_num = qpn;
+ break;
+ }
+
+ INIT_LIST_HEAD(&qp->grp_list);
+
+ skb_queue_head_init(&qp->send_pkts);
+
+ spin_lock_init(&qp->grp_lock);
+ spin_lock_init(&qp->state_lock);
+
+ atomic_set(&qp->ssn, 0);
+ atomic_set(&qp->skb_out, 0);
+}
+
+static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
+ struct ib_qp_init_attr *init,
+ struct ib_ucontext *context, struct ib_udata *udata)
+{
+ int err;
+ int wqe_size;
+
+ err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);
+ if (err < 0)
+ return err;
+ qp->sk->sk->sk_user_data = qp;
+
+ qp->sq.max_wr = init->cap.max_send_wr;
+ qp->sq.max_sge = init->cap.max_send_sge;
+ qp->sq.max_inline = init->cap.max_inline_data;
+
+ wqe_size = max_t(int, sizeof(struct rxe_send_wqe) +
+ qp->sq.max_sge * sizeof(struct ib_sge),
+ sizeof(struct rxe_send_wqe) +
+ qp->sq.max_inline);
+
+ qp->sq.queue = rxe_queue_init(rxe,
+ &qp->sq.max_wr,
+ wqe_size);
+ if (!qp->sq.queue)
+ return -ENOMEM;
+
+ err = do_mmap_info(rxe, udata, true,
+ context, qp->sq.queue->buf,
+ qp->sq.queue->buf_size, &qp->sq.queue->ip);
+
+ if (err) {
+ kvfree(qp->sq.queue->buf);
+ kfree(qp->sq.queue);
+ return err;
+ }
+
+ qp->req.wqe_index = producer_index(qp->sq.queue);
+ qp->req.state = QP_STATE_RESET;
+ qp->req.opcode = -1;
+ qp->comp.opcode = -1;
+
+ spin_lock_init(&qp->sq.sq_lock);
+ skb_queue_head_init(&qp->req_pkts);
+
+ rxe_init_task(rxe, &qp->req.task, qp,
+ rxe_requester, "req");
+ rxe_init_task(rxe, &qp->comp.task, qp,
+ rxe_completer, "comp");
+
+ init_timer(&qp->rnr_nak_timer);
+ qp->rnr_nak_timer.function = rnr_nak_timer;
+ qp->rnr_nak_timer.data = (unsigned long)qp;
+
+ init_timer(&qp->retrans_timer);
+ qp->retrans_timer.function = retransmit_timer;
+ qp->retrans_timer.data = (unsigned long)qp;
+ qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
+
+ return 0;
+}
+
+static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
+ struct ib_qp_init_attr *init,
+ struct ib_ucontext *context, struct ib_udata *udata)
+{
+ int err;
+ int wqe_size;
+
+ if (!qp->srq) {
+ qp->rq.max_wr = init->cap.max_recv_wr;
+ qp->rq.max_sge = init->cap.max_recv_sge;
+
+ wqe_size = rcv_wqe_size(qp->rq.max_sge);
+
+ pr_debug("max_wr = %d, max_sge = %d, wqe_size = %d\n",
+ qp->rq.max_wr, qp->rq.max_sge, wqe_size);
+
+ qp->rq.queue = rxe_queue_init(rxe,
+ &qp->rq.max_wr,
+ wqe_size);
+ if (!qp->rq.queue)
+ return -ENOMEM;
+
+ err = do_mmap_info(rxe, udata, false, context,
+ qp->rq.queue->buf,
+ qp->rq.queue->buf_size,
+ &qp->rq.queue->ip);
+ if (err) {
+ kvfree(qp->rq.queue->buf);
+ kfree(qp->rq.queue);
+ return err;
+ }
+ }
+
+ spin_lock_init(&qp->rq.producer_lock);
+ spin_lock_init(&qp->rq.consumer_lock);
+
+ skb_queue_head_init(&qp->resp_pkts);
+
+ rxe_init_task(rxe, &qp->resp.task, qp,
+ rxe_responder, "resp");
+
+ qp->resp.opcode = OPCODE_NONE;
+ qp->resp.msn = 0;
+ qp->resp.state = QP_STATE_RESET;
+
+ return 0;
+}
+
+/* called by the create qp verb */
+int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
+ struct ib_qp_init_attr *init, struct ib_udata *udata,
+ struct ib_pd *ibpd)
+{
+ int err;
+ struct rxe_cq *rcq = to_rcq(init->recv_cq);
+ struct rxe_cq *scq = to_rcq(init->send_cq);
+ struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
+ struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
+
+ rxe_add_ref(pd);
+ rxe_add_ref(rcq);
+ rxe_add_ref(scq);
+ if (srq)
+ rxe_add_ref(srq);
+
+ qp->pd = pd;
+ qp->rcq = rcq;
+ qp->scq = scq;
+ qp->srq = srq;
+
+ rxe_qp_init_misc(rxe, qp, init);
+
+ err = rxe_qp_init_req(rxe, qp, init, context, udata);
+ if (err)
+ goto err1;
+
+ err = rxe_qp_init_resp(rxe, qp, init, context, udata);
+ if (err)
+ goto err2;
+
+ qp->attr.qp_state = IB_QPS_RESET;
+ qp->valid = 1;
+
+ return 0;
+
+err2:
+ rxe_queue_cleanup(qp->sq.queue);
+err1:
+ if (srq)
+ rxe_drop_ref(srq);
+ rxe_drop_ref(scq);
+ rxe_drop_ref(rcq);
+ rxe_drop_ref(pd);
+
+ return err;
+}
+
+/* called by the query qp verb */
+int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init)
+{
+ init->event_handler = qp->ibqp.event_handler;
+ init->qp_context = qp->ibqp.qp_context;
+ init->send_cq = qp->ibqp.send_cq;
+ init->recv_cq = qp->ibqp.recv_cq;
+ init->srq = qp->ibqp.srq;
+
+ init->cap.max_send_wr = qp->sq.max_wr;
+ init->cap.max_send_sge = qp->sq.max_sge;
+ init->cap.max_inline_data = qp->sq.max_inline;
+
+ if (!qp->srq) {
+ init->cap.max_recv_wr = qp->rq.max_wr;
+ init->cap.max_recv_sge = qp->rq.max_sge;
+ }
+
+ init->sq_sig_type = qp->sq_sig_type;
+
+ init->qp_type = qp->ibqp.qp_type;
+ init->port_num = 1;
+
+ return 0;
+}
+
+/* called by the modify qp verb, this routine checks all the parameters before
+ * making any changes
+ */
+int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
+ struct ib_qp_attr *attr, int mask)
+{
+ enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ?
+ attr->cur_qp_state : qp->attr.qp_state;
+ enum ib_qp_state new_state = (mask & IB_QP_STATE) ?
+ attr->qp_state : cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask,
+ IB_LINK_LAYER_ETHERNET)) {
+ pr_warn("invalid mask or state for qp\n");
+ goto err1;
+ }
+
+ if (mask & IB_QP_STATE) {
+ if (cur_state == IB_QPS_SQD) {
+ if (qp->req.state == QP_STATE_DRAIN &&
+ new_state != IB_QPS_ERR)
+ goto err1;
+ }
+ }
+
+ if (mask & IB_QP_PORT) {
+ if (attr->port_num != 1) {
+ pr_warn("invalid port %d\n", attr->port_num);
+ goto err1;
+ }
+ }
+
+ if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq))
+ goto err1;
+
+ if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr))
+ goto err1;
+
+ if (mask & IB_QP_ALT_PATH) {
+ if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr))
+ goto err1;
+ if (attr->alt_port_num != 1) {
+ pr_warn("invalid alt port %d\n", attr->alt_port_num);
+ goto err1;
+ }
+ if (attr->alt_timeout > 31) {
+ pr_warn("invalid QP alt timeout %d > 31\n",
+ attr->alt_timeout);
+ goto err1;
+ }
+ }
+
+ if (mask & IB_QP_PATH_MTU) {
+ struct rxe_port *port = &rxe->port;
+
+ enum ib_mtu max_mtu = port->attr.max_mtu;
+ enum ib_mtu mtu = attr->path_mtu;
+
+ if (mtu > max_mtu) {
+ pr_debug("invalid mtu (%d) > (%d)\n",
+ ib_mtu_enum_to_int(mtu),
+ ib_mtu_enum_to_int(max_mtu));
+ goto err1;
+ }
+ }
+
+ if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) {
+ pr_warn("invalid max_rd_atomic %d > %d\n",
+ attr->max_rd_atomic,
+ rxe->attr.max_qp_rd_atom);
+ goto err1;
+ }
+ }
+
+ if (mask & IB_QP_TIMEOUT) {
+ if (attr->timeout > 31) {
+ pr_warn("invalid QP timeout %d > 31\n",
+ attr->timeout);
+ goto err1;
+ }
+ }
+
+ return 0;
+
+err1:
+ return -EINVAL;
+}
+
+/* move the qp to the reset state */
+static void rxe_qp_reset(struct rxe_qp *qp)
+{
+ /* stop tasks from running */
+ rxe_disable_task(&qp->resp.task);
+
+ /* stop request/comp */
+ if (qp->sq.queue) {
+ if (qp_type(qp) == IB_QPT_RC)
+ rxe_disable_task(&qp->comp.task);
+ rxe_disable_task(&qp->req.task);
+ }
+
+ /* move qp to the reset state */
+ qp->req.state = QP_STATE_RESET;
+ qp->resp.state = QP_STATE_RESET;
+
+ /* let state machines reset themselves drain work and packet queues
+ * etc.
+ */
+ __rxe_do_task(&qp->resp.task);
+
+ if (qp->sq.queue) {
+ __rxe_do_task(&qp->comp.task);
+ __rxe_do_task(&qp->req.task);
+ }
+
+ /* cleanup attributes */
+ atomic_set(&qp->ssn, 0);
+ qp->req.opcode = -1;
+ qp->req.need_retry = 0;
+ qp->req.noack_pkts = 0;
+ qp->resp.msn = 0;
+ qp->resp.opcode = -1;
+ qp->resp.drop_msg = 0;
+ qp->resp.goto_error = 0;
+ qp->resp.sent_psn_nak = 0;
+
+ if (qp->resp.mr) {
+ rxe_drop_ref(qp->resp.mr);
+ qp->resp.mr = NULL;
+ }
+
+ cleanup_rd_atomic_resources(qp);
+
+ /* reenable tasks */
+ rxe_enable_task(&qp->resp.task);
+
+ if (qp->sq.queue) {
+ if (qp_type(qp) == IB_QPT_RC)
+ rxe_enable_task(&qp->comp.task);
+
+ rxe_enable_task(&qp->req.task);
+ }
+}
+
+/* drain the send queue */
+static void rxe_qp_drain(struct rxe_qp *qp)
+{
+ if (qp->sq.queue) {
+ if (qp->req.state != QP_STATE_DRAINED) {
+ qp->req.state = QP_STATE_DRAIN;
+ if (qp_type(qp) == IB_QPT_RC)
+ rxe_run_task(&qp->comp.task, 1);
+ else
+ __rxe_do_task(&qp->comp.task);
+ rxe_run_task(&qp->req.task, 1);
+ }
+ }
+}
+
+/* move the qp to the error state */
+void rxe_qp_error(struct rxe_qp *qp)
+{
+ qp->req.state = QP_STATE_ERROR;
+ qp->resp.state = QP_STATE_ERROR;
+
+ /* drain work and packet queues */
+ rxe_run_task(&qp->resp.task, 1);
+
+ if (qp_type(qp) == IB_QPT_RC)
+ rxe_run_task(&qp->comp.task, 1);
+ else
+ __rxe_do_task(&qp->comp.task);
+ rxe_run_task(&qp->req.task, 1);
+}
+
+/* called by the modify qp verb */
+int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
+ struct ib_udata *udata)
+{
+ int err;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ union ib_gid sgid;
+ struct ib_gid_attr sgid_attr;
+
+ if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic);
+
+ free_rd_atomic_resources(qp);
+
+ err = alloc_rd_atomic_resources(qp, max_rd_atomic);
+ if (err)
+ return err;
+
+ qp->attr.max_rd_atomic = max_rd_atomic;
+ atomic_set(&qp->req.rd_atomic, max_rd_atomic);
+ }
+
+ if (mask & IB_QP_CUR_STATE)
+ qp->attr.cur_qp_state = attr->qp_state;
+
+ if (mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
+ qp->attr.en_sqd_async_notify = attr->en_sqd_async_notify;
+
+ if (mask & IB_QP_ACCESS_FLAGS)
+ qp->attr.qp_access_flags = attr->qp_access_flags;
+
+ if (mask & IB_QP_PKEY_INDEX)
+ qp->attr.pkey_index = attr->pkey_index;
+
+ if (mask & IB_QP_PORT)
+ qp->attr.port_num = attr->port_num;
+
+ if (mask & IB_QP_QKEY)
+ qp->attr.qkey = attr->qkey;
+
+ if (mask & IB_QP_AV) {
+ ib_get_cached_gid(&rxe->ib_dev, 1,
+ attr->ah_attr.grh.sgid_index, &sgid,
+ &sgid_attr);
+ rxe_av_from_attr(rxe, attr->port_num, &qp->pri_av,
+ &attr->ah_attr);
+ rxe_av_fill_ip_info(rxe, &qp->pri_av, &attr->ah_attr,
+ &sgid_attr, &sgid);
+ if (sgid_attr.ndev)
+ dev_put(sgid_attr.ndev);
+ }
+
+ if (mask & IB_QP_ALT_PATH) {
+ ib_get_cached_gid(&rxe->ib_dev, 1,
+ attr->alt_ah_attr.grh.sgid_index, &sgid,
+ &sgid_attr);
+
+ rxe_av_from_attr(rxe, attr->alt_port_num, &qp->alt_av,
+ &attr->alt_ah_attr);
+ rxe_av_fill_ip_info(rxe, &qp->alt_av, &attr->alt_ah_attr,
+ &sgid_attr, &sgid);
+ if (sgid_attr.ndev)
+ dev_put(sgid_attr.ndev);
+
+ qp->attr.alt_port_num = attr->alt_port_num;
+ qp->attr.alt_pkey_index = attr->alt_pkey_index;
+ qp->attr.alt_timeout = attr->alt_timeout;
+ }
+
+ if (mask & IB_QP_PATH_MTU) {
+ qp->attr.path_mtu = attr->path_mtu;
+ qp->mtu = ib_mtu_enum_to_int(attr->path_mtu);
+ }
+
+ if (mask & IB_QP_TIMEOUT) {
+ qp->attr.timeout = attr->timeout;
+ if (attr->timeout == 0) {
+ qp->qp_timeout_jiffies = 0;
+ } else {
+ /* According to the spec, timeout = 4.096 * 2 ^ attr->timeout [us] */
+ int j = nsecs_to_jiffies(4096ULL << attr->timeout);
+
+ qp->qp_timeout_jiffies = j ? j : 1;
+ }
+ }
+
+ if (mask & IB_QP_RETRY_CNT) {
+ qp->attr.retry_cnt = attr->retry_cnt;
+ qp->comp.retry_cnt = attr->retry_cnt;
+ pr_debug("set retry count = %d\n", attr->retry_cnt);
+ }
+
+ if (mask & IB_QP_RNR_RETRY) {
+ qp->attr.rnr_retry = attr->rnr_retry;
+ qp->comp.rnr_retry = attr->rnr_retry;
+ pr_debug("set rnr retry count = %d\n", attr->rnr_retry);
+ }
+
+ if (mask & IB_QP_RQ_PSN) {
+ qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK);
+ qp->resp.psn = qp->attr.rq_psn;
+ pr_debug("set resp psn = 0x%x\n", qp->resp.psn);
+ }
+
+ if (mask & IB_QP_MIN_RNR_TIMER) {
+ qp->attr.min_rnr_timer = attr->min_rnr_timer;
+ pr_debug("set min rnr timer = 0x%x\n",
+ attr->min_rnr_timer);
+ }
+
+ if (mask & IB_QP_SQ_PSN) {
+ qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK);
+ qp->req.psn = qp->attr.sq_psn;
+ qp->comp.psn = qp->attr.sq_psn;
+ pr_debug("set req psn = 0x%x\n", qp->req.psn);
+ }
+
+ if (mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+ qp->attr.max_dest_rd_atomic =
+ __roundup_pow_of_two(attr->max_dest_rd_atomic);
+ }
+
+ if (mask & IB_QP_PATH_MIG_STATE)
+ qp->attr.path_mig_state = attr->path_mig_state;
+
+ if (mask & IB_QP_DEST_QPN)
+ qp->attr.dest_qp_num = attr->dest_qp_num;
+
+ if (mask & IB_QP_STATE) {
+ qp->attr.qp_state = attr->qp_state;
+
+ switch (attr->qp_state) {
+ case IB_QPS_RESET:
+ pr_debug("qp state -> RESET\n");
+ rxe_qp_reset(qp);
+ break;
+
+ case IB_QPS_INIT:
+ pr_debug("qp state -> INIT\n");
+ qp->req.state = QP_STATE_INIT;
+ qp->resp.state = QP_STATE_INIT;
+ break;
+
+ case IB_QPS_RTR:
+ pr_debug("qp state -> RTR\n");
+ qp->resp.state = QP_STATE_READY;
+ break;
+
+ case IB_QPS_RTS:
+ pr_debug("qp state -> RTS\n");
+ qp->req.state = QP_STATE_READY;
+ break;
+
+ case IB_QPS_SQD:
+ pr_debug("qp state -> SQD\n");
+ rxe_qp_drain(qp);
+ break;
+
+ case IB_QPS_SQE:
+ pr_warn("qp state -> SQE !!?\n");
+ /* Not possible from modify_qp. */
+ break;
+
+ case IB_QPS_ERR:
+ pr_debug("qp state -> ERR\n");
+ rxe_qp_error(qp);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/* called by the query qp verb */
+int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+
+ *attr = qp->attr;
+
+ attr->rq_psn = qp->resp.psn;
+ attr->sq_psn = qp->req.psn;
+
+ attr->cap.max_send_wr = qp->sq.max_wr;
+ attr->cap.max_send_sge = qp->sq.max_sge;
+ attr->cap.max_inline_data = qp->sq.max_inline;
+
+ if (!qp->srq) {
+ attr->cap.max_recv_wr = qp->rq.max_wr;
+ attr->cap.max_recv_sge = qp->rq.max_sge;
+ }
+
+ rxe_av_to_attr(rxe, &qp->pri_av, &attr->ah_attr);
+ rxe_av_to_attr(rxe, &qp->alt_av, &attr->alt_ah_attr);
+
+ if (qp->req.state == QP_STATE_DRAIN) {
+ attr->sq_draining = 1;
+ /* applications that get this state
+ * typically spin on it. yield the
+ * processor
+ */
+ cond_resched();
+ } else {
+ attr->sq_draining = 0;
+ }
+
+ pr_debug("attr->sq_draining = %d\n", attr->sq_draining);
+
+ return 0;
+}
+
+/* called by the destroy qp verb */
+void rxe_qp_destroy(struct rxe_qp *qp)
+{
+ qp->valid = 0;
+ qp->qp_timeout_jiffies = 0;
+ rxe_cleanup_task(&qp->resp.task);
+
+ del_timer_sync(&qp->retrans_timer);
+ del_timer_sync(&qp->rnr_nak_timer);
+
+ rxe_cleanup_task(&qp->req.task);
+ if (qp_type(qp) == IB_QPT_RC)
+ rxe_cleanup_task(&qp->comp.task);
+
+ /* flush out any receive wr's or pending requests */
+ __rxe_do_task(&qp->req.task);
+ if (qp->sq.queue) {
+ __rxe_do_task(&qp->comp.task);
+ __rxe_do_task(&qp->req.task);
+ }
+}
+
+/* called when the last reference to the qp is dropped */
+void rxe_qp_cleanup(void *arg)
+{
+ struct rxe_qp *qp = arg;
+
+ rxe_drop_all_mcast_groups(qp);
+
+ if (qp->sq.queue)
+ rxe_queue_cleanup(qp->sq.queue);
+
+ if (qp->srq)
+ rxe_drop_ref(qp->srq);
+
+ if (qp->rq.queue)
+ rxe_queue_cleanup(qp->rq.queue);
+
+ if (qp->scq)
+ rxe_drop_ref(qp->scq);
+ if (qp->rcq)
+ rxe_drop_ref(qp->rcq);
+ if (qp->pd)
+ rxe_drop_ref(qp->pd);
+
+ if (qp->resp.mr) {
+ rxe_drop_ref(qp->resp.mr);
+ qp->resp.mr = NULL;
+ }
+
+ free_rd_atomic_resources(qp);
+
+ kernel_sock_shutdown(qp->sk, SHUT_RDWR);
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must retailuce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/vmalloc.h>
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+
+int do_mmap_info(struct rxe_dev *rxe,
+ struct ib_udata *udata,
+ bool is_req,
+ struct ib_ucontext *context,
+ struct rxe_queue_buf *buf,
+ size_t buf_size,
+ struct rxe_mmap_info **ip_p)
+{
+ int err;
+ u32 len, offset;
+ struct rxe_mmap_info *ip = NULL;
+
+ if (udata) {
+ if (is_req) {
+ len = udata->outlen - sizeof(struct mminfo);
+ offset = sizeof(struct mminfo);
+ } else {
+ len = udata->outlen;
+ offset = 0;
+ }
+
+ if (len < sizeof(ip->info))
+ goto err1;
+
+ ip = rxe_create_mmap_info(rxe, buf_size, context, buf);
+ if (!ip)
+ goto err1;
+
+ err = copy_to_user(udata->outbuf + offset, &ip->info,
+ sizeof(ip->info));
+ if (err)
+ goto err2;
+
+ spin_lock_bh(&rxe->pending_lock);
+ list_add(&ip->pending_mmaps, &rxe->pending_mmaps);
+ spin_unlock_bh(&rxe->pending_lock);
+ }
+
+ *ip_p = ip;
+
+ return 0;
+
+err2:
+ kfree(ip);
+err1:
+ return -EINVAL;
+}
+
+struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
+ int *num_elem,
+ unsigned int elem_size)
+{
+ struct rxe_queue *q;
+ size_t buf_size;
+ unsigned int num_slots;
+
+ /* num_elem == 0 is allowed, but uninteresting */
+ if (*num_elem < 0)
+ goto err1;
+
+ q = kmalloc(sizeof(*q), GFP_KERNEL);
+ if (!q)
+ goto err1;
+
+ q->rxe = rxe;
+
+ /* used in resize, only need to copy used part of queue */
+ q->elem_size = elem_size;
+
+ /* pad element up to at least a cacheline and always a power of 2 */
+ if (elem_size < cache_line_size())
+ elem_size = cache_line_size();
+ elem_size = roundup_pow_of_two(elem_size);
+
+ q->log2_elem_size = order_base_2(elem_size);
+
+ num_slots = *num_elem + 1;
+ num_slots = roundup_pow_of_two(num_slots);
+ q->index_mask = num_slots - 1;
+
+ buf_size = sizeof(struct rxe_queue_buf) + num_slots * elem_size;
+
+ q->buf = vmalloc_user(buf_size);
+ if (!q->buf)
+ goto err2;
+
+ q->buf->log2_elem_size = q->log2_elem_size;
+ q->buf->index_mask = q->index_mask;
+
+ q->buf_size = buf_size;
+
+ *num_elem = num_slots - 1;
+ return q;
+
+err2:
+ kfree(q);
+err1:
+ return NULL;
+}
+
+/* copies elements from original q to new q and then swaps the contents of the
+ * two q headers. This is so that if anyone is holding a pointer to q it will
+ * still work
+ */
+static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q,
+ unsigned int num_elem)
+{
+ if (!queue_empty(q) && (num_elem < queue_count(q)))
+ return -EINVAL;
+
+ while (!queue_empty(q)) {
+ memcpy(producer_addr(new_q), consumer_addr(q),
+ new_q->elem_size);
+ advance_producer(new_q);
+ advance_consumer(q);
+ }
+
+ swap(*q, *new_q);
+
+ return 0;
+}
+
+int rxe_queue_resize(struct rxe_queue *q,
+ unsigned int *num_elem_p,
+ unsigned int elem_size,
+ struct ib_ucontext *context,
+ struct ib_udata *udata,
+ spinlock_t *producer_lock,
+ spinlock_t *consumer_lock)
+{
+ struct rxe_queue *new_q;
+ unsigned int num_elem = *num_elem_p;
+ int err;
+ unsigned long flags = 0, flags1;
+
+ new_q = rxe_queue_init(q->rxe, &num_elem, elem_size);
+ if (!new_q)
+ return -ENOMEM;
+
+ err = do_mmap_info(new_q->rxe, udata, false, context, new_q->buf,
+ new_q->buf_size, &new_q->ip);
+ if (err) {
+ vfree(new_q->buf);
+ kfree(new_q);
+ goto err1;
+ }
+
+ spin_lock_irqsave(consumer_lock, flags1);
+
+ if (producer_lock) {
+ spin_lock_irqsave(producer_lock, flags);
+ err = resize_finish(q, new_q, num_elem);
+ spin_unlock_irqrestore(producer_lock, flags);
+ } else {
+ err = resize_finish(q, new_q, num_elem);
+ }
+
+ spin_unlock_irqrestore(consumer_lock, flags1);
+
+ rxe_queue_cleanup(new_q); /* new/old dep on err */
+ if (err)
+ goto err1;
+
+ *num_elem_p = num_elem;
+ return 0;
+
+err1:
+ return err;
+}
+
+void rxe_queue_cleanup(struct rxe_queue *q)
+{
+ if (q->ip)
+ kref_put(&q->ip->ref, rxe_mmap_release);
+ else
+ vfree(q->buf);
+
+ kfree(q);
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_QUEUE_H
+#define RXE_QUEUE_H
+
+/* implements a simple circular buffer that can optionally be
+ * shared between user space and the kernel and can be resized
+
+ * the requested element size is rounded up to a power of 2
+ * and the number of elements in the buffer is also rounded
+ * up to a power of 2. Since the queue is empty when the
+ * producer and consumer indices match the maximum capacity
+ * of the queue is one less than the number of element slots
+ */
+
+/* this data structure is shared between user space and kernel
+ * space for those cases where the queue is shared. It contains
+ * the producer and consumer indices. Is also contains a copy
+ * of the queue size parameters for user space to use but the
+ * kernel must use the parameters in the rxe_queue struct
+ * this MUST MATCH the corresponding librxe struct
+ * for performance reasons arrange to have producer and consumer
+ * pointers in separate cache lines
+ * the kernel should always mask the indices to avoid accessing
+ * memory outside of the data area
+ */
+struct rxe_queue_buf {
+ __u32 log2_elem_size;
+ __u32 index_mask;
+ __u32 pad_1[30];
+ __u32 producer_index;
+ __u32 pad_2[31];
+ __u32 consumer_index;
+ __u32 pad_3[31];
+ __u8 data[0];
+};
+
+struct rxe_queue {
+ struct rxe_dev *rxe;
+ struct rxe_queue_buf *buf;
+ struct rxe_mmap_info *ip;
+ size_t buf_size;
+ size_t elem_size;
+ unsigned int log2_elem_size;
+ unsigned int index_mask;
+};
+
+int do_mmap_info(struct rxe_dev *rxe,
+ struct ib_udata *udata,
+ bool is_req,
+ struct ib_ucontext *context,
+ struct rxe_queue_buf *buf,
+ size_t buf_size,
+ struct rxe_mmap_info **ip_p);
+
+struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
+ int *num_elem,
+ unsigned int elem_size);
+
+int rxe_queue_resize(struct rxe_queue *q,
+ unsigned int *num_elem_p,
+ unsigned int elem_size,
+ struct ib_ucontext *context,
+ struct ib_udata *udata,
+ /* Protect producers while resizing queue */
+ spinlock_t *producer_lock,
+ /* Protect consumers while resizing queue */
+ spinlock_t *consumer_lock);
+
+void rxe_queue_cleanup(struct rxe_queue *queue);
+
+static inline int next_index(struct rxe_queue *q, int index)
+{
+ return (index + 1) & q->buf->index_mask;
+}
+
+static inline int queue_empty(struct rxe_queue *q)
+{
+ return ((q->buf->producer_index - q->buf->consumer_index)
+ & q->index_mask) == 0;
+}
+
+static inline int queue_full(struct rxe_queue *q)
+{
+ return ((q->buf->producer_index + 1 - q->buf->consumer_index)
+ & q->index_mask) == 0;
+}
+
+static inline void advance_producer(struct rxe_queue *q)
+{
+ q->buf->producer_index = (q->buf->producer_index + 1)
+ & q->index_mask;
+}
+
+static inline void advance_consumer(struct rxe_queue *q)
+{
+ q->buf->consumer_index = (q->buf->consumer_index + 1)
+ & q->index_mask;
+}
+
+static inline void *producer_addr(struct rxe_queue *q)
+{
+ return q->buf->data + ((q->buf->producer_index & q->index_mask)
+ << q->log2_elem_size);
+}
+
+static inline void *consumer_addr(struct rxe_queue *q)
+{
+ return q->buf->data + ((q->buf->consumer_index & q->index_mask)
+ << q->log2_elem_size);
+}
+
+static inline unsigned int producer_index(struct rxe_queue *q)
+{
+ return q->buf->producer_index;
+}
+
+static inline unsigned int consumer_index(struct rxe_queue *q)
+{
+ return q->buf->consumer_index;
+}
+
+static inline void *addr_from_index(struct rxe_queue *q, unsigned int index)
+{
+ return q->buf->data + ((index & q->index_mask)
+ << q->buf->log2_elem_size);
+}
+
+static inline unsigned int index_from_addr(const struct rxe_queue *q,
+ const void *addr)
+{
+ return (((u8 *)addr - q->buf->data) >> q->log2_elem_size)
+ & q->index_mask;
+}
+
+static inline unsigned int queue_count(const struct rxe_queue *q)
+{
+ return (q->buf->producer_index - q->buf->consumer_index)
+ & q->index_mask;
+}
+
+static inline void *queue_head(struct rxe_queue *q)
+{
+ return queue_empty(q) ? NULL : consumer_addr(q);
+}
+
+#endif /* RXE_QUEUE_H */
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/skbuff.h>
+
+#include "rxe.h"
+#include "rxe_loc.h"
+
+static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ struct rxe_qp *qp)
+{
+ if (unlikely(!qp->valid))
+ goto err1;
+
+ switch (qp_type(qp)) {
+ case IB_QPT_RC:
+ if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) {
+ pr_warn_ratelimited("bad qp type\n");
+ goto err1;
+ }
+ break;
+ case IB_QPT_UC:
+ if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) {
+ pr_warn_ratelimited("bad qp type\n");
+ goto err1;
+ }
+ break;
+ case IB_QPT_UD:
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) {
+ pr_warn_ratelimited("bad qp type\n");
+ goto err1;
+ }
+ break;
+ default:
+ pr_warn_ratelimited("unsupported qp type\n");
+ goto err1;
+ }
+
+ if (pkt->mask & RXE_REQ_MASK) {
+ if (unlikely(qp->resp.state != QP_STATE_READY))
+ goto err1;
+ } else if (unlikely(qp->req.state < QP_STATE_READY ||
+ qp->req.state > QP_STATE_DRAINED)) {
+ goto err1;
+ }
+
+ return 0;
+
+err1:
+ return -EINVAL;
+}
+
+static void set_bad_pkey_cntr(struct rxe_port *port)
+{
+ spin_lock_bh(&port->port_lock);
+ port->attr.bad_pkey_cntr = min((u32)0xffff,
+ port->attr.bad_pkey_cntr + 1);
+ spin_unlock_bh(&port->port_lock);
+}
+
+static void set_qkey_viol_cntr(struct rxe_port *port)
+{
+ spin_lock_bh(&port->port_lock);
+ port->attr.qkey_viol_cntr = min((u32)0xffff,
+ port->attr.qkey_viol_cntr + 1);
+ spin_unlock_bh(&port->port_lock);
+}
+
+static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ u32 qpn, struct rxe_qp *qp)
+{
+ int i;
+ int found_pkey = 0;
+ struct rxe_port *port = &rxe->port;
+ u16 pkey = bth_pkey(pkt);
+
+ pkt->pkey_index = 0;
+
+ if (qpn == 1) {
+ for (i = 0; i < port->attr.pkey_tbl_len; i++) {
+ if (pkey_match(pkey, port->pkey_tbl[i])) {
+ pkt->pkey_index = i;
+ found_pkey = 1;
+ break;
+ }
+ }
+
+ if (!found_pkey) {
+ pr_warn_ratelimited("bad pkey = 0x%x\n", pkey);
+ set_bad_pkey_cntr(port);
+ goto err1;
+ }
+ } else if (qpn != 0) {
+ if (unlikely(!pkey_match(pkey,
+ port->pkey_tbl[qp->attr.pkey_index]
+ ))) {
+ pr_warn_ratelimited("bad pkey = 0x%0x\n", pkey);
+ set_bad_pkey_cntr(port);
+ goto err1;
+ }
+ pkt->pkey_index = qp->attr.pkey_index;
+ }
+
+ if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) &&
+ qpn != 0 && pkt->mask) {
+ u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey;
+
+ if (unlikely(deth_qkey(pkt) != qkey)) {
+ pr_warn_ratelimited("bad qkey, got 0x%x expected 0x%x for qpn 0x%x\n",
+ deth_qkey(pkt), qkey, qpn);
+ set_qkey_viol_cntr(port);
+ goto err1;
+ }
+ }
+
+ return 0;
+
+err1:
+ return -EINVAL;
+}
+
+static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ struct rxe_qp *qp)
+{
+ struct sk_buff *skb = PKT_TO_SKB(pkt);
+
+ if (qp_type(qp) != IB_QPT_RC && qp_type(qp) != IB_QPT_UC)
+ goto done;
+
+ if (unlikely(pkt->port_num != qp->attr.port_num)) {
+ pr_warn_ratelimited("port %d != qp port %d\n",
+ pkt->port_num, qp->attr.port_num);
+ goto err1;
+ }
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ struct in_addr *saddr =
+ &qp->pri_av.sgid_addr._sockaddr_in.sin_addr;
+ struct in_addr *daddr =
+ &qp->pri_av.dgid_addr._sockaddr_in.sin_addr;
+
+ if (ip_hdr(skb)->daddr != saddr->s_addr) {
+ pr_warn_ratelimited("dst addr %pI4 != qp source addr %pI4\n",
+ &ip_hdr(skb)->daddr,
+ &saddr->s_addr);
+ goto err1;
+ }
+
+ if (ip_hdr(skb)->saddr != daddr->s_addr) {
+ pr_warn_ratelimited("source addr %pI4 != qp dst addr %pI4\n",
+ &ip_hdr(skb)->saddr,
+ &daddr->s_addr);
+ goto err1;
+ }
+
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ struct in6_addr *saddr =
+ &qp->pri_av.sgid_addr._sockaddr_in6.sin6_addr;
+ struct in6_addr *daddr =
+ &qp->pri_av.dgid_addr._sockaddr_in6.sin6_addr;
+
+ if (memcmp(&ipv6_hdr(skb)->daddr, saddr, sizeof(*saddr))) {
+ pr_warn_ratelimited("dst addr %pI6 != qp source addr %pI6\n",
+ &ipv6_hdr(skb)->daddr, saddr);
+ goto err1;
+ }
+
+ if (memcmp(&ipv6_hdr(skb)->saddr, daddr, sizeof(*daddr))) {
+ pr_warn_ratelimited("source addr %pI6 != qp dst addr %pI6\n",
+ &ipv6_hdr(skb)->saddr, daddr);
+ goto err1;
+ }
+ }
+
+done:
+ return 0;
+
+err1:
+ return -EINVAL;
+}
+
+static int hdr_check(struct rxe_pkt_info *pkt)
+{
+ struct rxe_dev *rxe = pkt->rxe;
+ struct rxe_port *port = &rxe->port;
+ struct rxe_qp *qp = NULL;
+ u32 qpn = bth_qpn(pkt);
+ int index;
+ int err;
+
+ if (unlikely(bth_tver(pkt) != BTH_TVER)) {
+ pr_warn_ratelimited("bad tver\n");
+ goto err1;
+ }
+
+ if (qpn != IB_MULTICAST_QPN) {
+ index = (qpn == 0) ? port->qp_smi_index :
+ ((qpn == 1) ? port->qp_gsi_index : qpn);
+ qp = rxe_pool_get_index(&rxe->qp_pool, index);
+ if (unlikely(!qp)) {
+ pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn);
+ goto err1;
+ }
+
+ err = check_type_state(rxe, pkt, qp);
+ if (unlikely(err))
+ goto err2;
+
+ err = check_addr(rxe, pkt, qp);
+ if (unlikely(err))
+ goto err2;
+
+ err = check_keys(rxe, pkt, qpn, qp);
+ if (unlikely(err))
+ goto err2;
+ } else {
+ if (unlikely((pkt->mask & RXE_GRH_MASK) == 0)) {
+ pr_warn_ratelimited("no grh for mcast qpn\n");
+ goto err1;
+ }
+ }
+
+ pkt->qp = qp;
+ return 0;
+
+err2:
+ if (qp)
+ rxe_drop_ref(qp);
+err1:
+ return -EINVAL;
+}
+
+static inline void rxe_rcv_pkt(struct rxe_dev *rxe,
+ struct rxe_pkt_info *pkt,
+ struct sk_buff *skb)
+{
+ if (pkt->mask & RXE_REQ_MASK)
+ rxe_resp_queue_pkt(rxe, pkt->qp, skb);
+ else
+ rxe_comp_queue_pkt(rxe, pkt->qp, skb);
+}
+
+static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
+{
+ struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+ struct rxe_mc_grp *mcg;
+ struct sk_buff *skb_copy;
+ struct rxe_mc_elem *mce;
+ struct rxe_qp *qp;
+ union ib_gid dgid;
+ int err;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
+ (struct in6_addr *)&dgid);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid));
+
+ /* lookup mcast group corresponding to mgid, takes a ref */
+ mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid);
+ if (!mcg)
+ goto err1; /* mcast group not registered */
+
+ spin_lock_bh(&mcg->mcg_lock);
+
+ list_for_each_entry(mce, &mcg->qp_list, qp_list) {
+ qp = mce->qp;
+ pkt = SKB_TO_PKT(skb);
+
+ /* validate qp for incoming packet */
+ err = check_type_state(rxe, pkt, qp);
+ if (err)
+ continue;
+
+ err = check_keys(rxe, pkt, bth_qpn(pkt), qp);
+ if (err)
+ continue;
+
+ /* if *not* the last qp in the list
+ * make a copy of the skb to post to the next qp
+ */
+ skb_copy = (mce->qp_list.next != &mcg->qp_list) ?
+ skb_clone(skb, GFP_KERNEL) : NULL;
+
+ pkt->qp = qp;
+ rxe_add_ref(qp);
+ rxe_rcv_pkt(rxe, pkt, skb);
+
+ skb = skb_copy;
+ if (!skb)
+ break;
+ }
+
+ spin_unlock_bh(&mcg->mcg_lock);
+
+ rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */
+
+err1:
+ if (skb)
+ kfree_skb(skb);
+}
+
+static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
+{
+ union ib_gid dgid;
+ union ib_gid *pdgid;
+ u16 index;
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
+ (struct in6_addr *)&dgid);
+ pdgid = &dgid;
+ } else {
+ pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr;
+ }
+
+ return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid,
+ IB_GID_TYPE_ROCE_UDP_ENCAP,
+ 1, rxe->ndev, &index);
+}
+
+/* rxe_rcv is called from the interface driver */
+int rxe_rcv(struct sk_buff *skb)
+{
+ int err;
+ struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+ struct rxe_dev *rxe = pkt->rxe;
+ __be32 *icrcp;
+ u32 calc_icrc, pack_icrc;
+
+ pkt->offset = 0;
+
+ if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES))
+ goto drop;
+
+ if (unlikely(rxe_match_dgid(rxe, skb) < 0)) {
+ pr_warn_ratelimited("failed matching dgid\n");
+ goto drop;
+ }
+
+ pkt->opcode = bth_opcode(pkt);
+ pkt->psn = bth_psn(pkt);
+ pkt->qp = NULL;
+ pkt->mask |= rxe_opcode[pkt->opcode].mask;
+
+ if (unlikely(skb->len < header_size(pkt)))
+ goto drop;
+
+ err = hdr_check(pkt);
+ if (unlikely(err))
+ goto drop;
+
+ /* Verify ICRC */
+ icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE);
+ pack_icrc = be32_to_cpu(*icrcp);
+
+ calc_icrc = rxe_icrc_hdr(pkt, skb);
+ calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt), payload_size(pkt));
+ calc_icrc = cpu_to_be32(~calc_icrc);
+ if (unlikely(calc_icrc != pack_icrc)) {
+ char saddr[sizeof(struct in6_addr)];
+
+ if (skb->protocol == htons(ETH_P_IPV6))
+ sprintf(saddr, "%pI6", &ipv6_hdr(skb)->saddr);
+ else if (skb->protocol == htons(ETH_P_IP))
+ sprintf(saddr, "%pI4", &ip_hdr(skb)->saddr);
+ else
+ sprintf(saddr, "unknown");
+
+ pr_warn_ratelimited("bad ICRC from %s\n", saddr);
+ goto drop;
+ }
+
+ if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN))
+ rxe_rcv_mcast_pkt(rxe, skb);
+ else
+ rxe_rcv_pkt(rxe, pkt, skb);
+
+ return 0;
+
+drop:
+ if (pkt->qp)
+ rxe_drop_ref(pkt->qp);
+
+ kfree_skb(skb);
+ return 0;
+}
+EXPORT_SYMBOL(rxe_rcv);
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/skbuff.h>
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+
+static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+ unsigned opcode);
+
+static inline void retry_first_write_send(struct rxe_qp *qp,
+ struct rxe_send_wqe *wqe,
+ unsigned mask, int npsn)
+{
+ int i;
+
+ for (i = 0; i < npsn; i++) {
+ int to_send = (wqe->dma.resid > qp->mtu) ?
+ qp->mtu : wqe->dma.resid;
+
+ qp->req.opcode = next_opcode(qp, wqe,
+ wqe->wr.opcode);
+
+ if (wqe->wr.send_flags & IB_SEND_INLINE) {
+ wqe->dma.resid -= to_send;
+ wqe->dma.sge_offset += to_send;
+ } else {
+ advance_dma_data(&wqe->dma, to_send);
+ }
+ if (mask & WR_WRITE_MASK)
+ wqe->iova += qp->mtu;
+ }
+}
+
+static void req_retry(struct rxe_qp *qp)
+{
+ struct rxe_send_wqe *wqe;
+ unsigned int wqe_index;
+ unsigned int mask;
+ int npsn;
+ int first = 1;
+
+ wqe = queue_head(qp->sq.queue);
+ npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK;
+
+ qp->req.wqe_index = consumer_index(qp->sq.queue);
+ qp->req.psn = qp->comp.psn;
+ qp->req.opcode = -1;
+
+ for (wqe_index = consumer_index(qp->sq.queue);
+ wqe_index != producer_index(qp->sq.queue);
+ wqe_index = next_index(qp->sq.queue, wqe_index)) {
+ wqe = addr_from_index(qp->sq.queue, wqe_index);
+ mask = wr_opcode_mask(wqe->wr.opcode, qp);
+
+ if (wqe->state == wqe_state_posted)
+ break;
+
+ if (wqe->state == wqe_state_done)
+ continue;
+
+ wqe->iova = (mask & WR_ATOMIC_MASK) ?
+ wqe->wr.wr.atomic.remote_addr :
+ (mask & WR_READ_OR_WRITE_MASK) ?
+ wqe->wr.wr.rdma.remote_addr :
+ 0;
+
+ if (!first || (mask & WR_READ_MASK) == 0) {
+ wqe->dma.resid = wqe->dma.length;
+ wqe->dma.cur_sge = 0;
+ wqe->dma.sge_offset = 0;
+ }
+
+ if (first) {
+ first = 0;
+
+ if (mask & WR_WRITE_OR_SEND_MASK)
+ retry_first_write_send(qp, wqe, mask, npsn);
+
+ if (mask & WR_READ_MASK)
+ wqe->iova += npsn * qp->mtu;
+ }
+
+ wqe->state = wqe_state_posted;
+ }
+}
+
+void rnr_nak_timer(unsigned long data)
+{
+ struct rxe_qp *qp = (struct rxe_qp *)data;
+
+ pr_debug("rnr nak timer fired\n");
+ rxe_run_task(&qp->req.task, 1);
+}
+
+static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
+{
+ struct rxe_send_wqe *wqe = queue_head(qp->sq.queue);
+ unsigned long flags;
+
+ if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
+ /* check to see if we are drained;
+ * state_lock used by requester and completer
+ */
+ spin_lock_irqsave(&qp->state_lock, flags);
+ do {
+ if (qp->req.state != QP_STATE_DRAIN) {
+ /* comp just finished */
+ spin_unlock_irqrestore(&qp->state_lock,
+ flags);
+ break;
+ }
+
+ if (wqe && ((qp->req.wqe_index !=
+ consumer_index(qp->sq.queue)) ||
+ (wqe->state != wqe_state_posted))) {
+ /* comp not done yet */
+ spin_unlock_irqrestore(&qp->state_lock,
+ flags);
+ break;
+ }
+
+ qp->req.state = QP_STATE_DRAINED;
+ spin_unlock_irqrestore(&qp->state_lock, flags);
+
+ if (qp->ibqp.event_handler) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_SQ_DRAINED;
+ qp->ibqp.event_handler(&ev,
+ qp->ibqp.qp_context);
+ }
+ } while (0);
+ }
+
+ if (qp->req.wqe_index == producer_index(qp->sq.queue))
+ return NULL;
+
+ wqe = addr_from_index(qp->sq.queue, qp->req.wqe_index);
+
+ if (unlikely((qp->req.state == QP_STATE_DRAIN ||
+ qp->req.state == QP_STATE_DRAINED) &&
+ (wqe->state != wqe_state_processing)))
+ return NULL;
+
+ if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) &&
+ (qp->req.wqe_index != consumer_index(qp->sq.queue)))) {
+ qp->req.wait_fence = 1;
+ return NULL;
+ }
+
+ wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
+ return wqe;
+}
+
+static int next_opcode_rc(struct rxe_qp *qp, unsigned opcode, int fits)
+{
+ switch (opcode) {
+ case IB_WR_RDMA_WRITE:
+ if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
+ qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
+ return fits ?
+ IB_OPCODE_RC_RDMA_WRITE_LAST :
+ IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
+ else
+ return fits ?
+ IB_OPCODE_RC_RDMA_WRITE_ONLY :
+ IB_OPCODE_RC_RDMA_WRITE_FIRST;
+
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
+ qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
+ return fits ?
+ IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
+ IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
+ else
+ return fits ?
+ IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
+ IB_OPCODE_RC_RDMA_WRITE_FIRST;
+
+ case IB_WR_SEND:
+ if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
+ qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
+ return fits ?
+ IB_OPCODE_RC_SEND_LAST :
+ IB_OPCODE_RC_SEND_MIDDLE;
+ else
+ return fits ?
+ IB_OPCODE_RC_SEND_ONLY :
+ IB_OPCODE_RC_SEND_FIRST;
+
+ case IB_WR_SEND_WITH_IMM:
+ if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
+ qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
+ return fits ?
+ IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE :
+ IB_OPCODE_RC_SEND_MIDDLE;
+ else
+ return fits ?
+ IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE :
+ IB_OPCODE_RC_SEND_FIRST;
+
+ case IB_WR_RDMA_READ:
+ return IB_OPCODE_RC_RDMA_READ_REQUEST;
+
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ return IB_OPCODE_RC_COMPARE_SWAP;
+
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ return IB_OPCODE_RC_FETCH_ADD;
+
+ case IB_WR_SEND_WITH_INV:
+ if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
+ qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
+ return fits ? IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE :
+ IB_OPCODE_RC_SEND_MIDDLE;
+ else
+ return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
+ IB_OPCODE_RC_SEND_FIRST;
+ case IB_WR_REG_MR:
+ case IB_WR_LOCAL_INV:
+ return opcode;
+ }
+
+ return -EINVAL;
+}
+
+static int next_opcode_uc(struct rxe_qp *qp, unsigned opcode, int fits)
+{
+ switch (opcode) {
+ case IB_WR_RDMA_WRITE:
+ if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
+ qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
+ return fits ?
+ IB_OPCODE_UC_RDMA_WRITE_LAST :
+ IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
+ else
+ return fits ?
+ IB_OPCODE_UC_RDMA_WRITE_ONLY :
+ IB_OPCODE_UC_RDMA_WRITE_FIRST;
+
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
+ qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
+ return fits ?
+ IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
+ IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
+ else
+ return fits ?
+ IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
+ IB_OPCODE_UC_RDMA_WRITE_FIRST;
+
+ case IB_WR_SEND:
+ if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
+ qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
+ return fits ?
+ IB_OPCODE_UC_SEND_LAST :
+ IB_OPCODE_UC_SEND_MIDDLE;
+ else
+ return fits ?
+ IB_OPCODE_UC_SEND_ONLY :
+ IB_OPCODE_UC_SEND_FIRST;
+
+ case IB_WR_SEND_WITH_IMM:
+ if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
+ qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
+ return fits ?
+ IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE :
+ IB_OPCODE_UC_SEND_MIDDLE;
+ else
+ return fits ?
+ IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE :
+ IB_OPCODE_UC_SEND_FIRST;
+ }
+
+ return -EINVAL;
+}
+
+static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+ unsigned opcode)
+{
+ int fits = (wqe->dma.resid <= qp->mtu);
+
+ switch (qp_type(qp)) {
+ case IB_QPT_RC:
+ return next_opcode_rc(qp, opcode, fits);
+
+ case IB_QPT_UC:
+ return next_opcode_uc(qp, opcode, fits);
+
+ case IB_QPT_SMI:
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ switch (opcode) {
+ case IB_WR_SEND:
+ return IB_OPCODE_UD_SEND_ONLY;
+
+ case IB_WR_SEND_WITH_IMM:
+ return IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+
+static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+ int depth;
+
+ if (wqe->has_rd_atomic)
+ return 0;
+
+ qp->req.need_rd_atomic = 1;
+ depth = atomic_dec_return(&qp->req.rd_atomic);
+
+ if (depth >= 0) {
+ qp->req.need_rd_atomic = 0;
+ wqe->has_rd_atomic = 1;
+ return 0;
+ }
+
+ atomic_inc(&qp->req.rd_atomic);
+ return -EAGAIN;
+}
+
+static inline int get_mtu(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ struct rxe_port *port;
+ struct rxe_av *av;
+
+ if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))
+ return qp->mtu;
+
+ av = &wqe->av;
+ port = &rxe->port;
+
+ return port->mtu_cap;
+}
+
+static struct sk_buff *init_req_packet(struct rxe_qp *qp,
+ struct rxe_send_wqe *wqe,
+ int opcode, int payload,
+ struct rxe_pkt_info *pkt)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ struct rxe_port *port = &rxe->port;
+ struct sk_buff *skb;
+ struct rxe_send_wr *ibwr = &wqe->wr;
+ struct rxe_av *av;
+ int pad = (-payload) & 0x3;
+ int paylen;
+ int solicited;
+ u16 pkey;
+ u32 qp_num;
+ int ack_req;
+
+ /* length from start of bth to end of icrc */
+ paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
+
+ /* pkt->hdr, rxe, port_num and mask are initialized in ifc
+ * layer
+ */
+ pkt->opcode = opcode;
+ pkt->qp = qp;
+ pkt->psn = qp->req.psn;
+ pkt->mask = rxe_opcode[opcode].mask;
+ pkt->paylen = paylen;
+ pkt->offset = 0;
+ pkt->wqe = wqe;
+
+ /* init skb */
+ av = rxe_get_av(pkt);
+ skb = rxe->ifc_ops->init_packet(rxe, av, paylen, pkt);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* init bth */
+ solicited = (ibwr->send_flags & IB_SEND_SOLICITED) &&
+ (pkt->mask & RXE_END_MASK) &&
+ ((pkt->mask & (RXE_SEND_MASK)) ||
+ (pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
+ (RXE_WRITE_MASK | RXE_IMMDT_MASK));
+
+ pkey = (qp_type(qp) == IB_QPT_GSI) ?
+ port->pkey_tbl[ibwr->wr.ud.pkey_index] :
+ port->pkey_tbl[qp->attr.pkey_index];
+
+ qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
+ qp->attr.dest_qp_num;
+
+ ack_req = ((pkt->mask & RXE_END_MASK) ||
+ (qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK));
+ if (ack_req)
+ qp->req.noack_pkts = 0;
+
+ bth_init(pkt, pkt->opcode, solicited, 0, pad, pkey, qp_num,
+ ack_req, pkt->psn);
+
+ /* init optional headers */
+ if (pkt->mask & RXE_RETH_MASK) {
+ reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
+ reth_set_va(pkt, wqe->iova);
+ reth_set_len(pkt, wqe->dma.length);
+ }
+
+ if (pkt->mask & RXE_IMMDT_MASK)
+ immdt_set_imm(pkt, ibwr->ex.imm_data);
+
+ if (pkt->mask & RXE_IETH_MASK)
+ ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey);
+
+ if (pkt->mask & RXE_ATMETH_MASK) {
+ atmeth_set_va(pkt, wqe->iova);
+ if (opcode == IB_OPCODE_RC_COMPARE_SWAP ||
+ opcode == IB_OPCODE_RD_COMPARE_SWAP) {
+ atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
+ atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
+ } else {
+ atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add);
+ }
+ atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey);
+ }
+
+ if (pkt->mask & RXE_DETH_MASK) {
+ if (qp->ibqp.qp_num == 1)
+ deth_set_qkey(pkt, GSI_QKEY);
+ else
+ deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey);
+ deth_set_sqp(pkt, qp->ibqp.qp_num);
+ }
+
+ return skb;
+}
+
+static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+ struct rxe_pkt_info *pkt, struct sk_buff *skb,
+ int paylen)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ u32 crc = 0;
+ u32 *p;
+ int err;
+
+ err = rxe->ifc_ops->prepare(rxe, pkt, skb, &crc);
+ if (err)
+ return err;
+
+ if (pkt->mask & RXE_WRITE_OR_SEND) {
+ if (wqe->wr.send_flags & IB_SEND_INLINE) {
+ u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];
+
+ crc = crc32_le(crc, tmp, paylen);
+
+ memcpy(payload_addr(pkt), tmp, paylen);
+
+ wqe->dma.resid -= paylen;
+ wqe->dma.sge_offset += paylen;
+ } else {
+ err = copy_data(rxe, qp->pd, 0, &wqe->dma,
+ payload_addr(pkt), paylen,
+ from_mem_obj,
+ &crc);
+ if (err)
+ return err;
+ }
+ }
+ p = payload_addr(pkt) + paylen + bth_pad(pkt);
+
+ *p = ~crc;
+
+ return 0;
+}
+
+static void update_wqe_state(struct rxe_qp *qp,
+ struct rxe_send_wqe *wqe,
+ struct rxe_pkt_info *pkt,
+ enum wqe_state *prev_state)
+{
+ enum wqe_state prev_state_ = wqe->state;
+
+ if (pkt->mask & RXE_END_MASK) {
+ if (qp_type(qp) == IB_QPT_RC)
+ wqe->state = wqe_state_pending;
+ } else {
+ wqe->state = wqe_state_processing;
+ }
+
+ *prev_state = prev_state_;
+}
+
+static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+ struct rxe_pkt_info *pkt, int payload)
+{
+ /* number of packets left to send including current one */
+ int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;
+
+ /* handle zero length packet case */
+ if (num_pkt == 0)
+ num_pkt = 1;
+
+ if (pkt->mask & RXE_START_MASK) {
+ wqe->first_psn = qp->req.psn;
+ wqe->last_psn = (qp->req.psn + num_pkt - 1) & BTH_PSN_MASK;
+ }
+
+ if (pkt->mask & RXE_READ_MASK)
+ qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK;
+ else
+ qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
+
+ qp->req.opcode = pkt->opcode;
+
+
+ if (pkt->mask & RXE_END_MASK)
+ qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
+
+ qp->need_req_skb = 0;
+
+ if (qp->qp_timeout_jiffies && !timer_pending(&qp->retrans_timer))
+ mod_timer(&qp->retrans_timer,
+ jiffies + qp->qp_timeout_jiffies);
+}
+
+int rxe_requester(void *arg)
+{
+ struct rxe_qp *qp = (struct rxe_qp *)arg;
+ struct rxe_pkt_info pkt;
+ struct sk_buff *skb;
+ struct rxe_send_wqe *wqe;
+ unsigned mask;
+ int payload;
+ int mtu;
+ int opcode;
+ int ret;
+ enum wqe_state prev_state;
+
+next_wqe:
+ if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
+ goto exit;
+
+ if (unlikely(qp->req.state == QP_STATE_RESET)) {
+ qp->req.wqe_index = consumer_index(qp->sq.queue);
+ qp->req.opcode = -1;
+ qp->req.need_rd_atomic = 0;
+ qp->req.wait_psn = 0;
+ qp->req.need_retry = 0;
+ goto exit;
+ }
+
+ if (unlikely(qp->req.need_retry)) {
+ req_retry(qp);
+ qp->req.need_retry = 0;
+ }
+
+ wqe = req_next_wqe(qp);
+ if (unlikely(!wqe))
+ goto exit;
+
+ if (wqe->mask & WR_REG_MASK) {
+ if (wqe->wr.opcode == IB_WR_LOCAL_INV) {
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ struct rxe_mem *rmr;
+
+ rmr = rxe_pool_get_index(&rxe->mr_pool,
+ wqe->wr.ex.invalidate_rkey >> 8);
+ if (!rmr) {
+ pr_err("No mr for key %#x\n", wqe->wr.ex.invalidate_rkey);
+ wqe->state = wqe_state_error;
+ wqe->status = IB_WC_MW_BIND_ERR;
+ goto exit;
+ }
+ rmr->state = RXE_MEM_STATE_FREE;
+ wqe->state = wqe_state_done;
+ wqe->status = IB_WC_SUCCESS;
+ } else if (wqe->wr.opcode == IB_WR_REG_MR) {
+ struct rxe_mem *rmr = to_rmr(wqe->wr.wr.reg.mr);
+
+ rmr->state = RXE_MEM_STATE_VALID;
+ rmr->access = wqe->wr.wr.reg.access;
+ rmr->lkey = wqe->wr.wr.reg.key;
+ rmr->rkey = wqe->wr.wr.reg.key;
+ wqe->state = wqe_state_done;
+ wqe->status = IB_WC_SUCCESS;
+ } else {
+ goto exit;
+ }
+ qp->req.wqe_index = next_index(qp->sq.queue,
+ qp->req.wqe_index);
+ goto next_wqe;
+ }
+
+ if (unlikely(qp_type(qp) == IB_QPT_RC &&
+ qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) {
+ qp->req.wait_psn = 1;
+ goto exit;
+ }
+
+ /* Limit the number of inflight SKBs per QP */
+ if (unlikely(atomic_read(&qp->skb_out) >
+ RXE_INFLIGHT_SKBS_PER_QP_HIGH)) {
+ qp->need_req_skb = 1;
+ goto exit;
+ }
+
+ opcode = next_opcode(qp, wqe, wqe->wr.opcode);
+ if (unlikely(opcode < 0)) {
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
+ goto exit;
+ }
+
+ mask = rxe_opcode[opcode].mask;
+ if (unlikely(mask & RXE_READ_OR_ATOMIC)) {
+ if (check_init_depth(qp, wqe))
+ goto exit;
+ }
+
+ mtu = get_mtu(qp, wqe);
+ payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0;
+ if (payload > mtu) {
+ if (qp_type(qp) == IB_QPT_UD) {
+ /* C10-93.1.1: If the total sum of all the buffer lengths specified for a
+ * UD message exceeds the MTU of the port as returned by QueryHCA, the CI
+ * shall not emit any packets for this message. Further, the CI shall not
+ * generate an error due to this condition.
+ */
+
+ /* fake a successful UD send */
+ wqe->first_psn = qp->req.psn;
+ wqe->last_psn = qp->req.psn;
+ qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
+ qp->req.opcode = IB_OPCODE_UD_SEND_ONLY;
+ qp->req.wqe_index = next_index(qp->sq.queue,
+ qp->req.wqe_index);
+ wqe->state = wqe_state_done;
+ wqe->status = IB_WC_SUCCESS;
+ goto complete;
+ }
+ payload = mtu;
+ }
+
+ skb = init_req_packet(qp, wqe, opcode, payload, &pkt);
+ if (unlikely(!skb)) {
+ pr_err("Failed allocating skb\n");
+ goto err;
+ }
+
+ if (fill_packet(qp, wqe, &pkt, skb, payload)) {
+ pr_debug("Error during fill packet\n");
+ goto err;
+ }
+
+ update_wqe_state(qp, wqe, &pkt, &prev_state);
+ ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
+ if (ret) {
+ qp->need_req_skb = 1;
+ kfree_skb(skb);
+
+ wqe->state = prev_state;
+
+ if (ret == -EAGAIN) {
+ rxe_run_task(&qp->req.task, 1);
+ goto exit;
+ }
+
+ goto err;
+ }
+
+ update_state(qp, wqe, &pkt, payload);
+
+ goto next_wqe;
+
+err:
+ kfree_skb(skb);
+ wqe->status = IB_WC_LOC_PROT_ERR;
+ wqe->state = wqe_state_error;
+
+complete:
+ if (qp_type(qp) != IB_QPT_RC) {
+ while (rxe_completer(qp) == 0)
+ ;
+ }
+
+ return 0;
+
+exit:
+ return -EAGAIN;
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/skbuff.h>
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+
+enum resp_states {
+ RESPST_NONE,
+ RESPST_GET_REQ,
+ RESPST_CHK_PSN,
+ RESPST_CHK_OP_SEQ,
+ RESPST_CHK_OP_VALID,
+ RESPST_CHK_RESOURCE,
+ RESPST_CHK_LENGTH,
+ RESPST_CHK_RKEY,
+ RESPST_EXECUTE,
+ RESPST_READ_REPLY,
+ RESPST_COMPLETE,
+ RESPST_ACKNOWLEDGE,
+ RESPST_CLEANUP,
+ RESPST_DUPLICATE_REQUEST,
+ RESPST_ERR_MALFORMED_WQE,
+ RESPST_ERR_UNSUPPORTED_OPCODE,
+ RESPST_ERR_MISALIGNED_ATOMIC,
+ RESPST_ERR_PSN_OUT_OF_SEQ,
+ RESPST_ERR_MISSING_OPCODE_FIRST,
+ RESPST_ERR_MISSING_OPCODE_LAST_C,
+ RESPST_ERR_MISSING_OPCODE_LAST_D1E,
+ RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
+ RESPST_ERR_RNR,
+ RESPST_ERR_RKEY_VIOLATION,
+ RESPST_ERR_LENGTH,
+ RESPST_ERR_CQ_OVERFLOW,
+ RESPST_ERROR,
+ RESPST_RESET,
+ RESPST_DONE,
+ RESPST_EXIT,
+};
+
+static char *resp_state_name[] = {
+ [RESPST_NONE] = "NONE",
+ [RESPST_GET_REQ] = "GET_REQ",
+ [RESPST_CHK_PSN] = "CHK_PSN",
+ [RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ",
+ [RESPST_CHK_OP_VALID] = "CHK_OP_VALID",
+ [RESPST_CHK_RESOURCE] = "CHK_RESOURCE",
+ [RESPST_CHK_LENGTH] = "CHK_LENGTH",
+ [RESPST_CHK_RKEY] = "CHK_RKEY",
+ [RESPST_EXECUTE] = "EXECUTE",
+ [RESPST_READ_REPLY] = "READ_REPLY",
+ [RESPST_COMPLETE] = "COMPLETE",
+ [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
+ [RESPST_CLEANUP] = "CLEANUP",
+ [RESPST_DUPLICATE_REQUEST] = "DUPLICATE_REQUEST",
+ [RESPST_ERR_MALFORMED_WQE] = "ERR_MALFORMED_WQE",
+ [RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE",
+ [RESPST_ERR_MISALIGNED_ATOMIC] = "ERR_MISALIGNED_ATOMIC",
+ [RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ",
+ [RESPST_ERR_MISSING_OPCODE_FIRST] = "ERR_MISSING_OPCODE_FIRST",
+ [RESPST_ERR_MISSING_OPCODE_LAST_C] = "ERR_MISSING_OPCODE_LAST_C",
+ [RESPST_ERR_MISSING_OPCODE_LAST_D1E] = "ERR_MISSING_OPCODE_LAST_D1E",
+ [RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ",
+ [RESPST_ERR_RNR] = "ERR_RNR",
+ [RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION",
+ [RESPST_ERR_LENGTH] = "ERR_LENGTH",
+ [RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW",
+ [RESPST_ERROR] = "ERROR",
+ [RESPST_RESET] = "RESET",
+ [RESPST_DONE] = "DONE",
+ [RESPST_EXIT] = "EXIT",
+};
+
+/* rxe_recv calls here to add a request packet to the input queue */
+void rxe_resp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
+ struct sk_buff *skb)
+{
+ int must_sched;
+ struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+
+ skb_queue_tail(&qp->req_pkts, skb);
+
+ must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
+ (skb_queue_len(&qp->req_pkts) > 1);
+
+ rxe_run_task(&qp->resp.task, must_sched);
+}
+
+static inline enum resp_states get_req(struct rxe_qp *qp,
+ struct rxe_pkt_info **pkt_p)
+{
+ struct sk_buff *skb;
+
+ if (qp->resp.state == QP_STATE_ERROR) {
+ skb = skb_dequeue(&qp->req_pkts);
+ if (skb) {
+ /* drain request packet queue */
+ rxe_drop_ref(qp);
+ kfree_skb(skb);
+ return RESPST_GET_REQ;
+ }
+
+ /* go drain recv wr queue */
+ return RESPST_CHK_RESOURCE;
+ }
+
+ skb = skb_peek(&qp->req_pkts);
+ if (!skb)
+ return RESPST_EXIT;
+
+ *pkt_p = SKB_TO_PKT(skb);
+
+ return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
+}
+
+static enum resp_states check_psn(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ int diff = psn_compare(pkt->psn, qp->resp.psn);
+
+ switch (qp_type(qp)) {
+ case IB_QPT_RC:
+ if (diff > 0) {
+ if (qp->resp.sent_psn_nak)
+ return RESPST_CLEANUP;
+
+ qp->resp.sent_psn_nak = 1;
+ return RESPST_ERR_PSN_OUT_OF_SEQ;
+
+ } else if (diff < 0) {
+ return RESPST_DUPLICATE_REQUEST;
+ }
+
+ if (qp->resp.sent_psn_nak)
+ qp->resp.sent_psn_nak = 0;
+
+ break;
+
+ case IB_QPT_UC:
+ if (qp->resp.drop_msg || diff != 0) {
+ if (pkt->mask & RXE_START_MASK) {
+ qp->resp.drop_msg = 0;
+ return RESPST_CHK_OP_SEQ;
+ }
+
+ qp->resp.drop_msg = 1;
+ return RESPST_CLEANUP;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return RESPST_CHK_OP_SEQ;
+}
+
+static enum resp_states check_op_seq(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ switch (qp_type(qp)) {
+ case IB_QPT_RC:
+ switch (qp->resp.opcode) {
+ case IB_OPCODE_RC_SEND_FIRST:
+ case IB_OPCODE_RC_SEND_MIDDLE:
+ switch (pkt->opcode) {
+ case IB_OPCODE_RC_SEND_MIDDLE:
+ case IB_OPCODE_RC_SEND_LAST:
+ case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
+ case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
+ return RESPST_CHK_OP_VALID;
+ default:
+ return RESPST_ERR_MISSING_OPCODE_LAST_C;
+ }
+
+ case IB_OPCODE_RC_RDMA_WRITE_FIRST:
+ case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
+ switch (pkt->opcode) {
+ case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
+ case IB_OPCODE_RC_RDMA_WRITE_LAST:
+ case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
+ return RESPST_CHK_OP_VALID;
+ default:
+ return RESPST_ERR_MISSING_OPCODE_LAST_C;
+ }
+
+ default:
+ switch (pkt->opcode) {
+ case IB_OPCODE_RC_SEND_MIDDLE:
+ case IB_OPCODE_RC_SEND_LAST:
+ case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
+ case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
+ case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
+ case IB_OPCODE_RC_RDMA_WRITE_LAST:
+ case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
+ return RESPST_ERR_MISSING_OPCODE_FIRST;
+ default:
+ return RESPST_CHK_OP_VALID;
+ }
+ }
+ break;
+
+ case IB_QPT_UC:
+ switch (qp->resp.opcode) {
+ case IB_OPCODE_UC_SEND_FIRST:
+ case IB_OPCODE_UC_SEND_MIDDLE:
+ switch (pkt->opcode) {
+ case IB_OPCODE_UC_SEND_MIDDLE:
+ case IB_OPCODE_UC_SEND_LAST:
+ case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
+ return RESPST_CHK_OP_VALID;
+ default:
+ return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
+ }
+
+ case IB_OPCODE_UC_RDMA_WRITE_FIRST:
+ case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
+ switch (pkt->opcode) {
+ case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
+ case IB_OPCODE_UC_RDMA_WRITE_LAST:
+ case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
+ return RESPST_CHK_OP_VALID;
+ default:
+ return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
+ }
+
+ default:
+ switch (pkt->opcode) {
+ case IB_OPCODE_UC_SEND_MIDDLE:
+ case IB_OPCODE_UC_SEND_LAST:
+ case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
+ case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
+ case IB_OPCODE_UC_RDMA_WRITE_LAST:
+ case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
+ qp->resp.drop_msg = 1;
+ return RESPST_CLEANUP;
+ default:
+ return RESPST_CHK_OP_VALID;
+ }
+ }
+ break;
+
+ default:
+ return RESPST_CHK_OP_VALID;
+ }
+}
+
+static enum resp_states check_op_valid(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ switch (qp_type(qp)) {
+ case IB_QPT_RC:
+ if (((pkt->mask & RXE_READ_MASK) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
+ ((pkt->mask & RXE_WRITE_MASK) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
+ ((pkt->mask & RXE_ATOMIC_MASK) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) {
+ return RESPST_ERR_UNSUPPORTED_OPCODE;
+ }
+
+ break;
+
+ case IB_QPT_UC:
+ if ((pkt->mask & RXE_WRITE_MASK) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
+ qp->resp.drop_msg = 1;
+ return RESPST_CLEANUP;
+ }
+
+ break;
+
+ case IB_QPT_UD:
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ break;
+
+ default:
+ WARN_ON(1);
+ break;
+ }
+
+ return RESPST_CHK_RESOURCE;
+}
+
+static enum resp_states get_srq_wqe(struct rxe_qp *qp)
+{
+ struct rxe_srq *srq = qp->srq;
+ struct rxe_queue *q = srq->rq.queue;
+ struct rxe_recv_wqe *wqe;
+ struct ib_event ev;
+
+ if (srq->error)
+ return RESPST_ERR_RNR;
+
+ spin_lock_bh(&srq->rq.consumer_lock);
+
+ wqe = queue_head(q);
+ if (!wqe) {
+ spin_unlock_bh(&srq->rq.consumer_lock);
+ return RESPST_ERR_RNR;
+ }
+
+ /* note kernel and user space recv wqes have same size */
+ memcpy(&qp->resp.srq_wqe, wqe, sizeof(qp->resp.srq_wqe));
+
+ qp->resp.wqe = &qp->resp.srq_wqe.wqe;
+ advance_consumer(q);
+
+ if (srq->limit && srq->ibsrq.event_handler &&
+ (queue_count(q) < srq->limit)) {
+ srq->limit = 0;
+ goto event;
+ }
+
+ spin_unlock_bh(&srq->rq.consumer_lock);
+ return RESPST_CHK_LENGTH;
+
+event:
+ spin_unlock_bh(&srq->rq.consumer_lock);
+ ev.device = qp->ibqp.device;
+ ev.element.srq = qp->ibqp.srq;
+ ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
+ return RESPST_CHK_LENGTH;
+}
+
+static enum resp_states check_resource(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ struct rxe_srq *srq = qp->srq;
+
+ if (qp->resp.state == QP_STATE_ERROR) {
+ if (qp->resp.wqe) {
+ qp->resp.status = IB_WC_WR_FLUSH_ERR;
+ return RESPST_COMPLETE;
+ } else if (!srq) {
+ qp->resp.wqe = queue_head(qp->rq.queue);
+ if (qp->resp.wqe) {
+ qp->resp.status = IB_WC_WR_FLUSH_ERR;
+ return RESPST_COMPLETE;
+ } else {
+ return RESPST_EXIT;
+ }
+ } else {
+ return RESPST_EXIT;
+ }
+ }
+
+ if (pkt->mask & RXE_READ_OR_ATOMIC) {
+ /* it is the requesters job to not send
+ * too many read/atomic ops, we just
+ * recycle the responder resource queue
+ */
+ if (likely(qp->attr.max_rd_atomic > 0))
+ return RESPST_CHK_LENGTH;
+ else
+ return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
+ }
+
+ if (pkt->mask & RXE_RWR_MASK) {
+ if (srq)
+ return get_srq_wqe(qp);
+
+ qp->resp.wqe = queue_head(qp->rq.queue);
+ return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
+ }
+
+ return RESPST_CHK_LENGTH;
+}
+
+static enum resp_states check_length(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ switch (qp_type(qp)) {
+ case IB_QPT_RC:
+ return RESPST_CHK_RKEY;
+
+ case IB_QPT_UC:
+ return RESPST_CHK_RKEY;
+
+ default:
+ return RESPST_CHK_RKEY;
+ }
+}
+
+static enum resp_states check_rkey(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ struct rxe_mem *mem;
+ u64 va;
+ u32 rkey;
+ u32 resid;
+ u32 pktlen;
+ int mtu = qp->mtu;
+ enum resp_states state;
+ int access;
+
+ if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) {
+ if (pkt->mask & RXE_RETH_MASK) {
+ qp->resp.va = reth_va(pkt);
+ qp->resp.rkey = reth_rkey(pkt);
+ qp->resp.resid = reth_len(pkt);
+ }
+ access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
+ : IB_ACCESS_REMOTE_WRITE;
+ } else if (pkt->mask & RXE_ATOMIC_MASK) {
+ qp->resp.va = atmeth_va(pkt);
+ qp->resp.rkey = atmeth_rkey(pkt);
+ qp->resp.resid = sizeof(u64);
+ access = IB_ACCESS_REMOTE_ATOMIC;
+ } else {
+ return RESPST_EXECUTE;
+ }
+
+ va = qp->resp.va;
+ rkey = qp->resp.rkey;
+ resid = qp->resp.resid;
+ pktlen = payload_size(pkt);
+
+ mem = lookup_mem(qp->pd, access, rkey, lookup_remote);
+ if (!mem) {
+ state = RESPST_ERR_RKEY_VIOLATION;
+ goto err1;
+ }
+
+ if (unlikely(mem->state == RXE_MEM_STATE_FREE)) {
+ state = RESPST_ERR_RKEY_VIOLATION;
+ goto err1;
+ }
+
+ if (mem_check_range(mem, va, resid)) {
+ state = RESPST_ERR_RKEY_VIOLATION;
+ goto err2;
+ }
+
+ if (pkt->mask & RXE_WRITE_MASK) {
+ if (resid > mtu) {
+ if (pktlen != mtu || bth_pad(pkt)) {
+ state = RESPST_ERR_LENGTH;
+ goto err2;
+ }
+
+ resid = mtu;
+ } else {
+ if (pktlen != resid) {
+ state = RESPST_ERR_LENGTH;
+ goto err2;
+ }
+ if ((bth_pad(pkt) != (0x3 & (-resid)))) {
+ /* This case may not be exactly that
+ * but nothing else fits.
+ */
+ state = RESPST_ERR_LENGTH;
+ goto err2;
+ }
+ }
+ }
+
+ WARN_ON(qp->resp.mr);
+
+ qp->resp.mr = mem;
+ return RESPST_EXECUTE;
+
+err2:
+ rxe_drop_ref(mem);
+err1:
+ return state;
+}
+
+static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
+ int data_len)
+{
+ int err;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+
+ err = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
+ data_addr, data_len, to_mem_obj, NULL);
+ if (unlikely(err))
+ return (err == -ENOSPC) ? RESPST_ERR_LENGTH
+ : RESPST_ERR_MALFORMED_WQE;
+
+ return RESPST_NONE;
+}
+
+static enum resp_states write_data_in(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ enum resp_states rc = RESPST_NONE;
+ int err;
+ int data_len = payload_size(pkt);
+
+ err = rxe_mem_copy(qp->resp.mr, qp->resp.va, payload_addr(pkt),
+ data_len, to_mem_obj, NULL);
+ if (err) {
+ rc = RESPST_ERR_RKEY_VIOLATION;
+ goto out;
+ }
+
+ qp->resp.va += data_len;
+ qp->resp.resid -= data_len;
+
+out:
+ return rc;
+}
+
+/* Guarantee atomicity of atomic operations at the machine level. */
+static DEFINE_SPINLOCK(atomic_ops_lock);
+
+static enum resp_states process_atomic(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ u64 iova = atmeth_va(pkt);
+ u64 *vaddr;
+ enum resp_states ret;
+ struct rxe_mem *mr = qp->resp.mr;
+
+ if (mr->state != RXE_MEM_STATE_VALID) {
+ ret = RESPST_ERR_RKEY_VIOLATION;
+ goto out;
+ }
+
+ vaddr = iova_to_vaddr(mr, iova, sizeof(u64));
+
+ /* check vaddr is 8 bytes aligned. */
+ if (!vaddr || (uintptr_t)vaddr & 7) {
+ ret = RESPST_ERR_MISALIGNED_ATOMIC;
+ goto out;
+ }
+
+ spin_lock_bh(&atomic_ops_lock);
+
+ qp->resp.atomic_orig = *vaddr;
+
+ if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP ||
+ pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) {
+ if (*vaddr == atmeth_comp(pkt))
+ *vaddr = atmeth_swap_add(pkt);
+ } else {
+ *vaddr += atmeth_swap_add(pkt);
+ }
+
+ spin_unlock_bh(&atomic_ops_lock);
+
+ ret = RESPST_NONE;
+out:
+ return ret;
+}
+
+static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt,
+ struct rxe_pkt_info *ack,
+ int opcode,
+ int payload,
+ u32 psn,
+ u8 syndrome,
+ u32 *crcp)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ struct sk_buff *skb;
+ u32 crc = 0;
+ u32 *p;
+ int paylen;
+ int pad;
+ int err;
+
+ /*
+ * allocate packet
+ */
+ pad = (-payload) & 0x3;
+ paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
+
+ skb = rxe->ifc_ops->init_packet(rxe, &qp->pri_av, paylen, ack);
+ if (!skb)
+ return NULL;
+
+ ack->qp = qp;
+ ack->opcode = opcode;
+ ack->mask = rxe_opcode[opcode].mask;
+ ack->offset = pkt->offset;
+ ack->paylen = paylen;
+
+ /* fill in bth using the request packet headers */
+ memcpy(ack->hdr, pkt->hdr, pkt->offset + RXE_BTH_BYTES);
+
+ bth_set_opcode(ack, opcode);
+ bth_set_qpn(ack, qp->attr.dest_qp_num);
+ bth_set_pad(ack, pad);
+ bth_set_se(ack, 0);
+ bth_set_psn(ack, psn);
+ bth_set_ack(ack, 0);
+ ack->psn = psn;
+
+ if (ack->mask & RXE_AETH_MASK) {
+ aeth_set_syn(ack, syndrome);
+ aeth_set_msn(ack, qp->resp.msn);
+ }
+
+ if (ack->mask & RXE_ATMACK_MASK)
+ atmack_set_orig(ack, qp->resp.atomic_orig);
+
+ err = rxe->ifc_ops->prepare(rxe, ack, skb, &crc);
+ if (err) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ if (crcp) {
+ /* CRC computation will be continued by the caller */
+ *crcp = crc;
+ } else {
+ p = payload_addr(ack) + payload + bth_pad(ack);
+ *p = ~crc;
+ }
+
+ return skb;
+}
+
+/* RDMA read response. If res is not NULL, then we have a current RDMA request
+ * being processed or replayed.
+ */
+static enum resp_states read_reply(struct rxe_qp *qp,
+ struct rxe_pkt_info *req_pkt)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ struct rxe_pkt_info ack_pkt;
+ struct sk_buff *skb;
+ int mtu = qp->mtu;
+ enum resp_states state;
+ int payload;
+ int opcode;
+ int err;
+ struct resp_res *res = qp->resp.res;
+ u32 icrc;
+ u32 *p;
+
+ if (!res) {
+ /* This is the first time we process that request. Get a
+ * resource
+ */
+ res = &qp->resp.resources[qp->resp.res_head];
+
+ free_rd_atomic_resource(qp, res);
+ rxe_advance_resp_resource(qp);
+
+ res->type = RXE_READ_MASK;
+
+ res->read.va = qp->resp.va;
+ res->read.va_org = qp->resp.va;
+
+ res->first_psn = req_pkt->psn;
+ res->last_psn = req_pkt->psn +
+ (reth_len(req_pkt) + mtu - 1) /
+ mtu - 1;
+ res->cur_psn = req_pkt->psn;
+
+ res->read.resid = qp->resp.resid;
+ res->read.length = qp->resp.resid;
+ res->read.rkey = qp->resp.rkey;
+
+ /* note res inherits the reference to mr from qp */
+ res->read.mr = qp->resp.mr;
+ qp->resp.mr = NULL;
+
+ qp->resp.res = res;
+ res->state = rdatm_res_state_new;
+ }
+
+ if (res->state == rdatm_res_state_new) {
+ if (res->read.resid <= mtu)
+ opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
+ else
+ opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
+ } else {
+ if (res->read.resid > mtu)
+ opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
+ else
+ opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
+ }
+
+ res->state = rdatm_res_state_next;
+
+ payload = min_t(int, res->read.resid, mtu);
+
+ skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload,
+ res->cur_psn, AETH_ACK_UNLIMITED, &icrc);
+ if (!skb)
+ return RESPST_ERR_RNR;
+
+ err = rxe_mem_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt),
+ payload, from_mem_obj, &icrc);
+ if (err)
+ pr_err("Failed copying memory\n");
+
+ p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);
+ *p = ~icrc;
+
+ err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
+ if (err) {
+ pr_err("Failed sending RDMA reply.\n");
+ kfree_skb(skb);
+ return RESPST_ERR_RNR;
+ }
+
+ res->read.va += payload;
+ res->read.resid -= payload;
+ res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;
+
+ if (res->read.resid > 0) {
+ state = RESPST_DONE;
+ } else {
+ qp->resp.res = NULL;
+ qp->resp.opcode = -1;
+ qp->resp.psn = res->cur_psn;
+ state = RESPST_CLEANUP;
+ }
+
+ return state;
+}
+
+/* Executes a new request. A retried request never reach that function (send
+ * and writes are discarded, and reads and atomics are retried elsewhere.
+ */
+static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+{
+ enum resp_states err;
+
+ if (pkt->mask & RXE_SEND_MASK) {
+ if (qp_type(qp) == IB_QPT_UD ||
+ qp_type(qp) == IB_QPT_SMI ||
+ qp_type(qp) == IB_QPT_GSI) {
+ union rdma_network_hdr hdr;
+ struct sk_buff *skb = PKT_TO_SKB(pkt);
+
+ memset(&hdr, 0, sizeof(hdr));
+ if (skb->protocol == htons(ETH_P_IP))
+ memcpy(&hdr.roce4grh, ip_hdr(skb), sizeof(hdr.roce4grh));
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ memcpy(&hdr.ibgrh, ipv6_hdr(skb), sizeof(hdr.ibgrh));
+
+ err = send_data_in(qp, &hdr, sizeof(hdr));
+ if (err)
+ return err;
+ }
+ err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
+ if (err)
+ return err;
+ } else if (pkt->mask & RXE_WRITE_MASK) {
+ err = write_data_in(qp, pkt);
+ if (err)
+ return err;
+ } else if (pkt->mask & RXE_READ_MASK) {
+ /* For RDMA Read we can increment the msn now. See C9-148. */
+ qp->resp.msn++;
+ return RESPST_READ_REPLY;
+ } else if (pkt->mask & RXE_ATOMIC_MASK) {
+ err = process_atomic(qp, pkt);
+ if (err)
+ return err;
+ } else
+ /* Unreachable */
+ WARN_ON(1);
+
+ /* We successfully processed this new request. */
+ qp->resp.msn++;
+
+ /* next expected psn, read handles this separately */
+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+
+ qp->resp.opcode = pkt->opcode;
+ qp->resp.status = IB_WC_SUCCESS;
+
+ if (pkt->mask & RXE_COMP_MASK)
+ return RESPST_COMPLETE;
+ else if (qp_type(qp) == IB_QPT_RC)
+ return RESPST_ACKNOWLEDGE;
+ else
+ return RESPST_CLEANUP;
+}
+
+static enum resp_states do_complete(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ struct rxe_cqe cqe;
+ struct ib_wc *wc = &cqe.ibwc;
+ struct ib_uverbs_wc *uwc = &cqe.uibwc;
+ struct rxe_recv_wqe *wqe = qp->resp.wqe;
+
+ if (unlikely(!wqe))
+ return RESPST_CLEANUP;
+
+ memset(&cqe, 0, sizeof(cqe));
+
+ wc->wr_id = wqe->wr_id;
+ wc->status = qp->resp.status;
+ wc->qp = &qp->ibqp;
+
+ /* fields after status are not required for errors */
+ if (wc->status == IB_WC_SUCCESS) {
+ wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
+ pkt->mask & RXE_WRITE_MASK) ?
+ IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
+ wc->vendor_err = 0;
+ wc->byte_len = wqe->dma.length - wqe->dma.resid;
+
+ /* fields after byte_len are different between kernel and user
+ * space
+ */
+ if (qp->rcq->is_user) {
+ uwc->wc_flags = IB_WC_GRH;
+
+ if (pkt->mask & RXE_IMMDT_MASK) {
+ uwc->wc_flags |= IB_WC_WITH_IMM;
+ uwc->ex.imm_data =
+ (__u32 __force)immdt_imm(pkt);
+ }
+
+ if (pkt->mask & RXE_IETH_MASK) {
+ uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ uwc->ex.invalidate_rkey = ieth_rkey(pkt);
+ }
+
+ uwc->qp_num = qp->ibqp.qp_num;
+
+ if (pkt->mask & RXE_DETH_MASK)
+ uwc->src_qp = deth_sqp(pkt);
+
+ uwc->port_num = qp->attr.port_num;
+ } else {
+ struct sk_buff *skb = PKT_TO_SKB(pkt);
+
+ wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
+ if (skb->protocol == htons(ETH_P_IP))
+ wc->network_hdr_type = RDMA_NETWORK_IPV4;
+ else
+ wc->network_hdr_type = RDMA_NETWORK_IPV6;
+
+ if (pkt->mask & RXE_IMMDT_MASK) {
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ wc->ex.imm_data = immdt_imm(pkt);
+ }
+
+ if (pkt->mask & RXE_IETH_MASK) {
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ struct rxe_mem *rmr;
+
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ wc->ex.invalidate_rkey = ieth_rkey(pkt);
+
+ rmr = rxe_pool_get_index(&rxe->mr_pool,
+ wc->ex.invalidate_rkey >> 8);
+ if (unlikely(!rmr)) {
+ pr_err("Bad rkey %#x invalidation\n", wc->ex.invalidate_rkey);
+ return RESPST_ERROR;
+ }
+ rmr->state = RXE_MEM_STATE_FREE;
+ }
+
+ wc->qp = &qp->ibqp;
+
+ if (pkt->mask & RXE_DETH_MASK)
+ wc->src_qp = deth_sqp(pkt);
+
+ wc->port_num = qp->attr.port_num;
+ }
+ }
+
+ /* have copy for srq and reference for !srq */
+ if (!qp->srq)
+ advance_consumer(qp->rq.queue);
+
+ qp->resp.wqe = NULL;
+
+ if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
+ return RESPST_ERR_CQ_OVERFLOW;
+
+ if (qp->resp.state == QP_STATE_ERROR)
+ return RESPST_CHK_RESOURCE;
+
+ if (!pkt)
+ return RESPST_DONE;
+ else if (qp_type(qp) == IB_QPT_RC)
+ return RESPST_ACKNOWLEDGE;
+ else
+ return RESPST_CLEANUP;
+}
+
+static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
+ u8 syndrome, u32 psn)
+{
+ int err = 0;
+ struct rxe_pkt_info ack_pkt;
+ struct sk_buff *skb;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+
+ skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
+ 0, psn, syndrome, NULL);
+ if (!skb) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
+ if (err) {
+ pr_err_ratelimited("Failed sending ack\n");
+ kfree_skb(skb);
+ }
+
+err1:
+ return err;
+}
+
+static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
+ u8 syndrome)
+{
+ int rc = 0;
+ struct rxe_pkt_info ack_pkt;
+ struct sk_buff *skb;
+ struct sk_buff *skb_copy;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ struct resp_res *res;
+
+ skb = prepare_ack_packet(qp, pkt, &ack_pkt,
+ IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn,
+ syndrome, NULL);
+ if (!skb) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ skb_copy = skb_clone(skb, GFP_ATOMIC);
+ if (skb_copy)
+ rxe_add_ref(qp); /* for the new SKB */
+ else {
+ pr_warn("Could not clone atomic response\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ res = &qp->resp.resources[qp->resp.res_head];
+ free_rd_atomic_resource(qp, res);
+ rxe_advance_resp_resource(qp);
+
+ res->type = RXE_ATOMIC_MASK;
+ res->atomic.skb = skb;
+ res->first_psn = qp->resp.psn;
+ res->last_psn = qp->resp.psn;
+ res->cur_psn = qp->resp.psn;
+
+ rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy);
+ if (rc) {
+ pr_err_ratelimited("Failed sending ack\n");
+ rxe_drop_ref(qp);
+ kfree_skb(skb_copy);
+ }
+
+out:
+ return rc;
+}
+
+static enum resp_states acknowledge(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ if (qp_type(qp) != IB_QPT_RC)
+ return RESPST_CLEANUP;
+
+ if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
+ send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn);
+ else if (pkt->mask & RXE_ATOMIC_MASK)
+ send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED);
+ else if (bth_ack(pkt))
+ send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn);
+
+ return RESPST_CLEANUP;
+}
+
+static enum resp_states cleanup(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ struct sk_buff *skb;
+
+ if (pkt) {
+ skb = skb_dequeue(&qp->req_pkts);
+ rxe_drop_ref(qp);
+ kfree_skb(skb);
+ }
+
+ if (qp->resp.mr) {
+ rxe_drop_ref(qp->resp.mr);
+ qp->resp.mr = NULL;
+ }
+
+ return RESPST_DONE;
+}
+
+static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
+{
+ int i;
+
+ for (i = 0; i < qp->attr.max_rd_atomic; i++) {
+ struct resp_res *res = &qp->resp.resources[i];
+
+ if (res->type == 0)
+ continue;
+
+ if (psn_compare(psn, res->first_psn) >= 0 &&
+ psn_compare(psn, res->last_psn) <= 0) {
+ return res;
+ }
+ }
+
+ return NULL;
+}
+
+static enum resp_states duplicate_request(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ enum resp_states rc;
+
+ if (pkt->mask & RXE_SEND_MASK ||
+ pkt->mask & RXE_WRITE_MASK) {
+ /* SEND. Ack again and cleanup. C9-105. */
+ if (bth_ack(pkt))
+ send_ack(qp, pkt, AETH_ACK_UNLIMITED, qp->resp.psn - 1);
+ rc = RESPST_CLEANUP;
+ goto out;
+ } else if (pkt->mask & RXE_READ_MASK) {
+ struct resp_res *res;
+
+ res = find_resource(qp, pkt->psn);
+ if (!res) {
+ /* Resource not found. Class D error. Drop the
+ * request.
+ */
+ rc = RESPST_CLEANUP;
+ goto out;
+ } else {
+ /* Ensure this new request is the same as the previous
+ * one or a subset of it.
+ */
+ u64 iova = reth_va(pkt);
+ u32 resid = reth_len(pkt);
+
+ if (iova < res->read.va_org ||
+ resid > res->read.length ||
+ (iova + resid) > (res->read.va_org +
+ res->read.length)) {
+ rc = RESPST_CLEANUP;
+ goto out;
+ }
+
+ if (reth_rkey(pkt) != res->read.rkey) {
+ rc = RESPST_CLEANUP;
+ goto out;
+ }
+
+ res->cur_psn = pkt->psn;
+ res->state = (pkt->psn == res->first_psn) ?
+ rdatm_res_state_new :
+ rdatm_res_state_replay;
+
+ /* Reset the resource, except length. */
+ res->read.va_org = iova;
+ res->read.va = iova;
+ res->read.resid = resid;
+
+ /* Replay the RDMA read reply. */
+ qp->resp.res = res;
+ rc = RESPST_READ_REPLY;
+ goto out;
+ }
+ } else {
+ struct resp_res *res;
+
+ /* Find the operation in our list of responder resources. */
+ res = find_resource(qp, pkt->psn);
+ if (res) {
+ struct sk_buff *skb_copy;
+
+ skb_copy = skb_clone(res->atomic.skb, GFP_ATOMIC);
+ if (skb_copy) {
+ rxe_add_ref(qp); /* for the new SKB */
+ } else {
+ pr_warn("Couldn't clone atomic resp\n");
+ rc = RESPST_CLEANUP;
+ goto out;
+ }
+ bth_set_psn(SKB_TO_PKT(skb_copy),
+ qp->resp.psn - 1);
+ /* Resend the result. */
+ rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,
+ pkt, skb_copy);
+ if (rc) {
+ pr_err("Failed resending result. This flow is not handled - skb ignored\n");
+ kfree_skb(skb_copy);
+ rc = RESPST_CLEANUP;
+ goto out;
+ }
+ }
+
+ /* Resource not found. Class D error. Drop the request. */
+ rc = RESPST_CLEANUP;
+ goto out;
+ }
+out:
+ return rc;
+}
+
+/* Process a class A or C. Both are treated the same in this implementation. */
+static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
+ enum ib_wc_status status)
+{
+ qp->resp.aeth_syndrome = syndrome;
+ qp->resp.status = status;
+
+ /* indicate that we should go through the ERROR state */
+ qp->resp.goto_error = 1;
+}
+
+static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
+{
+ /* UC */
+ if (qp->srq) {
+ /* Class E */
+ qp->resp.drop_msg = 1;
+ if (qp->resp.wqe) {
+ qp->resp.status = IB_WC_REM_INV_REQ_ERR;
+ return RESPST_COMPLETE;
+ } else {
+ return RESPST_CLEANUP;
+ }
+ } else {
+ /* Class D1. This packet may be the start of a
+ * new message and could be valid. The previous
+ * message is invalid and ignored. reset the
+ * recv wr to its original state
+ */
+ if (qp->resp.wqe) {
+ qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
+ qp->resp.wqe->dma.cur_sge = 0;
+ qp->resp.wqe->dma.sge_offset = 0;
+ qp->resp.opcode = -1;
+ }
+
+ if (qp->resp.mr) {
+ rxe_drop_ref(qp->resp.mr);
+ qp->resp.mr = NULL;
+ }
+
+ return RESPST_CLEANUP;
+ }
+}
+
+int rxe_responder(void *arg)
+{
+ struct rxe_qp *qp = (struct rxe_qp *)arg;
+ enum resp_states state;
+ struct rxe_pkt_info *pkt = NULL;
+ int ret = 0;
+
+ qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
+
+ if (!qp->valid) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ switch (qp->resp.state) {
+ case QP_STATE_RESET:
+ state = RESPST_RESET;
+ break;
+
+ default:
+ state = RESPST_GET_REQ;
+ break;
+ }
+
+ while (1) {
+ pr_debug("state = %s\n", resp_state_name[state]);
+ switch (state) {
+ case RESPST_GET_REQ:
+ state = get_req(qp, &pkt);
+ break;
+ case RESPST_CHK_PSN:
+ state = check_psn(qp, pkt);
+ break;
+ case RESPST_CHK_OP_SEQ:
+ state = check_op_seq(qp, pkt);
+ break;
+ case RESPST_CHK_OP_VALID:
+ state = check_op_valid(qp, pkt);
+ break;
+ case RESPST_CHK_RESOURCE:
+ state = check_resource(qp, pkt);
+ break;
+ case RESPST_CHK_LENGTH:
+ state = check_length(qp, pkt);
+ break;
+ case RESPST_CHK_RKEY:
+ state = check_rkey(qp, pkt);
+ break;
+ case RESPST_EXECUTE:
+ state = execute(qp, pkt);
+ break;
+ case RESPST_COMPLETE:
+ state = do_complete(qp, pkt);
+ break;
+ case RESPST_READ_REPLY:
+ state = read_reply(qp, pkt);
+ break;
+ case RESPST_ACKNOWLEDGE:
+ state = acknowledge(qp, pkt);
+ break;
+ case RESPST_CLEANUP:
+ state = cleanup(qp, pkt);
+ break;
+ case RESPST_DUPLICATE_REQUEST:
+ state = duplicate_request(qp, pkt);
+ break;
+ case RESPST_ERR_PSN_OUT_OF_SEQ:
+ /* RC only - Class B. Drop packet. */
+ send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
+ state = RESPST_CLEANUP;
+ break;
+
+ case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
+ case RESPST_ERR_MISSING_OPCODE_FIRST:
+ case RESPST_ERR_MISSING_OPCODE_LAST_C:
+ case RESPST_ERR_UNSUPPORTED_OPCODE:
+ case RESPST_ERR_MISALIGNED_ATOMIC:
+ /* RC Only - Class C. */
+ do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
+ IB_WC_REM_INV_REQ_ERR);
+ state = RESPST_COMPLETE;
+ break;
+
+ case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
+ state = do_class_d1e_error(qp);
+ break;
+ case RESPST_ERR_RNR:
+ if (qp_type(qp) == IB_QPT_RC) {
+ /* RC - class B */
+ send_ack(qp, pkt, AETH_RNR_NAK |
+ (~AETH_TYPE_MASK &
+ qp->attr.min_rnr_timer),
+ pkt->psn);
+ } else {
+ /* UD/UC - class D */
+ qp->resp.drop_msg = 1;
+ }
+ state = RESPST_CLEANUP;
+ break;
+
+ case RESPST_ERR_RKEY_VIOLATION:
+ if (qp_type(qp) == IB_QPT_RC) {
+ /* Class C */
+ do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
+ IB_WC_REM_ACCESS_ERR);
+ state = RESPST_COMPLETE;
+ } else {
+ qp->resp.drop_msg = 1;
+ if (qp->srq) {
+ /* UC/SRQ Class D */
+ qp->resp.status = IB_WC_REM_ACCESS_ERR;
+ state = RESPST_COMPLETE;
+ } else {
+ /* UC/non-SRQ Class E. */
+ state = RESPST_CLEANUP;
+ }
+ }
+ break;
+
+ case RESPST_ERR_LENGTH:
+ if (qp_type(qp) == IB_QPT_RC) {
+ /* Class C */
+ do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
+ IB_WC_REM_INV_REQ_ERR);
+ state = RESPST_COMPLETE;
+ } else if (qp->srq) {
+ /* UC/UD - class E */
+ qp->resp.status = IB_WC_REM_INV_REQ_ERR;
+ state = RESPST_COMPLETE;
+ } else {
+ /* UC/UD - class D */
+ qp->resp.drop_msg = 1;
+ state = RESPST_CLEANUP;
+ }
+ break;
+
+ case RESPST_ERR_MALFORMED_WQE:
+ /* All, Class A. */
+ do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
+ IB_WC_LOC_QP_OP_ERR);
+ state = RESPST_COMPLETE;
+ break;
+
+ case RESPST_ERR_CQ_OVERFLOW:
+ /* All - Class G */
+ state = RESPST_ERROR;
+ break;
+
+ case RESPST_DONE:
+ if (qp->resp.goto_error) {
+ state = RESPST_ERROR;
+ break;
+ }
+
+ goto done;
+
+ case RESPST_EXIT:
+ if (qp->resp.goto_error) {
+ state = RESPST_ERROR;
+ break;
+ }
+
+ goto exit;
+
+ case RESPST_RESET: {
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(&qp->req_pkts))) {
+ rxe_drop_ref(qp);
+ kfree_skb(skb);
+ }
+
+ while (!qp->srq && qp->rq.queue &&
+ queue_head(qp->rq.queue))
+ advance_consumer(qp->rq.queue);
+
+ qp->resp.wqe = NULL;
+ goto exit;
+ }
+
+ case RESPST_ERROR:
+ qp->resp.goto_error = 0;
+ pr_warn("qp#%d moved to error state\n", qp_num(qp));
+ rxe_qp_error(qp);
+ goto exit;
+
+ default:
+ WARN_ON(1);
+ }
+ }
+
+exit:
+ ret = -EAGAIN;
+done:
+ return ret;
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+
+int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
+ struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)
+{
+ if (srq && srq->error) {
+ pr_warn("srq in error state\n");
+ goto err1;
+ }
+
+ if (mask & IB_SRQ_MAX_WR) {
+ if (attr->max_wr > rxe->attr.max_srq_wr) {
+ pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
+ attr->max_wr, rxe->attr.max_srq_wr);
+ goto err1;
+ }
+
+ if (attr->max_wr <= 0) {
+ pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
+ goto err1;
+ }
+
+ if (srq && srq->limit && (attr->max_wr < srq->limit)) {
+ pr_warn("max_wr (%d) < srq->limit (%d)\n",
+ attr->max_wr, srq->limit);
+ goto err1;
+ }
+
+ if (attr->max_wr < RXE_MIN_SRQ_WR)
+ attr->max_wr = RXE_MIN_SRQ_WR;
+ }
+
+ if (mask & IB_SRQ_LIMIT) {
+ if (attr->srq_limit > rxe->attr.max_srq_wr) {
+ pr_warn("srq_limit(%d) > max_srq_wr(%d)\n",
+ attr->srq_limit, rxe->attr.max_srq_wr);
+ goto err1;
+ }
+
+ if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) {
+ pr_warn("srq_limit (%d) > cur limit(%d)\n",
+ attr->srq_limit,
+ srq->rq.queue->buf->index_mask);
+ goto err1;
+ }
+ }
+
+ if (mask == IB_SRQ_INIT_MASK) {
+ if (attr->max_sge > rxe->attr.max_srq_sge) {
+ pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
+ attr->max_sge, rxe->attr.max_srq_sge);
+ goto err1;
+ }
+
+ if (attr->max_sge < RXE_MIN_SRQ_SGE)
+ attr->max_sge = RXE_MIN_SRQ_SGE;
+ }
+
+ return 0;
+
+err1:
+ return -EINVAL;
+}
+
+int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
+ struct ib_srq_init_attr *init,
+ struct ib_ucontext *context, struct ib_udata *udata)
+{
+ int err;
+ int srq_wqe_size;
+ struct rxe_queue *q;
+
+ srq->ibsrq.event_handler = init->event_handler;
+ srq->ibsrq.srq_context = init->srq_context;
+ srq->limit = init->attr.srq_limit;
+ srq->srq_num = srq->pelem.index;
+ srq->rq.max_wr = init->attr.max_wr;
+ srq->rq.max_sge = init->attr.max_sge;
+
+ srq_wqe_size = rcv_wqe_size(srq->rq.max_sge);
+
+ spin_lock_init(&srq->rq.producer_lock);
+ spin_lock_init(&srq->rq.consumer_lock);
+
+ q = rxe_queue_init(rxe, &srq->rq.max_wr,
+ srq_wqe_size);
+ if (!q) {
+ pr_warn("unable to allocate queue for srq\n");
+ return -ENOMEM;
+ }
+
+ srq->rq.queue = q;
+
+ err = do_mmap_info(rxe, udata, false, context, q->buf,
+ q->buf_size, &q->ip);
+ if (err)
+ return err;
+
+ if (udata && udata->outlen >= sizeof(struct mminfo) + sizeof(u32)) {
+ if (copy_to_user(udata->outbuf + sizeof(struct mminfo),
+ &srq->srq_num, sizeof(u32)))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
+ struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
+ struct ib_udata *udata)
+{
+ int err;
+ struct rxe_queue *q = srq->rq.queue;
+ struct mminfo mi = { .offset = 1, .size = 0};
+
+ if (mask & IB_SRQ_MAX_WR) {
+ /* Check that we can write the mminfo struct to user space */
+ if (udata && udata->inlen >= sizeof(__u64)) {
+ __u64 mi_addr;
+
+ /* Get address of user space mminfo struct */
+ err = ib_copy_from_udata(&mi_addr, udata,
+ sizeof(mi_addr));
+ if (err)
+ goto err1;
+
+ udata->outbuf = (void __user *)(unsigned long)mi_addr;
+ udata->outlen = sizeof(mi);
+
+ if (!access_ok(VERIFY_WRITE,
+ (void __user *)udata->outbuf,
+ udata->outlen)) {
+ err = -EFAULT;
+ goto err1;
+ }
+ }
+
+ err = rxe_queue_resize(q, (unsigned int *)&attr->max_wr,
+ rcv_wqe_size(srq->rq.max_sge),
+ srq->rq.queue->ip ?
+ srq->rq.queue->ip->context :
+ NULL,
+ udata, &srq->rq.producer_lock,
+ &srq->rq.consumer_lock);
+ if (err)
+ goto err2;
+ }
+
+ if (mask & IB_SRQ_LIMIT)
+ srq->limit = attr->srq_limit;
+
+ return 0;
+
+err2:
+ rxe_queue_cleanup(q);
+ srq->rq.queue = NULL;
+err1:
+ return err;
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_net.h"
+
+/* Copy argument and remove trailing CR. Return the new length. */
+static int sanitize_arg(const char *val, char *intf, int intf_len)
+{
+ int len;
+
+ if (!val)
+ return 0;
+
+ /* Remove newline. */
+ for (len = 0; len < intf_len - 1 && val[len] && val[len] != '\n'; len++)
+ intf[len] = val[len];
+ intf[len] = 0;
+
+ if (len == 0 || (val[len] != 0 && val[len] != '\n'))
+ return 0;
+
+ return len;
+}
+
+static void rxe_set_port_state(struct net_device *ndev)
+{
+ struct rxe_dev *rxe = net_to_rxe(ndev);
+ bool is_up = netif_running(ndev) && netif_carrier_ok(ndev);
+
+ if (!rxe)
+ goto out;
+
+ if (is_up)
+ rxe_port_up(rxe);
+ else
+ rxe_port_down(rxe); /* down for unknown state */
+out:
+ return;
+}
+
+static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
+{
+ int len;
+ int err = 0;
+ char intf[32];
+ struct net_device *ndev = NULL;
+ struct rxe_dev *rxe;
+
+ len = sanitize_arg(val, intf, sizeof(intf));
+ if (!len) {
+ pr_err("rxe: add: invalid interface name\n");
+ err = -EINVAL;
+ goto err;
+ }
+
+ ndev = dev_get_by_name(&init_net, intf);
+ if (!ndev) {
+ pr_err("interface %s not found\n", intf);
+ err = -EINVAL;
+ goto err;
+ }
+
+ if (net_to_rxe(ndev)) {
+ pr_err("rxe: already configured on %s\n", intf);
+ err = -EINVAL;
+ goto err;
+ }
+
+ rxe = rxe_net_add(ndev);
+ if (!rxe) {
+ pr_err("rxe: failed to add %s\n", intf);
+ err = -EINVAL;
+ goto err;
+ }
+
+ rxe_set_port_state(ndev);
+ pr_info("rxe: added %s to %s\n", rxe->ib_dev.name, intf);
+err:
+ if (ndev)
+ dev_put(ndev);
+ return err;
+}
+
+static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)
+{
+ int len;
+ char intf[32];
+ struct rxe_dev *rxe;
+
+ len = sanitize_arg(val, intf, sizeof(intf));
+ if (!len) {
+ pr_err("rxe: add: invalid interface name\n");
+ return -EINVAL;
+ }
+
+ if (strncmp("all", intf, len) == 0) {
+ pr_info("rxe_sys: remove all");
+ rxe_remove_all();
+ return 0;
+ }
+
+ rxe = get_rxe_by_name(intf);
+
+ if (!rxe) {
+ pr_err("rxe: not configured on %s\n", intf);
+ return -EINVAL;
+ }
+
+ list_del(&rxe->list);
+ rxe_remove(rxe);
+
+ return 0;
+}
+
+static const struct kernel_param_ops rxe_add_ops = {
+ .set = rxe_param_set_add,
+};
+
+static const struct kernel_param_ops rxe_remove_ops = {
+ .set = rxe_param_set_remove,
+};
+
+module_param_cb(add, &rxe_add_ops, NULL, 0200);
+MODULE_PARM_DESC(add, "Create RXE device over network interface");
+module_param_cb(remove, &rxe_remove_ops, NULL, 0200);
+MODULE_PARM_DESC(remove, "Remove RXE device over network interface");
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/hardirq.h>
+
+#include "rxe_task.h"
+
+int __rxe_do_task(struct rxe_task *task)
+
+{
+ int ret;
+
+ while ((ret = task->func(task->arg)) == 0)
+ ;
+
+ task->ret = ret;
+
+ return ret;
+}
+
+/*
+ * this locking is due to a potential race where
+ * a second caller finds the task already running
+ * but looks just after the last call to func
+ */
+void rxe_do_task(unsigned long data)
+{
+ int cont;
+ int ret;
+ unsigned long flags;
+ struct rxe_task *task = (struct rxe_task *)data;
+
+ spin_lock_irqsave(&task->state_lock, flags);
+ switch (task->state) {
+ case TASK_STATE_START:
+ task->state = TASK_STATE_BUSY;
+ spin_unlock_irqrestore(&task->state_lock, flags);
+ break;
+
+ case TASK_STATE_BUSY:
+ task->state = TASK_STATE_ARMED;
+ /* fall through to */
+ case TASK_STATE_ARMED:
+ spin_unlock_irqrestore(&task->state_lock, flags);
+ return;
+
+ default:
+ spin_unlock_irqrestore(&task->state_lock, flags);
+ pr_warn("bad state = %d in rxe_do_task\n", task->state);
+ return;
+ }
+
+ do {
+ cont = 0;
+ ret = task->func(task->arg);
+
+ spin_lock_irqsave(&task->state_lock, flags);
+ switch (task->state) {
+ case TASK_STATE_BUSY:
+ if (ret)
+ task->state = TASK_STATE_START;
+ else
+ cont = 1;
+ break;
+
+ /* soneone tried to run the task since the last time we called
+ * func, so we will call one more time regardless of the
+ * return value
+ */
+ case TASK_STATE_ARMED:
+ task->state = TASK_STATE_BUSY;
+ cont = 1;
+ break;
+
+ default:
+ pr_warn("bad state = %d in rxe_do_task\n",
+ task->state);
+ }
+ spin_unlock_irqrestore(&task->state_lock, flags);
+ } while (cont);
+
+ task->ret = ret;
+}
+
+int rxe_init_task(void *obj, struct rxe_task *task,
+ void *arg, int (*func)(void *), char *name)
+{
+ task->obj = obj;
+ task->arg = arg;
+ task->func = func;
+ snprintf(task->name, sizeof(task->name), "%s", name);
+
+ tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task);
+
+ task->state = TASK_STATE_START;
+ spin_lock_init(&task->state_lock);
+
+ return 0;
+}
+
+void rxe_cleanup_task(struct rxe_task *task)
+{
+ tasklet_kill(&task->tasklet);
+}
+
+void rxe_run_task(struct rxe_task *task, int sched)
+{
+ if (sched)
+ tasklet_schedule(&task->tasklet);
+ else
+ rxe_do_task((unsigned long)task);
+}
+
+void rxe_disable_task(struct rxe_task *task)
+{
+ tasklet_disable(&task->tasklet);
+}
+
+void rxe_enable_task(struct rxe_task *task)
+{
+ tasklet_enable(&task->tasklet);
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_TASK_H
+#define RXE_TASK_H
+
+enum {
+ TASK_STATE_START = 0,
+ TASK_STATE_BUSY = 1,
+ TASK_STATE_ARMED = 2,
+};
+
+/*
+ * data structure to describe a 'task' which is a short
+ * function that returns 0 as long as it needs to be
+ * called again.
+ */
+struct rxe_task {
+ void *obj;
+ struct tasklet_struct tasklet;
+ int state;
+ spinlock_t state_lock; /* spinlock for task state */
+ void *arg;
+ int (*func)(void *arg);
+ int ret;
+ char name[16];
+};
+
+/*
+ * init rxe_task structure
+ * arg => parameter to pass to fcn
+ * fcn => function to call until it returns != 0
+ */
+int rxe_init_task(void *obj, struct rxe_task *task,
+ void *arg, int (*func)(void *), char *name);
+
+/* cleanup task */
+void rxe_cleanup_task(struct rxe_task *task);
+
+/*
+ * raw call to func in loop without any checking
+ * can call when tasklets are disabled
+ */
+int __rxe_do_task(struct rxe_task *task);
+
+/*
+ * common function called by any of the main tasklets
+ * If there is any chance that there is additional
+ * work to do someone must reschedule the task before
+ * leaving
+ */
+void rxe_do_task(unsigned long data);
+
+/* run a task, else schedule it to run as a tasklet, The decision
+ * to run or schedule tasklet is based on the parameter sched.
+ */
+void rxe_run_task(struct rxe_task *task, int sched);
+
+/* keep a task from scheduling */
+void rxe_disable_task(struct rxe_task *task);
+
+/* allow task to run */
+void rxe_enable_task(struct rxe_task *task);
+
+#endif /* RXE_TASK_H */
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_queue.h"
+
+static int rxe_query_device(struct ib_device *dev,
+ struct ib_device_attr *attr,
+ struct ib_udata *uhw)
+{
+ struct rxe_dev *rxe = to_rdev(dev);
+
+ if (uhw->inlen || uhw->outlen)
+ return -EINVAL;
+
+ *attr = rxe->attr;
+ return 0;
+}
+
+static void rxe_eth_speed_to_ib_speed(int speed, u8 *active_speed,
+ u8 *active_width)
+{
+ if (speed <= 1000) {
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_SDR;
+ } else if (speed <= 10000) {
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_FDR10;
+ } else if (speed <= 20000) {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_DDR;
+ } else if (speed <= 30000) {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_QDR;
+ } else if (speed <= 40000) {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_FDR10;
+ } else {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_EDR;
+ }
+}
+
+static int rxe_query_port(struct ib_device *dev,
+ u8 port_num, struct ib_port_attr *attr)
+{
+ struct rxe_dev *rxe = to_rdev(dev);
+ struct rxe_port *port;
+ u32 speed;
+
+ if (unlikely(port_num != 1)) {
+ pr_warn("invalid port_number %d\n", port_num);
+ goto err1;
+ }
+
+ port = &rxe->port;
+
+ *attr = port->attr;
+
+ mutex_lock(&rxe->usdev_lock);
+ if (rxe->ndev->ethtool_ops->get_link_ksettings) {
+ struct ethtool_link_ksettings ks;
+
+ rxe->ndev->ethtool_ops->get_link_ksettings(rxe->ndev, &ks);
+ speed = ks.base.speed;
+ } else if (rxe->ndev->ethtool_ops->get_settings) {
+ struct ethtool_cmd cmd;
+
+ rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd);
+ speed = cmd.speed;
+ } else {
+ pr_warn("%s speed is unknown, defaulting to 1000\n", rxe->ndev->name);
+ speed = 1000;
+ }
+ rxe_eth_speed_to_ib_speed(speed, &attr->active_speed, &attr->active_width);
+ mutex_unlock(&rxe->usdev_lock);
+
+ return 0;
+
+err1:
+ return -EINVAL;
+}
+
+static int rxe_query_gid(struct ib_device *device,
+ u8 port_num, int index, union ib_gid *gid)
+{
+ int ret;
+
+ if (index > RXE_PORT_GID_TBL_LEN)
+ return -EINVAL;
+
+ ret = ib_get_cached_gid(device, port_num, index, gid, NULL);
+ if (ret == -EAGAIN) {
+ memcpy(gid, &zgid, sizeof(*gid));
+ return 0;
+ }
+
+ return ret;
+}
+
+static int rxe_add_gid(struct ib_device *device, u8 port_num, unsigned int
+ index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr, void **context)
+{
+ if (index >= RXE_PORT_GID_TBL_LEN)
+ return -EINVAL;
+ return 0;
+}
+
+static int rxe_del_gid(struct ib_device *device, u8 port_num, unsigned int
+ index, void **context)
+{
+ if (index >= RXE_PORT_GID_TBL_LEN)
+ return -EINVAL;
+ return 0;
+}
+
+static struct net_device *rxe_get_netdev(struct ib_device *device,
+ u8 port_num)
+{
+ struct rxe_dev *rxe = to_rdev(device);
+
+ if (rxe->ndev) {
+ dev_hold(rxe->ndev);
+ return rxe->ndev;
+ }
+
+ return NULL;
+}
+
+static int rxe_query_pkey(struct ib_device *device,
+ u8 port_num, u16 index, u16 *pkey)
+{
+ struct rxe_dev *rxe = to_rdev(device);
+ struct rxe_port *port;
+
+ if (unlikely(port_num != 1)) {
+ dev_warn(device->dma_device, "invalid port_num = %d\n",
+ port_num);
+ goto err1;
+ }
+
+ port = &rxe->port;
+
+ if (unlikely(index >= port->attr.pkey_tbl_len)) {
+ dev_warn(device->dma_device, "invalid index = %d\n",
+ index);
+ goto err1;
+ }
+
+ *pkey = port->pkey_tbl[index];
+ return 0;
+
+err1:
+ return -EINVAL;
+}
+
+static int rxe_modify_device(struct ib_device *dev,
+ int mask, struct ib_device_modify *attr)
+{
+ struct rxe_dev *rxe = to_rdev(dev);
+
+ if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
+ rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);
+
+ if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
+ memcpy(rxe->ib_dev.node_desc,
+ attr->node_desc, sizeof(rxe->ib_dev.node_desc));
+ }
+
+ return 0;
+}
+
+static int rxe_modify_port(struct ib_device *dev,
+ u8 port_num, int mask, struct ib_port_modify *attr)
+{
+ struct rxe_dev *rxe = to_rdev(dev);
+ struct rxe_port *port;
+
+ if (unlikely(port_num != 1)) {
+ pr_warn("invalid port_num = %d\n", port_num);
+ goto err1;
+ }
+
+ port = &rxe->port;
+
+ port->attr.port_cap_flags |= attr->set_port_cap_mask;
+ port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;
+
+ if (mask & IB_PORT_RESET_QKEY_CNTR)
+ port->attr.qkey_viol_cntr = 0;
+
+ return 0;
+
+err1:
+ return -EINVAL;
+}
+
+static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
+ u8 port_num)
+{
+ struct rxe_dev *rxe = to_rdev(dev);
+
+ return rxe->ifc_ops->link_layer(rxe, port_num);
+}
+
+static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
+ struct ib_udata *udata)
+{
+ struct rxe_dev *rxe = to_rdev(dev);
+ struct rxe_ucontext *uc;
+
+ uc = rxe_alloc(&rxe->uc_pool);
+ return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
+}
+
+static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
+{
+ struct rxe_ucontext *uc = to_ruc(ibuc);
+
+ rxe_drop_ref(uc);
+ return 0;
+}
+
+static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ int err;
+ struct ib_port_attr attr;
+
+ err = rxe_query_port(dev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+
+ return 0;
+}
+
+static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct rxe_dev *rxe = to_rdev(dev);
+ struct rxe_pd *pd;
+
+ pd = rxe_alloc(&rxe->pd_pool);
+ return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
+}
+
+static int rxe_dealloc_pd(struct ib_pd *ibpd)
+{
+ struct rxe_pd *pd = to_rpd(ibpd);
+
+ rxe_drop_ref(pd);
+ return 0;
+}
+
+static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr,
+ struct rxe_av *av)
+{
+ int err;
+ union ib_gid sgid;
+ struct ib_gid_attr sgid_attr;
+
+ err = ib_get_cached_gid(&rxe->ib_dev, attr->port_num,
+ attr->grh.sgid_index, &sgid,
+ &sgid_attr);
+ if (err) {
+ pr_err("Failed to query sgid. err = %d\n", err);
+ return err;
+ }
+
+ err = rxe_av_from_attr(rxe, attr->port_num, av, attr);
+ if (!err)
+ err = rxe_av_fill_ip_info(rxe, av, attr, &sgid_attr, &sgid);
+
+ if (sgid_attr.ndev)
+ dev_put(sgid_attr.ndev);
+ return err;
+}
+
+static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
+{
+ int err;
+ struct rxe_dev *rxe = to_rdev(ibpd->device);
+ struct rxe_pd *pd = to_rpd(ibpd);
+ struct rxe_ah *ah;
+
+ err = rxe_av_chk_attr(rxe, attr);
+ if (err)
+ goto err1;
+
+ ah = rxe_alloc(&rxe->ah_pool);
+ if (!ah) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ rxe_add_ref(pd);
+ ah->pd = pd;
+
+ err = rxe_init_av(rxe, attr, &ah->av);
+ if (err)
+ goto err2;
+
+ return &ah->ibah;
+
+err2:
+ rxe_drop_ref(pd);
+ rxe_drop_ref(ah);
+err1:
+ return ERR_PTR(err);
+}
+
+static int rxe_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
+{
+ int err;
+ struct rxe_dev *rxe = to_rdev(ibah->device);
+ struct rxe_ah *ah = to_rah(ibah);
+
+ err = rxe_av_chk_attr(rxe, attr);
+ if (err)
+ return err;
+
+ err = rxe_init_av(rxe, attr, &ah->av);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int rxe_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
+{
+ struct rxe_dev *rxe = to_rdev(ibah->device);
+ struct rxe_ah *ah = to_rah(ibah);
+
+ rxe_av_to_attr(rxe, &ah->av, attr);
+ return 0;
+}
+
+static int rxe_destroy_ah(struct ib_ah *ibah)
+{
+ struct rxe_ah *ah = to_rah(ibah);
+
+ rxe_drop_ref(ah->pd);
+ rxe_drop_ref(ah);
+ return 0;
+}
+
+static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr)
+{
+ int err;
+ int i;
+ u32 length;
+ struct rxe_recv_wqe *recv_wqe;
+ int num_sge = ibwr->num_sge;
+
+ if (unlikely(queue_full(rq->queue))) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ if (unlikely(num_sge > rq->max_sge)) {
+ err = -EINVAL;
+ goto err1;
+ }
+
+ length = 0;
+ for (i = 0; i < num_sge; i++)
+ length += ibwr->sg_list[i].length;
+
+ recv_wqe = producer_addr(rq->queue);
+ recv_wqe->wr_id = ibwr->wr_id;
+ recv_wqe->num_sge = num_sge;
+
+ memcpy(recv_wqe->dma.sge, ibwr->sg_list,
+ num_sge * sizeof(struct ib_sge));
+
+ recv_wqe->dma.length = length;
+ recv_wqe->dma.resid = length;
+ recv_wqe->dma.num_sge = num_sge;
+ recv_wqe->dma.cur_sge = 0;
+ recv_wqe->dma.sge_offset = 0;
+
+ /* make sure all changes to the work queue are written before we
+ * update the producer pointer
+ */
+ smp_wmb();
+
+ advance_producer(rq->queue);
+ return 0;
+
+err1:
+ return err;
+}
+
+static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
+ struct ib_srq_init_attr *init,
+ struct ib_udata *udata)
+{
+ int err;
+ struct rxe_dev *rxe = to_rdev(ibpd->device);
+ struct rxe_pd *pd = to_rpd(ibpd);
+ struct rxe_srq *srq;
+ struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
+
+ err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
+ if (err)
+ goto err1;
+
+ srq = rxe_alloc(&rxe->srq_pool);
+ if (!srq) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ rxe_add_index(srq);
+ rxe_add_ref(pd);
+ srq->pd = pd;
+
+ err = rxe_srq_from_init(rxe, srq, init, context, udata);
+ if (err)
+ goto err2;
+
+ return &srq->ibsrq;
+
+err2:
+ rxe_drop_ref(pd);
+ rxe_drop_index(srq);
+ rxe_drop_ref(srq);
+err1:
+ return ERR_PTR(err);
+}
+
+static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask mask,
+ struct ib_udata *udata)
+{
+ int err;
+ struct rxe_srq *srq = to_rsrq(ibsrq);
+ struct rxe_dev *rxe = to_rdev(ibsrq->device);
+
+ err = rxe_srq_chk_attr(rxe, srq, attr, mask);
+ if (err)
+ goto err1;
+
+ err = rxe_srq_from_attr(rxe, srq, attr, mask, udata);
+ if (err)
+ goto err1;
+
+ return 0;
+
+err1:
+ return err;
+}
+
+static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+ struct rxe_srq *srq = to_rsrq(ibsrq);
+
+ if (srq->error)
+ return -EINVAL;
+
+ attr->max_wr = srq->rq.queue->buf->index_mask;
+ attr->max_sge = srq->rq.max_sge;
+ attr->srq_limit = srq->limit;
+ return 0;
+}
+
+static int rxe_destroy_srq(struct ib_srq *ibsrq)
+{
+ struct rxe_srq *srq = to_rsrq(ibsrq);
+
+ if (srq->rq.queue)
+ rxe_queue_cleanup(srq->rq.queue);
+
+ rxe_drop_ref(srq->pd);
+ rxe_drop_index(srq);
+ rxe_drop_ref(srq);
+
+ return 0;
+}
+
+static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ int err = 0;
+ unsigned long flags;
+ struct rxe_srq *srq = to_rsrq(ibsrq);
+
+ spin_lock_irqsave(&srq->rq.producer_lock, flags);
+
+ while (wr) {
+ err = post_one_recv(&srq->rq, wr);
+ if (unlikely(err))
+ break;
+ wr = wr->next;
+ }
+
+ spin_unlock_irqrestore(&srq->rq.producer_lock, flags);
+
+ if (err)
+ *bad_wr = wr;
+
+ return err;
+}
+
+static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init,
+ struct ib_udata *udata)
+{
+ int err;
+ struct rxe_dev *rxe = to_rdev(ibpd->device);
+ struct rxe_pd *pd = to_rpd(ibpd);
+ struct rxe_qp *qp;
+
+ err = rxe_qp_chk_init(rxe, init);
+ if (err)
+ goto err1;
+
+ qp = rxe_alloc(&rxe->qp_pool);
+ if (!qp) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ if (udata) {
+ if (udata->inlen) {
+ err = -EINVAL;
+ goto err1;
+ }
+ qp->is_user = 1;
+ }
+
+ rxe_add_index(qp);
+
+ err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd);
+ if (err)
+ goto err2;
+
+ return &qp->ibqp;
+
+err2:
+ rxe_drop_index(qp);
+ rxe_drop_ref(qp);
+err1:
+ return ERR_PTR(err);
+}
+
+static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int mask, struct ib_udata *udata)
+{
+ int err;
+ struct rxe_dev *rxe = to_rdev(ibqp->device);
+ struct rxe_qp *qp = to_rqp(ibqp);
+
+ err = rxe_qp_chk_attr(rxe, qp, attr, mask);
+ if (err)
+ goto err1;
+
+ err = rxe_qp_from_attr(qp, attr, mask, udata);
+ if (err)
+ goto err1;
+
+ return 0;
+
+err1:
+ return err;
+}
+
+static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int mask, struct ib_qp_init_attr *init)
+{
+ struct rxe_qp *qp = to_rqp(ibqp);
+
+ rxe_qp_to_init(qp, init);
+ rxe_qp_to_attr(qp, attr, mask);
+
+ return 0;
+}
+
+static int rxe_destroy_qp(struct ib_qp *ibqp)
+{
+ struct rxe_qp *qp = to_rqp(ibqp);
+
+ rxe_qp_destroy(qp);
+ rxe_drop_index(qp);
+ rxe_drop_ref(qp);
+ return 0;
+}
+
+static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr,
+ unsigned int mask, unsigned int length)
+{
+ int num_sge = ibwr->num_sge;
+ struct rxe_sq *sq = &qp->sq;
+
+ if (unlikely(num_sge > sq->max_sge))
+ goto err1;
+
+ if (unlikely(mask & WR_ATOMIC_MASK)) {
+ if (length < 8)
+ goto err1;
+
+ if (atomic_wr(ibwr)->remote_addr & 0x7)
+ goto err1;
+ }
+
+ if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
+ (length > sq->max_inline)))
+ goto err1;
+
+ return 0;
+
+err1:
+ return -EINVAL;
+}
+
+static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
+ struct ib_send_wr *ibwr)
+{
+ wr->wr_id = ibwr->wr_id;
+ wr->num_sge = ibwr->num_sge;
+ wr->opcode = ibwr->opcode;
+ wr->send_flags = ibwr->send_flags;
+
+ if (qp_type(qp) == IB_QPT_UD ||
+ qp_type(qp) == IB_QPT_SMI ||
+ qp_type(qp) == IB_QPT_GSI) {
+ wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
+ wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
+ if (qp_type(qp) == IB_QPT_GSI)
+ wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
+ if (wr->opcode == IB_WR_SEND_WITH_IMM)
+ wr->ex.imm_data = ibwr->ex.imm_data;
+ } else {
+ switch (wr->opcode) {
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ wr->ex.imm_data = ibwr->ex.imm_data;
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
+ wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ wr->ex.imm_data = ibwr->ex.imm_data;
+ break;
+ case IB_WR_SEND_WITH_INV:
+ wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ wr->wr.atomic.remote_addr =
+ atomic_wr(ibwr)->remote_addr;
+ wr->wr.atomic.compare_add =
+ atomic_wr(ibwr)->compare_add;
+ wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
+ wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
+ break;
+ case IB_WR_LOCAL_INV:
+ wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
+ break;
+ case IB_WR_REG_MR:
+ wr->wr.reg.mr = reg_wr(ibwr)->mr;
+ wr->wr.reg.key = reg_wr(ibwr)->key;
+ wr->wr.reg.access = reg_wr(ibwr)->access;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
+ unsigned int mask, unsigned int length,
+ struct rxe_send_wqe *wqe)
+{
+ int num_sge = ibwr->num_sge;
+ struct ib_sge *sge;
+ int i;
+ u8 *p;
+
+ init_send_wr(qp, &wqe->wr, ibwr);
+
+ if (qp_type(qp) == IB_QPT_UD ||
+ qp_type(qp) == IB_QPT_SMI ||
+ qp_type(qp) == IB_QPT_GSI)
+ memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));
+
+ if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
+ p = wqe->dma.inline_data;
+
+ sge = ibwr->sg_list;
+ for (i = 0; i < num_sge; i++, sge++) {
+ if (qp->is_user && copy_from_user(p, (__user void *)
+ (uintptr_t)sge->addr, sge->length))
+ return -EFAULT;
+
+ else if (!qp->is_user)
+ memcpy(p, (void *)(uintptr_t)sge->addr,
+ sge->length);
+
+ p += sge->length;
+ }
+ } else if (mask & WR_REG_MASK) {
+ wqe->mask = mask;
+ wqe->state = wqe_state_posted;
+ return 0;
+ } else
+ memcpy(wqe->dma.sge, ibwr->sg_list,
+ num_sge * sizeof(struct ib_sge));
+
+ wqe->iova = (mask & WR_ATOMIC_MASK) ?
+ atomic_wr(ibwr)->remote_addr :
+ rdma_wr(ibwr)->remote_addr;
+ wqe->mask = mask;
+ wqe->dma.length = length;
+ wqe->dma.resid = length;
+ wqe->dma.num_sge = num_sge;
+ wqe->dma.cur_sge = 0;
+ wqe->dma.sge_offset = 0;
+ wqe->state = wqe_state_posted;
+ wqe->ssn = atomic_add_return(1, &qp->ssn);
+
+ return 0;
+}
+
+static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr,
+ unsigned mask, u32 length)
+{
+ int err;
+ struct rxe_sq *sq = &qp->sq;
+ struct rxe_send_wqe *send_wqe;
+ unsigned long flags;
+
+ err = validate_send_wr(qp, ibwr, mask, length);
+ if (err)
+ return err;
+
+ spin_lock_irqsave(&qp->sq.sq_lock, flags);
+
+ if (unlikely(queue_full(sq->queue))) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ send_wqe = producer_addr(sq->queue);
+
+ err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
+ if (unlikely(err))
+ goto err1;
+
+ /*
+ * make sure all changes to the work queue are
+ * written before we update the producer pointer
+ */
+ smp_wmb();
+
+ advance_producer(sq->queue);
+ spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
+
+ return 0;
+
+err1:
+ spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
+ return err;
+}
+
+static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ int err = 0;
+ struct rxe_qp *qp = to_rqp(ibqp);
+ unsigned int mask;
+ unsigned int length = 0;
+ int i;
+ int must_sched;
+
+ if (unlikely(!qp->valid)) {
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+
+ if (unlikely(qp->req.state < QP_STATE_READY)) {
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+
+ while (wr) {
+ mask = wr_opcode_mask(wr->opcode, qp);
+ if (unlikely(!mask)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+
+ if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
+ !(mask & WR_INLINE_MASK))) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+
+ length = 0;
+ for (i = 0; i < wr->num_sge; i++)
+ length += wr->sg_list[i].length;
+
+ err = post_one_send(qp, wr, mask, length);
+
+ if (err) {
+ *bad_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+
+ /*
+ * Must sched in case of GSI QP because ib_send_mad() hold irq lock,
+ * and the requester call ip_local_out_sk() that takes spin_lock_bh.
+ */
+ must_sched = (qp_type(qp) == IB_QPT_GSI) ||
+ (queue_count(qp->sq.queue) > 1);
+
+ rxe_run_task(&qp->req.task, must_sched);
+
+ return err;
+}
+
+static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ int err = 0;
+ struct rxe_qp *qp = to_rqp(ibqp);
+ struct rxe_rq *rq = &qp->rq;
+ unsigned long flags;
+
+ if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
+ *bad_wr = wr;
+ err = -EINVAL;
+ goto err1;
+ }
+
+ if (unlikely(qp->srq)) {
+ *bad_wr = wr;
+ err = -EINVAL;
+ goto err1;
+ }
+
+ spin_lock_irqsave(&rq->producer_lock, flags);
+
+ while (wr) {
+ err = post_one_recv(rq, wr);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+
+ spin_unlock_irqrestore(&rq->producer_lock, flags);
+
+err1:
+ return err;
+}
+
+static struct ib_cq *rxe_create_cq(struct ib_device *dev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ int err;
+ struct rxe_dev *rxe = to_rdev(dev);
+ struct rxe_cq *cq;
+
+ if (attr->flags)
+ return ERR_PTR(-EINVAL);
+
+ err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector, udata);
+ if (err)
+ goto err1;
+
+ cq = rxe_alloc(&rxe->cq_pool);
+ if (!cq) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
+ context, udata);
+ if (err)
+ goto err2;
+
+ return &cq->ibcq;
+
+err2:
+ rxe_drop_ref(cq);
+err1:
+ return ERR_PTR(err);
+}
+
+static int rxe_destroy_cq(struct ib_cq *ibcq)
+{
+ struct rxe_cq *cq = to_rcq(ibcq);
+
+ rxe_drop_ref(cq);
+ return 0;
+}
+
+static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
+{
+ int err;
+ struct rxe_cq *cq = to_rcq(ibcq);
+ struct rxe_dev *rxe = to_rdev(ibcq->device);
+
+ err = rxe_cq_chk_attr(rxe, cq, cqe, 0, udata);
+ if (err)
+ goto err1;
+
+ err = rxe_cq_resize_queue(cq, cqe, udata);
+ if (err)
+ goto err1;
+
+ return 0;
+
+err1:
+ return err;
+}
+
+static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ int i;
+ struct rxe_cq *cq = to_rcq(ibcq);
+ struct rxe_cqe *cqe;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ for (i = 0; i < num_entries; i++) {
+ cqe = queue_head(cq->queue);
+ if (!cqe)
+ break;
+
+ memcpy(wc++, &cqe->ibwc, sizeof(*wc));
+ advance_consumer(cq->queue);
+ }
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ return i;
+}
+
+static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
+{
+ struct rxe_cq *cq = to_rcq(ibcq);
+ int count = queue_count(cq->queue);
+
+ return (count > wc_cnt) ? wc_cnt : count;
+}
+
+static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct rxe_cq *cq = to_rcq(ibcq);
+
+ if (cq->notify != IB_CQ_NEXT_COMP)
+ cq->notify = flags & IB_CQ_SOLICITED_MASK;
+
+ return 0;
+}
+
+static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
+{
+ struct rxe_dev *rxe = to_rdev(ibpd->device);
+ struct rxe_pd *pd = to_rpd(ibpd);
+ struct rxe_mem *mr;
+ int err;
+
+ mr = rxe_alloc(&rxe->mr_pool);
+ if (!mr) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ rxe_add_index(mr);
+
+ rxe_add_ref(pd);
+
+ err = rxe_mem_init_dma(rxe, pd, access, mr);
+ if (err)
+ goto err2;
+
+ return &mr->ibmr;
+
+err2:
+ rxe_drop_ref(pd);
+ rxe_drop_index(mr);
+ rxe_drop_ref(mr);
+err1:
+ return ERR_PTR(err);
+}
+
+static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
+ u64 start,
+ u64 length,
+ u64 iova,
+ int access, struct ib_udata *udata)
+{
+ int err;
+ struct rxe_dev *rxe = to_rdev(ibpd->device);
+ struct rxe_pd *pd = to_rpd(ibpd);
+ struct rxe_mem *mr;
+
+ mr = rxe_alloc(&rxe->mr_pool);
+ if (!mr) {
+ err = -ENOMEM;
+ goto err2;
+ }
+
+ rxe_add_index(mr);
+
+ rxe_add_ref(pd);
+
+ err = rxe_mem_init_user(rxe, pd, start, length, iova,
+ access, udata, mr);
+ if (err)
+ goto err3;
+
+ return &mr->ibmr;
+
+err3:
+ rxe_drop_ref(pd);
+ rxe_drop_index(mr);
+ rxe_drop_ref(mr);
+err2:
+ return ERR_PTR(err);
+}
+
+static int rxe_dereg_mr(struct ib_mr *ibmr)
+{
+ struct rxe_mem *mr = to_rmr(ibmr);
+
+ mr->state = RXE_MEM_STATE_ZOMBIE;
+ rxe_drop_ref(mr->pd);
+ rxe_drop_index(mr);
+ rxe_drop_ref(mr);
+ return 0;
+}
+
+static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,
+ enum ib_mr_type mr_type,
+ u32 max_num_sg)
+{
+ struct rxe_dev *rxe = to_rdev(ibpd->device);
+ struct rxe_pd *pd = to_rpd(ibpd);
+ struct rxe_mem *mr;
+ int err;
+
+ if (mr_type != IB_MR_TYPE_MEM_REG)
+ return ERR_PTR(-EINVAL);
+
+ mr = rxe_alloc(&rxe->mr_pool);
+ if (!mr) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ rxe_add_index(mr);
+
+ rxe_add_ref(pd);
+
+ err = rxe_mem_init_fast(rxe, pd, max_num_sg, mr);
+ if (err)
+ goto err2;
+
+ return &mr->ibmr;
+
+err2:
+ rxe_drop_ref(pd);
+ rxe_drop_index(mr);
+ rxe_drop_ref(mr);
+err1:
+ return ERR_PTR(err);
+}
+
+static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct rxe_mem *mr = to_rmr(ibmr);
+ struct rxe_map *map;
+ struct rxe_phys_buf *buf;
+
+ if (unlikely(mr->nbuf == mr->num_buf))
+ return -ENOMEM;
+
+ map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
+ buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];
+
+ buf->addr = addr;
+ buf->size = ibmr->page_size;
+ mr->nbuf++;
+
+ return 0;
+}
+
+static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
+{
+ struct rxe_mem *mr = to_rmr(ibmr);
+ int n;
+
+ mr->nbuf = 0;
+
+ n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);
+
+ mr->va = ibmr->iova;
+ mr->iova = ibmr->iova;
+ mr->length = ibmr->length;
+ mr->page_shift = ilog2(ibmr->page_size);
+ mr->page_mask = ibmr->page_size - 1;
+ mr->offset = mr->iova & mr->page_mask;
+
+ return n;
+}
+
+static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
+{
+ int err;
+ struct rxe_dev *rxe = to_rdev(ibqp->device);
+ struct rxe_qp *qp = to_rqp(ibqp);
+ struct rxe_mc_grp *grp;
+
+ /* takes a ref on grp if successful */
+ err = rxe_mcast_get_grp(rxe, mgid, &grp);
+ if (err)
+ return err;
+
+ err = rxe_mcast_add_grp_elem(rxe, qp, grp);
+
+ rxe_drop_ref(grp);
+ return err;
+}
+
+static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
+{
+ struct rxe_dev *rxe = to_rdev(ibqp->device);
+ struct rxe_qp *qp = to_rqp(ibqp);
+
+ return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
+}
+
+static ssize_t rxe_show_parent(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct rxe_dev *rxe = container_of(device, struct rxe_dev,
+ ib_dev.dev);
+ char *name;
+
+ name = rxe->ifc_ops->parent_name(rxe, 1);
+ return snprintf(buf, 16, "%s\n", name);
+}
+
+static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL);
+
+static struct device_attribute *rxe_dev_attributes[] = {
+ &dev_attr_parent,
+};
+
+int rxe_register_device(struct rxe_dev *rxe)
+{
+ int err;
+ int i;
+ struct ib_device *dev = &rxe->ib_dev;
+
+ strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
+ strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
+
+ dev->owner = THIS_MODULE;
+ dev->node_type = RDMA_NODE_IB_CA;
+ dev->phys_port_cnt = 1;
+ dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
+ dev->dma_device = rxe->ifc_ops->dma_device(rxe);
+ dev->local_dma_lkey = 0;
+ dev->node_guid = rxe->ifc_ops->node_guid(rxe);
+ dev->dma_ops = &rxe_dma_mapping_ops;
+
+ dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
+ dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
+ | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
+ | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
+ | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
+ | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
+ | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
+ | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
+ | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
+ | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
+ | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
+ | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
+ | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
+ | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
+ | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
+ | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
+ | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
+ | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
+ | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
+ | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
+ | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
+ | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
+ | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
+ | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
+ | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
+ | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
+ | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
+ | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
+ | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
+ | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
+ | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
+ | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
+ ;
+
+ dev->query_device = rxe_query_device;
+ dev->modify_device = rxe_modify_device;
+ dev->query_port = rxe_query_port;
+ dev->modify_port = rxe_modify_port;
+ dev->get_link_layer = rxe_get_link_layer;
+ dev->query_gid = rxe_query_gid;
+ dev->get_netdev = rxe_get_netdev;
+ dev->add_gid = rxe_add_gid;
+ dev->del_gid = rxe_del_gid;
+ dev->query_pkey = rxe_query_pkey;
+ dev->alloc_ucontext = rxe_alloc_ucontext;
+ dev->dealloc_ucontext = rxe_dealloc_ucontext;
+ dev->mmap = rxe_mmap;
+ dev->get_port_immutable = rxe_port_immutable;
+ dev->alloc_pd = rxe_alloc_pd;
+ dev->dealloc_pd = rxe_dealloc_pd;
+ dev->create_ah = rxe_create_ah;
+ dev->modify_ah = rxe_modify_ah;
+ dev->query_ah = rxe_query_ah;
+ dev->destroy_ah = rxe_destroy_ah;
+ dev->create_srq = rxe_create_srq;
+ dev->modify_srq = rxe_modify_srq;
+ dev->query_srq = rxe_query_srq;
+ dev->destroy_srq = rxe_destroy_srq;
+ dev->post_srq_recv = rxe_post_srq_recv;
+ dev->create_qp = rxe_create_qp;
+ dev->modify_qp = rxe_modify_qp;
+ dev->query_qp = rxe_query_qp;
+ dev->destroy_qp = rxe_destroy_qp;
+ dev->post_send = rxe_post_send;
+ dev->post_recv = rxe_post_recv;
+ dev->create_cq = rxe_create_cq;
+ dev->destroy_cq = rxe_destroy_cq;
+ dev->resize_cq = rxe_resize_cq;
+ dev->poll_cq = rxe_poll_cq;
+ dev->peek_cq = rxe_peek_cq;
+ dev->req_notify_cq = rxe_req_notify_cq;
+ dev->get_dma_mr = rxe_get_dma_mr;
+ dev->reg_user_mr = rxe_reg_user_mr;
+ dev->dereg_mr = rxe_dereg_mr;
+ dev->alloc_mr = rxe_alloc_mr;
+ dev->map_mr_sg = rxe_map_mr_sg;
+ dev->attach_mcast = rxe_attach_mcast;
+ dev->detach_mcast = rxe_detach_mcast;
+
+ err = ib_register_device(dev, NULL);
+ if (err) {
+ pr_warn("rxe_register_device failed, err = %d\n", err);
+ goto err1;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
+ err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
+ if (err) {
+ pr_warn("device_create_file failed, i = %d, err = %d\n",
+ i, err);
+ goto err2;
+ }
+ }
+
+ return 0;
+
+err2:
+ ib_unregister_device(dev);
+err1:
+ return err;
+}
+
+int rxe_unregister_device(struct rxe_dev *rxe)
+{
+ int i;
+ struct ib_device *dev = &rxe->ib_dev;
+
+ for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
+ device_remove_file(&dev->dev, rxe_dev_attributes[i]);
+
+ ib_unregister_device(dev);
+
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_VERBS_H
+#define RXE_VERBS_H
+
+#include <linux/interrupt.h>
+#include <rdma/rdma_user_rxe.h>
+#include "rxe_pool.h"
+#include "rxe_task.h"
+
+static inline int pkey_match(u16 key1, u16 key2)
+{
+ return (((key1 & 0x7fff) != 0) &&
+ ((key1 & 0x7fff) == (key2 & 0x7fff)) &&
+ ((key1 & 0x8000) || (key2 & 0x8000))) ? 1 : 0;
+}
+
+/* Return >0 if psn_a > psn_b
+ * 0 if psn_a == psn_b
+ * <0 if psn_a < psn_b
+ */
+static inline int psn_compare(u32 psn_a, u32 psn_b)
+{
+ s32 diff;
+
+ diff = (psn_a - psn_b) << 8;
+ return diff;
+}
+
+struct rxe_ucontext {
+ struct rxe_pool_entry pelem;
+ struct ib_ucontext ibuc;
+};
+
+struct rxe_pd {
+ struct rxe_pool_entry pelem;
+ struct ib_pd ibpd;
+};
+
+struct rxe_ah {
+ struct rxe_pool_entry pelem;
+ struct ib_ah ibah;
+ struct rxe_pd *pd;
+ struct rxe_av av;
+};
+
+struct rxe_cqe {
+ union {
+ struct ib_wc ibwc;
+ struct ib_uverbs_wc uibwc;
+ };
+};
+
+struct rxe_cq {
+ struct rxe_pool_entry pelem;
+ struct ib_cq ibcq;
+ struct rxe_queue *queue;
+ spinlock_t cq_lock;
+ u8 notify;
+ int is_user;
+ struct tasklet_struct comp_task;
+};
+
+enum wqe_state {
+ wqe_state_posted,
+ wqe_state_processing,
+ wqe_state_pending,
+ wqe_state_done,
+ wqe_state_error,
+};
+
+struct rxe_sq {
+ int max_wr;
+ int max_sge;
+ int max_inline;
+ spinlock_t sq_lock; /* guard queue */
+ struct rxe_queue *queue;
+};
+
+struct rxe_rq {
+ int max_wr;
+ int max_sge;
+ spinlock_t producer_lock; /* guard queue producer */
+ spinlock_t consumer_lock; /* guard queue consumer */
+ struct rxe_queue *queue;
+};
+
+struct rxe_srq {
+ struct rxe_pool_entry pelem;
+ struct ib_srq ibsrq;
+ struct rxe_pd *pd;
+ struct rxe_rq rq;
+ u32 srq_num;
+
+ int limit;
+ int error;
+};
+
+enum rxe_qp_state {
+ QP_STATE_RESET,
+ QP_STATE_INIT,
+ QP_STATE_READY,
+ QP_STATE_DRAIN, /* req only */
+ QP_STATE_DRAINED, /* req only */
+ QP_STATE_ERROR
+};
+
+extern char *rxe_qp_state_name[];
+
+struct rxe_req_info {
+ enum rxe_qp_state state;
+ int wqe_index;
+ u32 psn;
+ int opcode;
+ atomic_t rd_atomic;
+ int wait_fence;
+ int need_rd_atomic;
+ int wait_psn;
+ int need_retry;
+ int noack_pkts;
+ struct rxe_task task;
+};
+
+struct rxe_comp_info {
+ u32 psn;
+ int opcode;
+ int timeout;
+ int timeout_retry;
+ u32 retry_cnt;
+ u32 rnr_retry;
+ struct rxe_task task;
+};
+
+enum rdatm_res_state {
+ rdatm_res_state_next,
+ rdatm_res_state_new,
+ rdatm_res_state_replay,
+};
+
+struct resp_res {
+ int type;
+ u32 first_psn;
+ u32 last_psn;
+ u32 cur_psn;
+ enum rdatm_res_state state;
+
+ union {
+ struct {
+ struct sk_buff *skb;
+ } atomic;
+ struct {
+ struct rxe_mem *mr;
+ u64 va_org;
+ u32 rkey;
+ u32 length;
+ u64 va;
+ u32 resid;
+ } read;
+ };
+};
+
+struct rxe_resp_info {
+ enum rxe_qp_state state;
+ u32 msn;
+ u32 psn;
+ int opcode;
+ int drop_msg;
+ int goto_error;
+ int sent_psn_nak;
+ enum ib_wc_status status;
+ u8 aeth_syndrome;
+
+ /* Receive only */
+ struct rxe_recv_wqe *wqe;
+
+ /* RDMA read / atomic only */
+ u64 va;
+ struct rxe_mem *mr;
+ u32 resid;
+ u32 rkey;
+ u64 atomic_orig;
+
+ /* SRQ only */
+ struct {
+ struct rxe_recv_wqe wqe;
+ struct ib_sge sge[RXE_MAX_SGE];
+ } srq_wqe;
+
+ /* Responder resources. It's a circular list where the oldest
+ * resource is dropped first.
+ */
+ struct resp_res *resources;
+ unsigned int res_head;
+ unsigned int res_tail;
+ struct resp_res *res;
+ struct rxe_task task;
+};
+
+struct rxe_qp {
+ struct rxe_pool_entry pelem;
+ struct ib_qp ibqp;
+ struct ib_qp_attr attr;
+ unsigned int valid;
+ unsigned int mtu;
+ int is_user;
+
+ struct rxe_pd *pd;
+ struct rxe_srq *srq;
+ struct rxe_cq *scq;
+ struct rxe_cq *rcq;
+
+ enum ib_sig_type sq_sig_type;
+
+ struct rxe_sq sq;
+ struct rxe_rq rq;
+
+ struct socket *sk;
+
+ struct rxe_av pri_av;
+ struct rxe_av alt_av;
+
+ /* list of mcast groups qp has joined (for cleanup) */
+ struct list_head grp_list;
+ spinlock_t grp_lock; /* guard grp_list */
+
+ struct sk_buff_head req_pkts;
+ struct sk_buff_head resp_pkts;
+ struct sk_buff_head send_pkts;
+
+ struct rxe_req_info req;
+ struct rxe_comp_info comp;
+ struct rxe_resp_info resp;
+
+ atomic_t ssn;
+ atomic_t skb_out;
+ int need_req_skb;
+
+ /* Timer for retranmitting packet when ACKs have been lost. RC
+ * only. The requester sets it when it is not already
+ * started. The responder resets it whenever an ack is
+ * received.
+ */
+ struct timer_list retrans_timer;
+ u64 qp_timeout_jiffies;
+
+ /* Timer for handling RNR NAKS. */
+ struct timer_list rnr_nak_timer;
+
+ spinlock_t state_lock; /* guard requester and completer */
+};
+
+enum rxe_mem_state {
+ RXE_MEM_STATE_ZOMBIE,
+ RXE_MEM_STATE_INVALID,
+ RXE_MEM_STATE_FREE,
+ RXE_MEM_STATE_VALID,
+};
+
+enum rxe_mem_type {
+ RXE_MEM_TYPE_NONE,
+ RXE_MEM_TYPE_DMA,
+ RXE_MEM_TYPE_MR,
+ RXE_MEM_TYPE_FMR,
+ RXE_MEM_TYPE_MW,
+};
+
+#define RXE_BUF_PER_MAP (PAGE_SIZE / sizeof(struct rxe_phys_buf))
+
+struct rxe_phys_buf {
+ u64 addr;
+ u64 size;
+};
+
+struct rxe_map {
+ struct rxe_phys_buf buf[RXE_BUF_PER_MAP];
+};
+
+struct rxe_mem {
+ struct rxe_pool_entry pelem;
+ union {
+ struct ib_mr ibmr;
+ struct ib_mw ibmw;
+ };
+
+ struct rxe_pd *pd;
+ struct ib_umem *umem;
+
+ u32 lkey;
+ u32 rkey;
+
+ enum rxe_mem_state state;
+ enum rxe_mem_type type;
+ u64 va;
+ u64 iova;
+ size_t length;
+ u32 offset;
+ int access;
+
+ int page_shift;
+ int page_mask;
+ int map_shift;
+ int map_mask;
+
+ u32 num_buf;
+ u32 nbuf;
+
+ u32 max_buf;
+ u32 num_map;
+
+ struct rxe_map **map;
+};
+
+struct rxe_mc_grp {
+ struct rxe_pool_entry pelem;
+ spinlock_t mcg_lock; /* guard group */
+ struct rxe_dev *rxe;
+ struct list_head qp_list;
+ union ib_gid mgid;
+ int num_qp;
+ u32 qkey;
+ u16 pkey;
+};
+
+struct rxe_mc_elem {
+ struct rxe_pool_entry pelem;
+ struct list_head qp_list;
+ struct list_head grp_list;
+ struct rxe_qp *qp;
+ struct rxe_mc_grp *grp;
+};
+
+struct rxe_port {
+ struct ib_port_attr attr;
+ u16 *pkey_tbl;
+ __be64 port_guid;
+ __be64 subnet_prefix;
+ spinlock_t port_lock; /* guard port */
+ unsigned int mtu_cap;
+ /* special QPs */
+ u32 qp_smi_index;
+ u32 qp_gsi_index;
+};
+
+/* callbacks from rdma_rxe to network interface layer */
+struct rxe_ifc_ops {
+ void (*release)(struct rxe_dev *rxe);
+ __be64 (*node_guid)(struct rxe_dev *rxe);
+ __be64 (*port_guid)(struct rxe_dev *rxe);
+ struct device *(*dma_device)(struct rxe_dev *rxe);
+ int (*mcast_add)(struct rxe_dev *rxe, union ib_gid *mgid);
+ int (*mcast_delete)(struct rxe_dev *rxe, union ib_gid *mgid);
+ int (*prepare)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb, u32 *crc);
+ int (*send)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb);
+ int (*loopback)(struct sk_buff *skb);
+ struct sk_buff *(*init_packet)(struct rxe_dev *rxe, struct rxe_av *av,
+ int paylen, struct rxe_pkt_info *pkt);
+ char *(*parent_name)(struct rxe_dev *rxe, unsigned int port_num);
+ enum rdma_link_layer (*link_layer)(struct rxe_dev *rxe,
+ unsigned int port_num);
+};
+
+struct rxe_dev {
+ struct ib_device ib_dev;
+ struct ib_device_attr attr;
+ int max_ucontext;
+ int max_inline_data;
+ struct kref ref_cnt;
+ struct mutex usdev_lock;
+
+ struct rxe_ifc_ops *ifc_ops;
+
+ struct net_device *ndev;
+
+ int xmit_errors;
+
+ struct rxe_pool uc_pool;
+ struct rxe_pool pd_pool;
+ struct rxe_pool ah_pool;
+ struct rxe_pool srq_pool;
+ struct rxe_pool qp_pool;
+ struct rxe_pool cq_pool;
+ struct rxe_pool mr_pool;
+ struct rxe_pool mw_pool;
+ struct rxe_pool mc_grp_pool;
+ struct rxe_pool mc_elem_pool;
+
+ spinlock_t pending_lock; /* guard pending_mmaps */
+ struct list_head pending_mmaps;
+
+ spinlock_t mmap_offset_lock; /* guard mmap_offset */
+ int mmap_offset;
+
+ struct rxe_port port;
+ struct list_head list;
+};
+
+static inline struct rxe_dev *to_rdev(struct ib_device *dev)
+{
+ return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL;
+}
+
+static inline struct rxe_ucontext *to_ruc(struct ib_ucontext *uc)
+{
+ return uc ? container_of(uc, struct rxe_ucontext, ibuc) : NULL;
+}
+
+static inline struct rxe_pd *to_rpd(struct ib_pd *pd)
+{
+ return pd ? container_of(pd, struct rxe_pd, ibpd) : NULL;
+}
+
+static inline struct rxe_ah *to_rah(struct ib_ah *ah)
+{
+ return ah ? container_of(ah, struct rxe_ah, ibah) : NULL;
+}
+
+static inline struct rxe_srq *to_rsrq(struct ib_srq *srq)
+{
+ return srq ? container_of(srq, struct rxe_srq, ibsrq) : NULL;
+}
+
+static inline struct rxe_qp *to_rqp(struct ib_qp *qp)
+{
+ return qp ? container_of(qp, struct rxe_qp, ibqp) : NULL;
+}
+
+static inline struct rxe_cq *to_rcq(struct ib_cq *cq)
+{
+ return cq ? container_of(cq, struct rxe_cq, ibcq) : NULL;
+}
+
+static inline struct rxe_mem *to_rmr(struct ib_mr *mr)
+{
+ return mr ? container_of(mr, struct rxe_mem, ibmr) : NULL;
+}
+
+static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
+{
+ return mw ? container_of(mw, struct rxe_mem, ibmw) : NULL;
+}
+
+int rxe_register_device(struct rxe_dev *rxe);
+int rxe_unregister_device(struct rxe_dev *rxe);
+
+void rxe_mc_cleanup(void *arg);
+
+#endif /* RXE_VERBS_H */
{
struct ipoib_dev_priv *priv = netdev_priv(netdev);
- snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
- "%d.%d.%d", (int)(priv->ca->attrs.fw_ver >> 32),
- (int)(priv->ca->attrs.fw_ver >> 16) & 0xffff,
- (int)priv->ca->attrs.fw_ver & 0xffff);
+ ib_get_device_fw_str(priv->ca, drvinfo->fw_version,
+ sizeof(drvinfo->fw_version));
strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
sizeof(drvinfo->bus_info));
priv->hca_caps = hca->attrs.device_cap_flags;
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
- priv->dev->hw_features = NETIF_F_SG |
- NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
+ priv->dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
priv->dev->hw_features |= NETIF_F_TSO;
.cap = {
.max_send_wr = ipoib_sendq_size,
.max_recv_wr = ipoib_recvq_size,
- .max_send_sge = 1,
+ .max_send_sge = min_t(u32, priv->ca->attrs.max_sge,
+ MAX_SKB_FRAGS + 1),
.max_recv_sge = IPOIB_UD_RX_SG
},
.sq_sig_type = IB_SIGNAL_ALL_WR,
if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING)
init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
- if (dev->features & NETIF_F_SG)
- init_attr.cap.max_send_sge =
- min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
-
priv->qp = ib_create_qp(priv->pd, &init_attr);
if (IS_ERR(priv->qp)) {
printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
priv->rx_wr.next = NULL;
priv->rx_wr.sg_list = priv->rx_sge;
+ if (init_attr.cap.max_send_sge > 1)
+ dev->features |= NETIF_F_SG;
+
priv->max_send_sge = init_attr.cap.max_send_sge;
return 0;
attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX;
attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
- isert_conn->max_sge = min(device->ib_device->attrs.max_sge,
- device->ib_device->attrs.max_sge_rd);
attr.cap.max_recv_sge = 1;
attr.sq_sig_type = IB_SIGNAL_REQ_WR;
attr.qp_type = IB_QPT_RC;
u32 responder_resources;
u32 initiator_depth;
bool pi_support;
- u32 max_sge;
struct iser_rx_desc *login_req_buf;
char *login_rsp_buf;
u64 login_req_dma;
struct ib_qp_init_attr *qp_init;
struct srpt_port *sport = ch->sport;
struct srpt_device *sdev = sport->sdev;
+ const struct ib_device_attr *attrs = &sdev->device->attrs;
u32 srp_sq_size = sport->port_attrib.srp_sq_size;
int ret;
*/
qp_init->cap.max_send_wr = srp_sq_size / 2;
qp_init->cap.max_rdma_ctxs = srp_sq_size / 2;
- qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
+ qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE);
qp_init->port_num = ch->sport->port;
ch->qp = ib_create_qp(sdev->pd, qp_init);
container_of(cmd, struct srpt_send_ioctx, cmd);
struct srpt_rdma_ch *ch = ioctx->ch;
struct srpt_device *sdev = ch->sport->sdev;
- struct ib_send_wr send_wr, *first_wr = NULL, *bad_wr;
+ struct ib_send_wr send_wr, *first_wr = &send_wr, *bad_wr;
struct ib_sge sge;
enum srpt_command_state state;
unsigned long flags;
struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
first_wr = rdma_rw_ctx_wrs(&ctx->rw, ch->qp,
- ch->sport->port, NULL,
- first_wr ? first_wr : &send_wr);
+ ch->sport->port, NULL, first_wr);
}
- } else {
- first_wr = &send_wr;
}
if (state != SRPT_STATE_MGMT)
SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2,
SRPT_DEF_SG_TABLESIZE = 128,
- SRPT_DEF_SG_PER_WQE = 16,
+ /*
+ * An experimentally determined value that avoids that QP creation
+ * fails due to "swiotlb buffer is full" on systems using the swiotlb.
+ */
+ SRPT_MAX_SG_PER_WQE = 16,
MIN_SRPT_SQ_SIZE = 16,
DEF_SRPT_SQ_SIZE = 4096,
module_param(sticks_to_null, bool, S_IRUGO);
MODULE_PARM_DESC(sticks_to_null, "Do not map sticks at all for unknown pads");
+static bool auto_poweroff = true;
+module_param(auto_poweroff, bool, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(auto_poweroff, "Power off wireless controllers on suspend");
+
static const struct xpad_device {
u16 idVendor;
u16 idProduct;
usb_kill_urb(xpad->irq_in);
}
+static void xpad360w_poweroff_controller(struct usb_xpad *xpad)
+{
+ unsigned long flags;
+ struct xpad_output_packet *packet =
+ &xpad->out_packets[XPAD_OUT_CMD_IDX];
+
+ spin_lock_irqsave(&xpad->odata_lock, flags);
+
+ packet->data[0] = 0x00;
+ packet->data[1] = 0x00;
+ packet->data[2] = 0x08;
+ packet->data[3] = 0xC0;
+ packet->data[4] = 0x00;
+ packet->data[5] = 0x00;
+ packet->data[6] = 0x00;
+ packet->data[7] = 0x00;
+ packet->data[8] = 0x00;
+ packet->data[9] = 0x00;
+ packet->data[10] = 0x00;
+ packet->data[11] = 0x00;
+ packet->len = 12;
+ packet->pending = true;
+
+ /* Reset the sequence so we send out poweroff now */
+ xpad->last_out_packet = -1;
+ xpad_try_sending_next_out_packet(xpad);
+
+ spin_unlock_irqrestore(&xpad->odata_lock, flags);
+}
+
static int xpad360w_start_input(struct usb_xpad *xpad)
{
int error;
* or goes away.
*/
xpad360w_stop_input(xpad);
+
+ /*
+ * The wireless adapter is going off now, so the
+ * gamepads are going to become disconnected.
+ * Unless explicitly disabled, power them down
+ * so they don't just sit there flashing.
+ */
+ if (auto_poweroff && xpad->pad_present)
+ xpad360w_poweroff_controller(xpad);
} else {
mutex_lock(&input->mutex);
if (input->users)
if (ret >= 0)
cros_ec_keyb_process(ckdev, kb_state, ret);
else
- dev_err(ec->dev, "failed to get keyboard state: %d\n", ret);
+ dev_err(ckdev->dev, "failed to get keyboard state: %d\n", ret);
return IRQ_HANDLED;
}
static int cros_ec_keyb_probe(struct platform_device *pdev)
{
struct cros_ec_device *ec = dev_get_drvdata(pdev->dev.parent);
- struct device *dev = ec->dev;
+ struct device *dev = &pdev->dev;
struct cros_ec_keyb *ckdev;
struct input_dev *idev;
struct device_node *np;
if (!np)
return -ENODEV;
- ckdev = devm_kzalloc(&pdev->dev, sizeof(*ckdev), GFP_KERNEL);
+ ckdev = devm_kzalloc(dev, sizeof(*ckdev), GFP_KERNEL);
if (!ckdev)
return -ENOMEM;
- err = matrix_keypad_parse_of_params(&pdev->dev, &ckdev->rows,
- &ckdev->cols);
+ err = matrix_keypad_parse_of_params(dev, &ckdev->rows, &ckdev->cols);
if (err)
return err;
- ckdev->valid_keys = devm_kzalloc(&pdev->dev, ckdev->cols, GFP_KERNEL);
+ ckdev->valid_keys = devm_kzalloc(dev, ckdev->cols, GFP_KERNEL);
if (!ckdev->valid_keys)
return -ENOMEM;
- ckdev->old_kb_state = devm_kzalloc(&pdev->dev, ckdev->cols, GFP_KERNEL);
+ ckdev->old_kb_state = devm_kzalloc(dev, ckdev->cols, GFP_KERNEL);
if (!ckdev->old_kb_state)
return -ENOMEM;
- idev = devm_input_allocate_device(&pdev->dev);
+ idev = devm_input_allocate_device(dev);
if (!idev)
return -ENOMEM;
ckdev->ec = ec;
ckdev->dev = dev;
- dev_set_drvdata(&pdev->dev, ckdev);
+ dev_set_drvdata(dev, ckdev);
idev->name = CROS_EC_DEV_NAME;
idev->phys = ec->phys_name;
idev->id.bustype = BUS_VIRTUAL;
idev->id.version = 1;
idev->id.product = 0;
- idev->dev.parent = &pdev->dev;
+ idev->dev.parent = dev;
idev->open = cros_ec_keyb_open;
idev->close = cros_ec_keyb_close;
#define DRV_NAME "rotary-encoder"
+enum rotary_encoder_encoding {
+ ROTENC_GRAY,
+ ROTENC_BINARY,
+};
+
struct rotary_encoder {
struct input_dev *input;
u32 axis;
bool relative_axis;
bool rollover;
+ enum rotary_encoder_encoding encoding;
unsigned int pos;
for (i = 0; i < encoder->gpios->ndescs; ++i) {
int val = gpiod_get_value_cansleep(encoder->gpios->desc[i]);
+
/* convert from gray encoding to normal */
- if (ret & 1)
+ if (encoder->encoding == ROTENC_GRAY && ret & 1)
val = !val;
ret = ret << 1 | val;
encoder->rollover =
device_property_read_bool(dev, "rotary-encoder,rollover");
+ if (!device_property_present(dev, "rotary-encoder,encoding") ||
+ !device_property_match_string(dev, "rotary-encoder,encoding",
+ "gray")) {
+ dev_info(dev, "gray");
+ encoder->encoding = ROTENC_GRAY;
+ } else if (!device_property_match_string(dev, "rotary-encoder,encoding",
+ "binary")) {
+ dev_info(dev, "binary");
+ encoder->encoding = ROTENC_BINARY;
+ } else {
+ dev_err(dev, "unknown encoding setting\n");
+ return -EINVAL;
+ }
+
device_property_read_u32(dev, "linux,axis", &encoder->axis);
encoder->relative_axis =
device_property_read_bool(dev, "rotary-encoder,relative-axis");
* Copyright (c) 2013 ELAN Microelectronics Corp.
*
* Author: æž—æ”¿ç¶ (Duson Lin) <dusonlin@emc.com.tw>
- * Version: 1.6.0
+ * Author: KT Liao <kt.liao@emc.com.tw>
+ * Version: 1.6.2
*
* Based on cyapa driver:
* copyright (c) 2011-2012 Cypress Semiconductor, Inc.
#include "elan_i2c.h"
#define DRIVER_NAME "elan_i2c"
-#define ELAN_DRIVER_VERSION "1.6.1"
+#define ELAN_DRIVER_VERSION "1.6.2"
#define ELAN_VENDOR_ID 0x04f3
#define ETP_MAX_PRESSURE 255
#define ETP_FWIDTH_REDUCE 90
return error;
}
+static int elan_query_product(struct elan_tp_data *data)
+{
+ int error;
+
+ error = data->ops->get_product_id(data->client, &data->product_id);
+ if (error)
+ return error;
+
+ error = data->ops->get_sm_version(data->client, &data->ic_type,
+ &data->sm_version);
+ if (error)
+ return error;
+
+ return 0;
+}
+
+static int elan_check_ASUS_special_fw(struct elan_tp_data *data)
+{
+ if (data->ic_type != 0x0E)
+ return false;
+
+ switch (data->product_id) {
+ case 0x05 ... 0x07:
+ case 0x09:
+ case 0x13:
+ return true;
+ default:
+ return false;
+ }
+}
+
static int __elan_initialize(struct elan_tp_data *data)
{
struct i2c_client *client = data->client;
+ bool woken_up = false;
int error;
error = data->ops->initialize(client);
return error;
}
+ error = elan_query_product(data);
+ if (error)
+ return error;
+
+ /*
+ * Some ASUS devices were shipped with firmware that requires
+ * touchpads to be woken up first, before attempting to switch
+ * them into absolute reporting mode.
+ */
+ if (elan_check_ASUS_special_fw(data)) {
+ error = data->ops->sleep_control(client, false);
+ if (error) {
+ dev_err(&client->dev,
+ "failed to wake device up: %d\n", error);
+ return error;
+ }
+
+ msleep(200);
+ woken_up = true;
+ }
+
data->mode |= ETP_ENABLE_ABS;
error = data->ops->set_mode(client, data->mode);
if (error) {
return error;
}
- error = data->ops->sleep_control(client, false);
- if (error) {
- dev_err(&client->dev,
- "failed to wake device up: %d\n", error);
- return error;
+ if (!woken_up) {
+ error = data->ops->sleep_control(client, false);
+ if (error) {
+ dev_err(&client->dev,
+ "failed to wake device up: %d\n", error);
+ return error;
+ }
}
return 0;
{
int error;
- error = data->ops->get_product_id(data->client, &data->product_id);
- if (error)
- return error;
-
error = data->ops->get_version(data->client, false, &data->fw_version);
if (error)
return error;
if (error)
return error;
- error = data->ops->get_sm_version(data->client, &data->ic_type,
- &data->sm_version);
- if (error)
- return error;
-
error = data->ops->get_version(data->client, true, &data->iap_version);
if (error)
return error;
*/
static void elantech_packet_dump(struct psmouse *psmouse)
{
- int i;
-
- psmouse_printk(KERN_DEBUG, psmouse, "PS/2 packet [");
- for (i = 0; i < psmouse->pktsize; i++)
- printk("%s0x%02x ", i ? ", " : " ", psmouse->packet[i]);
- printk("]\n");
+ psmouse_printk(KERN_DEBUG, psmouse, "PS/2 packet [%*ph]\n",
+ psmouse->pktsize, psmouse->packet);
}
/*
void rmi_unregister_function(struct rmi_function *fn)
{
device_del(&fn->dev);
-
- if (fn->dev.of_node)
- of_node_put(fn->dev.of_node);
-
+ of_node_put(fn->dev.of_node);
put_device(&fn->dev);
}
serio->start = i8042_start;
serio->stop = i8042_stop;
serio->close = i8042_port_close;
+ serio->ps2_cmd_mutex = &i8042_mutex;
serio->port_data = port;
serio->dev.parent = &i8042_platform_device->dev;
strlcpy(serio->name, "i8042 KBD port", sizeof(serio->name));
}
}
-/*
- * Checks whether port belongs to i8042 controller.
- */
-bool i8042_check_port_owner(const struct serio *port)
-{
- int i;
-
- for (i = 0; i < I8042_NUM_PORTS; i++)
- if (i8042_ports[i].serio == port)
- return true;
-
- return false;
-}
-EXPORT_SYMBOL(i8042_check_port_owner);
-
static void i8042_free_irqs(void)
{
if (i8042_aux_irq_registered)
void ps2_begin_command(struct ps2dev *ps2dev)
{
- mutex_lock(&ps2dev->cmd_mutex);
+ struct mutex *m = ps2dev->serio->ps2_cmd_mutex ?: &ps2dev->cmd_mutex;
- if (i8042_check_port_owner(ps2dev->serio))
- i8042_lock_chip();
+ mutex_lock(m);
}
EXPORT_SYMBOL(ps2_begin_command);
void ps2_end_command(struct ps2dev *ps2dev)
{
- if (i8042_check_port_owner(ps2dev->serio))
- i8042_unlock_chip();
+ struct mutex *m = ps2dev->serio->ps2_cmd_mutex ?: &ps2dev->cmd_mutex;
- mutex_unlock(&ps2dev->cmd_mutex);
+ mutex_unlock(m);
}
EXPORT_SYMBOL(ps2_end_command);
To compile this driver as a module, choose M here: the
module will be called raydium_i2c_ts.
+config TOUCHSCREEN_SILEAD
+ tristate "Silead I2C touchscreen"
+ depends on I2C
+ help
+ Say Y here if you have the Silead touchscreen connected to
+ your system.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called silead.
+
+config TOUCHSCREEN_SIS_I2C
+ tristate "SiS 9200 family I2C touchscreen"
+ depends on I2C
+ select CRC_ITU_T
+ depends on GPIOLIB || COMPILE_TEST
+ help
+ This enables support for SiS 9200 family over I2C based touchscreens.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called sis_i2c.
+
config TOUCHSCREEN_ST1232
tristate "Sitronix ST1232 touchscreen controllers"
depends on I2C
obj-$(CONFIG_TOUCHSCREEN_PIXCIR) += pixcir_i2c_ts.o
obj-$(CONFIG_TOUCHSCREEN_RM_TS) += raydium_i2c_ts.o
obj-$(CONFIG_TOUCHSCREEN_S3C2410) += s3c2410_ts.o
+obj-$(CONFIG_TOUCHSCREEN_SILEAD) += silead.o
+obj-$(CONFIG_TOUCHSCREEN_SIS_I2C) += sis_i2c.o
obj-$(CONFIG_TOUCHSCREEN_ST1232) += st1232.o
obj-$(CONFIG_TOUCHSCREEN_STMPE) += stmpe-ts.o
obj-$(CONFIG_TOUCHSCREEN_SUN4I) += sun4i-ts.o
return count;
}
-static DEVICE_ATTR(calibrate, 0644, NULL, ili210x_calibrate);
+static DEVICE_ATTR(calibrate, S_IWUSR, NULL, ili210x_calibrate);
static struct attribute *ili210x_attributes[] = {
&dev_attr_calibrate.attr,
--- /dev/null
+/* -------------------------------------------------------------------------
+ * Copyright (C) 2014-2015, Intel Corporation
+ *
+ * Derived from:
+ * gslX68X.c
+ * Copyright (C) 2010-2015, Shanghai Sileadinc Co.Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * -------------------------------------------------------------------------
+ */
+
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/interrupt.h>
+#include <linux/gpio/consumer.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/input/touchscreen.h>
+#include <linux/pm.h>
+#include <linux/irq.h>
+
+#include <asm/unaligned.h>
+
+#define SILEAD_TS_NAME "silead_ts"
+
+#define SILEAD_REG_RESET 0xE0
+#define SILEAD_REG_DATA 0x80
+#define SILEAD_REG_TOUCH_NR 0x80
+#define SILEAD_REG_POWER 0xBC
+#define SILEAD_REG_CLOCK 0xE4
+#define SILEAD_REG_STATUS 0xB0
+#define SILEAD_REG_ID 0xFC
+#define SILEAD_REG_MEM_CHECK 0xB0
+
+#define SILEAD_STATUS_OK 0x5A5A5A5A
+#define SILEAD_TS_DATA_LEN 44
+#define SILEAD_CLOCK 0x04
+
+#define SILEAD_CMD_RESET 0x88
+#define SILEAD_CMD_START 0x00
+
+#define SILEAD_POINT_DATA_LEN 0x04
+#define SILEAD_POINT_Y_OFF 0x00
+#define SILEAD_POINT_Y_MSB_OFF 0x01
+#define SILEAD_POINT_X_OFF 0x02
+#define SILEAD_POINT_X_MSB_OFF 0x03
+#define SILEAD_TOUCH_ID_MASK 0xF0
+
+#define SILEAD_CMD_SLEEP_MIN 10000
+#define SILEAD_CMD_SLEEP_MAX 20000
+#define SILEAD_POWER_SLEEP 20
+#define SILEAD_STARTUP_SLEEP 30
+
+#define SILEAD_MAX_FINGERS 10
+
+enum silead_ts_power {
+ SILEAD_POWER_ON = 1,
+ SILEAD_POWER_OFF = 0
+};
+
+struct silead_ts_data {
+ struct i2c_client *client;
+ struct gpio_desc *gpio_power;
+ struct input_dev *input;
+ char fw_name[64];
+ struct touchscreen_properties prop;
+ u32 max_fingers;
+ u32 chip_id;
+ struct input_mt_pos pos[SILEAD_MAX_FINGERS];
+ int slots[SILEAD_MAX_FINGERS];
+ int id[SILEAD_MAX_FINGERS];
+};
+
+struct silead_fw_data {
+ u32 offset;
+ u32 val;
+};
+
+static int silead_ts_request_input_dev(struct silead_ts_data *data)
+{
+ struct device *dev = &data->client->dev;
+ int error;
+
+ data->input = devm_input_allocate_device(dev);
+ if (!data->input) {
+ dev_err(dev,
+ "Failed to allocate input device\n");
+ return -ENOMEM;
+ }
+
+ input_set_abs_params(data->input, ABS_MT_POSITION_X, 0, 4095, 0, 0);
+ input_set_abs_params(data->input, ABS_MT_POSITION_Y, 0, 4095, 0, 0);
+ touchscreen_parse_properties(data->input, true, &data->prop);
+
+ input_mt_init_slots(data->input, data->max_fingers,
+ INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED |
+ INPUT_MT_TRACK);
+
+ data->input->name = SILEAD_TS_NAME;
+ data->input->phys = "input/ts";
+ data->input->id.bustype = BUS_I2C;
+
+ error = input_register_device(data->input);
+ if (error) {
+ dev_err(dev, "Failed to register input device: %d\n", error);
+ return error;
+ }
+
+ return 0;
+}
+
+static void silead_ts_set_power(struct i2c_client *client,
+ enum silead_ts_power state)
+{
+ struct silead_ts_data *data = i2c_get_clientdata(client);
+
+ if (data->gpio_power) {
+ gpiod_set_value_cansleep(data->gpio_power, state);
+ msleep(SILEAD_POWER_SLEEP);
+ }
+}
+
+static void silead_ts_read_data(struct i2c_client *client)
+{
+ struct silead_ts_data *data = i2c_get_clientdata(client);
+ struct input_dev *input = data->input;
+ struct device *dev = &client->dev;
+ u8 *bufp, buf[SILEAD_TS_DATA_LEN];
+ int touch_nr, error, i;
+
+ error = i2c_smbus_read_i2c_block_data(client, SILEAD_REG_DATA,
+ SILEAD_TS_DATA_LEN, buf);
+ if (error < 0) {
+ dev_err(dev, "Data read error %d\n", error);
+ return;
+ }
+
+ touch_nr = buf[0];
+ if (touch_nr > data->max_fingers) {
+ dev_warn(dev, "More touches reported then supported %d > %d\n",
+ touch_nr, data->max_fingers);
+ touch_nr = data->max_fingers;
+ }
+
+ bufp = buf + SILEAD_POINT_DATA_LEN;
+ for (i = 0; i < touch_nr; i++, bufp += SILEAD_POINT_DATA_LEN) {
+ /* Bits 4-7 are the touch id */
+ data->id[i] = (bufp[SILEAD_POINT_X_MSB_OFF] &
+ SILEAD_TOUCH_ID_MASK) >> 4;
+ touchscreen_set_mt_pos(&data->pos[i], &data->prop,
+ get_unaligned_le16(&bufp[SILEAD_POINT_X_OFF]) & 0xfff,
+ get_unaligned_le16(&bufp[SILEAD_POINT_Y_OFF]) & 0xfff);
+ }
+
+ input_mt_assign_slots(input, data->slots, data->pos, touch_nr, 0);
+
+ for (i = 0; i < touch_nr; i++) {
+ input_mt_slot(input, data->slots[i]);
+ input_mt_report_slot_state(input, MT_TOOL_FINGER, true);
+ input_report_abs(input, ABS_MT_POSITION_X, data->pos[i].x);
+ input_report_abs(input, ABS_MT_POSITION_Y, data->pos[i].y);
+
+ dev_dbg(dev, "x=%d y=%d hw_id=%d sw_id=%d\n", data->pos[i].x,
+ data->pos[i].y, data->id[i], data->slots[i]);
+ }
+
+ input_mt_sync_frame(input);
+ input_sync(input);
+}
+
+static int silead_ts_init(struct i2c_client *client)
+{
+ struct silead_ts_data *data = i2c_get_clientdata(client);
+ int error;
+
+ error = i2c_smbus_write_byte_data(client, SILEAD_REG_RESET,
+ SILEAD_CMD_RESET);
+ if (error)
+ goto i2c_write_err;
+ usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX);
+
+ error = i2c_smbus_write_byte_data(client, SILEAD_REG_TOUCH_NR,
+ data->max_fingers);
+ if (error)
+ goto i2c_write_err;
+ usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX);
+
+ error = i2c_smbus_write_byte_data(client, SILEAD_REG_CLOCK,
+ SILEAD_CLOCK);
+ if (error)
+ goto i2c_write_err;
+ usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX);
+
+ error = i2c_smbus_write_byte_data(client, SILEAD_REG_RESET,
+ SILEAD_CMD_START);
+ if (error)
+ goto i2c_write_err;
+ usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX);
+
+ return 0;
+
+i2c_write_err:
+ dev_err(&client->dev, "Registers clear error %d\n", error);
+ return error;
+}
+
+static int silead_ts_reset(struct i2c_client *client)
+{
+ int error;
+
+ error = i2c_smbus_write_byte_data(client, SILEAD_REG_RESET,
+ SILEAD_CMD_RESET);
+ if (error)
+ goto i2c_write_err;
+ usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX);
+
+ error = i2c_smbus_write_byte_data(client, SILEAD_REG_CLOCK,
+ SILEAD_CLOCK);
+ if (error)
+ goto i2c_write_err;
+ usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX);
+
+ error = i2c_smbus_write_byte_data(client, SILEAD_REG_POWER,
+ SILEAD_CMD_START);
+ if (error)
+ goto i2c_write_err;
+ usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX);
+
+ return 0;
+
+i2c_write_err:
+ dev_err(&client->dev, "Chip reset error %d\n", error);
+ return error;
+}
+
+static int silead_ts_startup(struct i2c_client *client)
+{
+ int error;
+
+ error = i2c_smbus_write_byte_data(client, SILEAD_REG_RESET, 0x00);
+ if (error) {
+ dev_err(&client->dev, "Startup error %d\n", error);
+ return error;
+ }
+
+ msleep(SILEAD_STARTUP_SLEEP);
+
+ return 0;
+}
+
+static int silead_ts_load_fw(struct i2c_client *client)
+{
+ struct device *dev = &client->dev;
+ struct silead_ts_data *data = i2c_get_clientdata(client);
+ unsigned int fw_size, i;
+ const struct firmware *fw;
+ struct silead_fw_data *fw_data;
+ int error;
+
+ dev_dbg(dev, "Firmware file name: %s", data->fw_name);
+
+ error = request_firmware(&fw, data->fw_name, dev);
+ if (error) {
+ dev_err(dev, "Firmware request error %d\n", error);
+ return error;
+ }
+
+ fw_size = fw->size / sizeof(*fw_data);
+ fw_data = (struct silead_fw_data *)fw->data;
+
+ for (i = 0; i < fw_size; i++) {
+ error = i2c_smbus_write_i2c_block_data(client,
+ fw_data[i].offset,
+ 4,
+ (u8 *)&fw_data[i].val);
+ if (error) {
+ dev_err(dev, "Firmware load error %d\n", error);
+ break;
+ }
+ }
+
+ release_firmware(fw);
+ return error ?: 0;
+}
+
+static u32 silead_ts_get_status(struct i2c_client *client)
+{
+ int error;
+ __le32 status;
+
+ error = i2c_smbus_read_i2c_block_data(client, SILEAD_REG_STATUS,
+ sizeof(status), (u8 *)&status);
+ if (error < 0) {
+ dev_err(&client->dev, "Status read error %d\n", error);
+ return error;
+ }
+
+ return le32_to_cpu(status);
+}
+
+static int silead_ts_get_id(struct i2c_client *client)
+{
+ struct silead_ts_data *data = i2c_get_clientdata(client);
+ __le32 chip_id;
+ int error;
+
+ error = i2c_smbus_read_i2c_block_data(client, SILEAD_REG_ID,
+ sizeof(chip_id), (u8 *)&chip_id);
+ if (error < 0) {
+ dev_err(&client->dev, "Chip ID read error %d\n", error);
+ return error;
+ }
+
+ data->chip_id = le32_to_cpu(chip_id);
+ dev_info(&client->dev, "Silead chip ID: 0x%8X", data->chip_id);
+
+ return 0;
+}
+
+static int silead_ts_setup(struct i2c_client *client)
+{
+ int error;
+ u32 status;
+
+ silead_ts_set_power(client, SILEAD_POWER_OFF);
+ silead_ts_set_power(client, SILEAD_POWER_ON);
+
+ error = silead_ts_get_id(client);
+ if (error)
+ return error;
+
+ error = silead_ts_init(client);
+ if (error)
+ return error;
+
+ error = silead_ts_reset(client);
+ if (error)
+ return error;
+
+ error = silead_ts_load_fw(client);
+ if (error)
+ return error;
+
+ error = silead_ts_startup(client);
+ if (error)
+ return error;
+
+ status = silead_ts_get_status(client);
+ if (status != SILEAD_STATUS_OK) {
+ dev_err(&client->dev,
+ "Initialization error, status: 0x%X\n", status);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static irqreturn_t silead_ts_threaded_irq_handler(int irq, void *id)
+{
+ struct silead_ts_data *data = id;
+ struct i2c_client *client = data->client;
+
+ silead_ts_read_data(client);
+
+ return IRQ_HANDLED;
+}
+
+static void silead_ts_read_props(struct i2c_client *client)
+{
+ struct silead_ts_data *data = i2c_get_clientdata(client);
+ struct device *dev = &client->dev;
+ const char *str;
+ int error;
+
+ error = device_property_read_u32(dev, "silead,max-fingers",
+ &data->max_fingers);
+ if (error) {
+ dev_dbg(dev, "Max fingers read error %d\n", error);
+ data->max_fingers = 5; /* Most devices handle up-to 5 fingers */
+ }
+
+ error = device_property_read_string(dev, "touchscreen-fw-name", &str);
+ if (!error)
+ snprintf(data->fw_name, sizeof(data->fw_name), "%s", str);
+ else
+ dev_dbg(dev, "Firmware file name read error. Using default.");
+}
+
+#ifdef CONFIG_ACPI
+static int silead_ts_set_default_fw_name(struct silead_ts_data *data,
+ const struct i2c_device_id *id)
+{
+ const struct acpi_device_id *acpi_id;
+ struct device *dev = &data->client->dev;
+ int i;
+
+ if (ACPI_HANDLE(dev)) {
+ acpi_id = acpi_match_device(dev->driver->acpi_match_table, dev);
+ if (!acpi_id)
+ return -ENODEV;
+
+ snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw",
+ acpi_id->id);
+
+ for (i = 0; i < strlen(data->fw_name); i++)
+ data->fw_name[i] = tolower(data->fw_name[i]);
+ } else {
+ snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw",
+ id->name);
+ }
+
+ return 0;
+}
+#else
+static int silead_ts_set_default_fw_name(struct silead_ts_data *data,
+ const struct i2c_device_id *id)
+{
+ snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw", id->name);
+ return 0;
+}
+#endif
+
+static int silead_ts_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct silead_ts_data *data;
+ struct device *dev = &client->dev;
+ int error;
+
+ if (!i2c_check_functionality(client->adapter,
+ I2C_FUNC_I2C |
+ I2C_FUNC_SMBUS_READ_I2C_BLOCK |
+ I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) {
+ dev_err(dev, "I2C functionality check failed\n");
+ return -ENXIO;
+ }
+
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ i2c_set_clientdata(client, data);
+ data->client = client;
+
+ error = silead_ts_set_default_fw_name(data, id);
+ if (error)
+ return error;
+
+ silead_ts_read_props(client);
+
+ /* We must have the IRQ provided by DT or ACPI subsytem */
+ if (client->irq <= 0)
+ return -ENODEV;
+
+ /* Power GPIO pin */
+ data->gpio_power = gpiod_get_optional(dev, "power", GPIOD_OUT_LOW);
+ if (IS_ERR(data->gpio_power)) {
+ if (PTR_ERR(data->gpio_power) != -EPROBE_DEFER)
+ dev_err(dev, "Shutdown GPIO request failed\n");
+ return PTR_ERR(data->gpio_power);
+ }
+
+ error = silead_ts_setup(client);
+ if (error)
+ return error;
+
+ error = silead_ts_request_input_dev(data);
+ if (error)
+ return error;
+
+ error = devm_request_threaded_irq(dev, client->irq,
+ NULL, silead_ts_threaded_irq_handler,
+ IRQF_ONESHOT, client->name, data);
+ if (error) {
+ if (error != -EPROBE_DEFER)
+ dev_err(dev, "IRQ request failed %d\n", error);
+ return error;
+ }
+
+ return 0;
+}
+
+static int __maybe_unused silead_ts_suspend(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+
+ silead_ts_set_power(client, SILEAD_POWER_OFF);
+ return 0;
+}
+
+static int __maybe_unused silead_ts_resume(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ int error, status;
+
+ silead_ts_set_power(client, SILEAD_POWER_ON);
+
+ error = silead_ts_reset(client);
+ if (error)
+ return error;
+
+ error = silead_ts_startup(client);
+ if (error)
+ return error;
+
+ status = silead_ts_get_status(client);
+ if (status != SILEAD_STATUS_OK) {
+ dev_err(dev, "Resume error, status: 0x%02x\n", status);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(silead_ts_pm, silead_ts_suspend, silead_ts_resume);
+
+static const struct i2c_device_id silead_ts_id[] = {
+ { "gsl1680", 0 },
+ { "gsl1688", 0 },
+ { "gsl3670", 0 },
+ { "gsl3675", 0 },
+ { "gsl3692", 0 },
+ { "mssl1680", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, silead_ts_id);
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id silead_ts_acpi_match[] = {
+ { "GSL1680", 0 },
+ { "GSL1688", 0 },
+ { "GSL3670", 0 },
+ { "GSL3675", 0 },
+ { "GSL3692", 0 },
+ { "MSSL1680", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, silead_ts_acpi_match);
+#endif
+
+static struct i2c_driver silead_ts_driver = {
+ .probe = silead_ts_probe,
+ .id_table = silead_ts_id,
+ .driver = {
+ .name = SILEAD_TS_NAME,
+ .acpi_match_table = ACPI_PTR(silead_ts_acpi_match),
+ .pm = &silead_ts_pm,
+ },
+};
+module_i2c_driver(silead_ts_driver);
+
+MODULE_AUTHOR("Robert Dolca <robert.dolca@intel.com>");
+MODULE_DESCRIPTION("Silead I2C touchscreen driver");
+MODULE_LICENSE("GPL");
--- /dev/null
+/*
+ * Touch Screen driver for SiS 9200 family I2C Touch panels
+ *
+ * Copyright (C) 2015 SiS, Inc.
+ * Copyright (C) 2016 Nextfour Group
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/crc-itu-t.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/interrupt.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/unaligned.h>
+
+#define SIS_I2C_NAME "sis_i2c_ts"
+
+/*
+ * The I2C packet format:
+ * le16 byte count
+ * u8 Report ID
+ * <contact data - variable length>
+ * u8 Number of contacts
+ * le16 Scan Time (optional)
+ * le16 CRC
+ *
+ * One touch point information consists of 6+ bytes, the order is:
+ * u8 contact state
+ * u8 finger id
+ * le16 x axis
+ * le16 y axis
+ * u8 contact width (optional)
+ * u8 contact height (optional)
+ * u8 pressure (optional)
+ *
+ * Maximum amount of data transmitted in one shot is 64 bytes, if controller
+ * needs to report more contacts than fit in one packet it will send true
+ * number of contacts in first packet and 0 as number of contacts in second
+ * packet.
+ */
+
+#define SIS_MAX_PACKET_SIZE 64
+
+#define SIS_PKT_LEN_OFFSET 0
+#define SIS_PKT_REPORT_OFFSET 2 /* Report ID/type */
+#define SIS_PKT_CONTACT_OFFSET 3 /* First contact */
+
+#define SIS_SCAN_TIME_LEN 2
+
+/* Supported report types */
+#define SIS_ALL_IN_ONE_PACKAGE 0x10
+#define SIS_PKT_IS_TOUCH(x) (((x) & 0x0f) == 0x01)
+#define SIS_PKT_IS_HIDI2C(x) (((x) & 0x0f) == 0x06)
+
+/* Contact properties within report */
+#define SIS_PKT_HAS_AREA(x) ((x) & BIT(4))
+#define SIS_PKT_HAS_PRESSURE(x) ((x) & BIT(5))
+#define SIS_PKT_HAS_SCANTIME(x) ((x) & BIT(6))
+
+/* Contact size */
+#define SIS_BASE_LEN_PER_CONTACT 6
+#define SIS_AREA_LEN_PER_CONTACT 2
+#define SIS_PRESSURE_LEN_PER_CONTACT 1
+
+/* Offsets within contact data */
+#define SIS_CONTACT_STATUS_OFFSET 0
+#define SIS_CONTACT_ID_OFFSET 1 /* Contact ID */
+#define SIS_CONTACT_X_OFFSET 2
+#define SIS_CONTACT_Y_OFFSET 4
+#define SIS_CONTACT_WIDTH_OFFSET 6
+#define SIS_CONTACT_HEIGHT_OFFSET 7
+#define SIS_CONTACT_PRESSURE_OFFSET(id) (SIS_PKT_HAS_AREA(id) ? 8 : 6)
+
+/* Individual contact state */
+#define SIS_STATUS_UP 0x0
+#define SIS_STATUS_DOWN 0x3
+
+/* Touchscreen parameters */
+#define SIS_MAX_FINGERS 10
+#define SIS_MAX_X 4095
+#define SIS_MAX_Y 4095
+#define SIS_MAX_PRESSURE 255
+
+/* Resolution diagonal */
+#define SIS_AREA_LENGTH_LONGER 5792
+/*((SIS_MAX_X^2) + (SIS_MAX_Y^2))^0.5*/
+#define SIS_AREA_LENGTH_SHORT 5792
+#define SIS_AREA_UNIT (5792 / 32)
+
+struct sis_ts_data {
+ struct i2c_client *client;
+ struct input_dev *input;
+
+ struct gpio_desc *attn_gpio;
+ struct gpio_desc *reset_gpio;
+
+ u8 packet[SIS_MAX_PACKET_SIZE];
+};
+
+static int sis_read_packet(struct i2c_client *client, u8 *buf,
+ unsigned int *num_contacts,
+ unsigned int *contact_size)
+{
+ int count_idx;
+ int ret;
+ u16 len;
+ u16 crc, pkg_crc;
+ u8 report_id;
+
+ ret = i2c_master_recv(client, buf, SIS_MAX_PACKET_SIZE);
+ if (ret <= 0)
+ return -EIO;
+
+ len = get_unaligned_le16(&buf[SIS_PKT_LEN_OFFSET]);
+ if (len > SIS_MAX_PACKET_SIZE) {
+ dev_err(&client->dev,
+ "%s: invalid packet length (%d vs %d)\n",
+ __func__, len, SIS_MAX_PACKET_SIZE);
+ return -E2BIG;
+ }
+
+ if (len < 10)
+ return -EINVAL;
+
+ report_id = buf[SIS_PKT_REPORT_OFFSET];
+ count_idx = len - 1;
+ *contact_size = SIS_BASE_LEN_PER_CONTACT;
+
+ if (report_id != SIS_ALL_IN_ONE_PACKAGE) {
+ if (SIS_PKT_IS_TOUCH(report_id)) {
+ /*
+ * Calculate CRC ignoring packet length
+ * in the beginning and CRC transmitted
+ * at the end of the packet.
+ */
+ crc = crc_itu_t(0, buf + 2, len - 2 - 2);
+ pkg_crc = get_unaligned_le16(&buf[len - 2]);
+
+ if (crc != pkg_crc) {
+ dev_err(&client->dev,
+ "%s: CRC Error (%d vs %d)\n",
+ __func__, crc, pkg_crc);
+ return -EINVAL;
+ }
+
+ count_idx -= 2;
+
+ } else if (!SIS_PKT_IS_HIDI2C(report_id)) {
+ dev_err(&client->dev,
+ "%s: invalid packet ID %#02x\n",
+ __func__, report_id);
+ return -EINVAL;
+ }
+
+ if (SIS_PKT_HAS_SCANTIME(report_id))
+ count_idx -= SIS_SCAN_TIME_LEN;
+
+ if (SIS_PKT_HAS_AREA(report_id))
+ *contact_size += SIS_AREA_LEN_PER_CONTACT;
+ if (SIS_PKT_HAS_PRESSURE(report_id))
+ *contact_size += SIS_PRESSURE_LEN_PER_CONTACT;
+ }
+
+ *num_contacts = buf[count_idx];
+ return 0;
+}
+
+static int sis_ts_report_contact(struct sis_ts_data *ts, const u8 *data, u8 id)
+{
+ struct input_dev *input = ts->input;
+ int slot;
+ u8 status = data[SIS_CONTACT_STATUS_OFFSET];
+ u8 pressure;
+ u8 height, width;
+ u16 x, y;
+
+ if (status != SIS_STATUS_DOWN && status != SIS_STATUS_UP) {
+ dev_err(&ts->client->dev, "Unexpected touch status: %#02x\n",
+ data[SIS_CONTACT_STATUS_OFFSET]);
+ return -EINVAL;
+ }
+
+ slot = input_mt_get_slot_by_key(input, data[SIS_CONTACT_ID_OFFSET]);
+ if (slot < 0)
+ return -ENOENT;
+
+ input_mt_slot(input, slot);
+ input_mt_report_slot_state(input, MT_TOOL_FINGER,
+ status == SIS_STATUS_DOWN);
+
+ if (status == SIS_STATUS_DOWN) {
+ pressure = height = width = 1;
+ if (id != SIS_ALL_IN_ONE_PACKAGE) {
+ if (SIS_PKT_HAS_AREA(id)) {
+ width = data[SIS_CONTACT_WIDTH_OFFSET];
+ height = data[SIS_CONTACT_HEIGHT_OFFSET];
+ }
+
+ if (SIS_PKT_HAS_PRESSURE(id))
+ pressure =
+ data[SIS_CONTACT_PRESSURE_OFFSET(id)];
+ }
+
+ x = get_unaligned_le16(&data[SIS_CONTACT_X_OFFSET]);
+ y = get_unaligned_le16(&data[SIS_CONTACT_Y_OFFSET]);
+
+ input_report_abs(input, ABS_MT_TOUCH_MAJOR,
+ width * SIS_AREA_UNIT);
+ input_report_abs(input, ABS_MT_TOUCH_MINOR,
+ height * SIS_AREA_UNIT);
+ input_report_abs(input, ABS_MT_PRESSURE, pressure);
+ input_report_abs(input, ABS_MT_POSITION_X, x);
+ input_report_abs(input, ABS_MT_POSITION_Y, y);
+ }
+
+ return 0;
+}
+
+static void sis_ts_handle_packet(struct sis_ts_data *ts)
+{
+ const u8 *contact;
+ unsigned int num_to_report = 0;
+ unsigned int num_contacts;
+ unsigned int num_reported;
+ unsigned int contact_size;
+ int error;
+ u8 report_id;
+
+ do {
+ error = sis_read_packet(ts->client, ts->packet,
+ &num_contacts, &contact_size);
+ if (error)
+ break;
+
+ if (num_to_report == 0) {
+ num_to_report = num_contacts;
+ } else if (num_contacts != 0) {
+ dev_err(&ts->client->dev,
+ "%s: nonzero (%d) point count in tail packet\n",
+ __func__, num_contacts);
+ break;
+ }
+
+ report_id = ts->packet[SIS_PKT_REPORT_OFFSET];
+ contact = &ts->packet[SIS_PKT_CONTACT_OFFSET];
+ num_reported = 0;
+
+ while (num_to_report > 0) {
+ error = sis_ts_report_contact(ts, contact, report_id);
+ if (error)
+ break;
+
+ contact += contact_size;
+ num_to_report--;
+ num_reported++;
+
+ if (report_id != SIS_ALL_IN_ONE_PACKAGE &&
+ num_reported >= 5) {
+ /*
+ * The remainder of contacts is sent
+ * in the 2nd packet.
+ */
+ break;
+ }
+ }
+ } while (num_to_report > 0);
+
+ input_mt_sync_frame(ts->input);
+ input_sync(ts->input);
+}
+
+static irqreturn_t sis_ts_irq_handler(int irq, void *dev_id)
+{
+ struct sis_ts_data *ts = dev_id;
+
+ do {
+ sis_ts_handle_packet(ts);
+ } while (ts->attn_gpio && gpiod_get_value_cansleep(ts->attn_gpio));
+
+ return IRQ_HANDLED;
+}
+
+static void sis_ts_reset(struct sis_ts_data *ts)
+{
+ if (ts->reset_gpio) {
+ /* Get out of reset */
+ usleep_range(1000, 2000);
+ gpiod_set_value(ts->reset_gpio, 1);
+ usleep_range(1000, 2000);
+ gpiod_set_value(ts->reset_gpio, 0);
+ msleep(100);
+ }
+}
+
+static int sis_ts_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct sis_ts_data *ts;
+ struct input_dev *input;
+ int error;
+
+ ts = devm_kzalloc(&client->dev, sizeof(*ts), GFP_KERNEL);
+ if (!ts)
+ return -ENOMEM;
+
+ ts->client = client;
+ i2c_set_clientdata(client, ts);
+
+ ts->attn_gpio = devm_gpiod_get_optional(&client->dev,
+ "attn", GPIOD_IN);
+ if (IS_ERR(ts->attn_gpio)) {
+ error = PTR_ERR(ts->attn_gpio);
+ if (error != -EPROBE_DEFER)
+ dev_err(&client->dev,
+ "Failed to get attention GPIO: %d\n", error);
+ return error;
+ }
+
+ ts->reset_gpio = devm_gpiod_get_optional(&client->dev,
+ "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(ts->reset_gpio)) {
+ error = PTR_ERR(ts->reset_gpio);
+ if (error != -EPROBE_DEFER)
+ dev_err(&client->dev,
+ "Failed to get reset GPIO: %d\n", error);
+ return error;
+ }
+
+ sis_ts_reset(ts);
+
+ ts->input = input = devm_input_allocate_device(&client->dev);
+ if (!input) {
+ dev_err(&client->dev, "Failed to allocate input device\n");
+ return -ENOMEM;
+ }
+
+ input->name = "SiS Touchscreen";
+ input->id.bustype = BUS_I2C;
+
+ input_set_abs_params(input, ABS_MT_POSITION_X, 0, SIS_MAX_X, 0, 0);
+ input_set_abs_params(input, ABS_MT_POSITION_Y, 0, SIS_MAX_Y, 0, 0);
+ input_set_abs_params(input, ABS_MT_PRESSURE, 0, SIS_MAX_PRESSURE, 0, 0);
+ input_set_abs_params(input, ABS_MT_TOUCH_MAJOR,
+ 0, SIS_AREA_LENGTH_LONGER, 0, 0);
+ input_set_abs_params(input, ABS_MT_TOUCH_MINOR,
+ 0, SIS_AREA_LENGTH_SHORT, 0, 0);
+
+ error = input_mt_init_slots(input, SIS_MAX_FINGERS, INPUT_MT_DIRECT);
+ if (error) {
+ dev_err(&client->dev,
+ "Failed to initialize MT slots: %d\n", error);
+ return error;
+ }
+
+ error = devm_request_threaded_irq(&client->dev, client->irq,
+ NULL, sis_ts_irq_handler,
+ IRQF_ONESHOT,
+ client->name, ts);
+ if (error) {
+ dev_err(&client->dev, "Failed to request IRQ: %d\n", error);
+ return error;
+ }
+
+ error = input_register_device(ts->input);
+ if (error) {
+ dev_err(&client->dev,
+ "Failed to register input device: %d\n", error);
+ return error;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id sis_ts_dt_ids[] = {
+ { .compatible = "sis,9200-ts" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sis_ts_dt_ids);
+#endif
+
+static const struct i2c_device_id sis_ts_id[] = {
+ { SIS_I2C_NAME, 0 },
+ { "9200-ts", 0 },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(i2c, sis_ts_id);
+
+static struct i2c_driver sis_ts_driver = {
+ .driver = {
+ .name = SIS_I2C_NAME,
+ .of_match_table = of_match_ptr(sis_ts_dt_ids),
+ },
+ .probe = sis_ts_probe,
+ .id_table = sis_ts_id,
+};
+module_i2c_driver(sis_ts_driver);
+
+MODULE_DESCRIPTION("SiS 9200 Family Touchscreen Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mika Penttilä <mika.penttila@nextfour.com>");
return 0;
}
+/**
+ * add_fastmap - add a Fastmap related physical eraseblock.
+ * @ai: attaching information
+ * @pnum: physical eraseblock number the VID header came from
+ * @vid_hdr: the volume identifier header
+ * @ec: erase counter of the physical eraseblock
+ *
+ * This function allocates a 'struct ubi_ainf_peb' object for a Fastamp
+ * physical eraseblock @pnum and adds it to the 'fastmap' list.
+ * Such blocks can be Fastmap super and data blocks from both the most
+ * recent Fastmap we're attaching from or from old Fastmaps which will
+ * be erased.
+ */
+static int add_fastmap(struct ubi_attach_info *ai, int pnum,
+ struct ubi_vid_hdr *vid_hdr, int ec)
+{
+ struct ubi_ainf_peb *aeb;
+
+ aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL);
+ if (!aeb)
+ return -ENOMEM;
+
+ aeb->pnum = pnum;
+ aeb->vol_id = be32_to_cpu(vidh->vol_id);
+ aeb->sqnum = be64_to_cpu(vidh->sqnum);
+ aeb->ec = ec;
+ list_add(&aeb->u.list, &ai->fastmap);
+
+ dbg_bld("add to fastmap list: PEB %d, vol_id %d, sqnum: %llu", pnum,
+ aeb->vol_id, aeb->sqnum);
+
+ return 0;
+}
+
/**
* validate_vid_hdr - check volume identifier header.
* @ubi: UBI device description object
return err;
}
+static bool vol_ignored(int vol_id)
+{
+ switch (vol_id) {
+ case UBI_LAYOUT_VOLUME_ID:
+ return true;
+ }
+
+#ifdef CONFIG_MTD_UBI_FASTMAP
+ return ubi_is_fm_vol(vol_id);
+#else
+ return false;
+#endif
+}
+
/**
* scan_peb - scan and process UBI headers of a PEB.
* @ubi: UBI device description object
* @ai: attaching information
* @pnum: the physical eraseblock number
- * @vid: The volume ID of the found volume will be stored in this pointer
- * @sqnum: The sqnum of the found volume will be stored in this pointer
+ * @fast: true if we're scanning for a Fastmap
*
* This function reads UBI headers of PEB @pnum, checks them, and adds
* information about this PEB to the corresponding list or RB-tree in the
* successfully handled and a negative error code in case of failure.
*/
static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
- int pnum, int *vid, unsigned long long *sqnum)
+ int pnum, bool fast)
{
- long long uninitialized_var(ec);
+ long long ec;
int err, bitflips = 0, vol_id = -1, ec_err = 0;
dbg_bld("scan PEB %d", pnum);
*/
ai->maybe_bad_peb_count += 1;
case UBI_IO_BAD_HDR:
+ /*
+ * If we're facing a bad VID header we have to drop *all*
+ * Fastmap data structures we find. The most recent Fastmap
+ * could be bad and therefore there is a chance that we attach
+ * from an old one. On a fine MTD stack a PEB must not render
+ * bad all of a sudden, but the reality is different.
+ * So, let's be paranoid and help finding the root cause by
+ * falling back to scanning mode instead of attaching with a
+ * bad EBA table and cause data corruption which is hard to
+ * analyze.
+ */
+ if (fast)
+ ai->force_full_scan = 1;
+
if (ec_err)
/*
* Both headers are corrupted. There is a possibility
}
vol_id = be32_to_cpu(vidh->vol_id);
- if (vid)
- *vid = vol_id;
- if (sqnum)
- *sqnum = be64_to_cpu(vidh->sqnum);
- if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) {
+ if (vol_id > UBI_MAX_VOLUMES && !vol_ignored(vol_id)) {
int lnum = be32_to_cpu(vidh->lnum);
/* Unsupported internal volume */
switch (vidh->compat) {
case UBI_COMPAT_DELETE:
- if (vol_id != UBI_FM_SB_VOLUME_ID
- && vol_id != UBI_FM_DATA_VOLUME_ID) {
- ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it",
- vol_id, lnum);
- }
+ ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it",
+ vol_id, lnum);
+
err = add_to_list(ai, pnum, vol_id, lnum,
ec, 1, &ai->erase);
if (err)
if (ec_err)
ubi_warn(ubi, "valid VID header but corrupted EC header at PEB %d",
pnum);
- err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips);
+
+ if (ubi_is_fm_vol(vol_id))
+ err = add_fastmap(ai, pnum, vidh, ec);
+ else
+ err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips);
+
if (err)
return err;
list_del(&aeb->u.list);
kmem_cache_free(ai->aeb_slab_cache, aeb);
}
+ list_for_each_entry_safe(aeb, aeb_tmp, &ai->fastmap, u.list) {
+ list_del(&aeb->u.list);
+ kmem_cache_free(ai->aeb_slab_cache, aeb);
+ }
/* Destroy the volume RB-tree */
rb = ai->volumes.rb_node;
cond_resched();
dbg_gen("process PEB %d", pnum);
- err = scan_peb(ubi, ai, pnum, NULL, NULL);
+ err = scan_peb(ubi, ai, pnum, false);
if (err < 0)
goto out_vidh;
}
INIT_LIST_HEAD(&ai->free);
INIT_LIST_HEAD(&ai->erase);
INIT_LIST_HEAD(&ai->alien);
+ INIT_LIST_HEAD(&ai->fastmap);
ai->volumes = RB_ROOT;
ai->aeb_slab_cache = kmem_cache_create("ubi_aeb_slab_cache",
sizeof(struct ubi_ainf_peb),
#ifdef CONFIG_MTD_UBI_FASTMAP
/**
- * scan_fastmap - try to find a fastmap and attach from it.
+ * scan_fast - try to find a fastmap and attach from it.
* @ubi: UBI device description object
* @ai: attach info object
*
*/
static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai)
{
- int err, pnum, fm_anchor = -1;
- unsigned long long max_sqnum = 0;
+ int err, pnum;
+ struct ubi_attach_info *scan_ai;
err = -ENOMEM;
+ scan_ai = alloc_ai();
+ if (!scan_ai)
+ goto out;
+
ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);
if (!ech)
- goto out;
+ goto out_ai;
vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
if (!vidh)
goto out_ech;
for (pnum = 0; pnum < UBI_FM_MAX_START; pnum++) {
- int vol_id = -1;
- unsigned long long sqnum = -1;
cond_resched();
dbg_gen("process PEB %d", pnum);
- err = scan_peb(ubi, *ai, pnum, &vol_id, &sqnum);
+ err = scan_peb(ubi, scan_ai, pnum, true);
if (err < 0)
goto out_vidh;
-
- if (vol_id == UBI_FM_SB_VOLUME_ID && sqnum > max_sqnum) {
- max_sqnum = sqnum;
- fm_anchor = pnum;
- }
}
ubi_free_vid_hdr(ubi, vidh);
kfree(ech);
- if (fm_anchor < 0)
- return UBI_NO_FASTMAP;
+ if (scan_ai->force_full_scan)
+ err = UBI_NO_FASTMAP;
+ else
+ err = ubi_scan_fastmap(ubi, *ai, scan_ai);
- destroy_ai(*ai);
- *ai = alloc_ai();
- if (!*ai)
- return -ENOMEM;
+ if (err) {
+ /*
+ * Didn't attach via fastmap, do a full scan but reuse what
+ * we've aready scanned.
+ */
+ destroy_ai(*ai);
+ *ai = scan_ai;
+ } else
+ destroy_ai(scan_ai);
- return ubi_scan_fastmap(ubi, *ai, fm_anchor);
+ return err;
out_vidh:
ubi_free_vid_hdr(ubi, vidh);
out_ech:
kfree(ech);
+out_ai:
+ destroy_ai(scan_ai);
out:
return err;
}
for (i = 0; i < UBI_MAX_DEVICES; i++) {
ubi = ubi_devices[i];
if (ubi && mtd->index == ubi->mtd->index) {
- ubi_err(ubi, "mtd%d is already attached to ubi%d",
+ pr_err("ubi: mtd%d is already attached to ubi%d",
mtd->index, i);
return -EEXIST;
}
* no sense to attach emulated MTD devices, so we prohibit this.
*/
if (mtd->type == MTD_UBIVOLUME) {
- ubi_err(ubi, "refuse attaching mtd%d - it is already emulated on top of UBI",
+ pr_err("ubi: refuse attaching mtd%d - it is already emulated on top of UBI",
mtd->index);
return -EINVAL;
}
if (!ubi_devices[ubi_num])
break;
if (ubi_num == UBI_MAX_DEVICES) {
- ubi_err(ubi, "only %d UBI devices may be created",
+ pr_err("ubi: only %d UBI devices may be created",
UBI_MAX_DEVICES);
return -ENFILE;
}
/* Make sure ubi_num is not busy */
if (ubi_devices[ubi_num]) {
- ubi_err(ubi, "already exists");
+ pr_err("ubi: ubi%i already exists", ubi_num);
return -EEXIST;
}
}
goto out_detach;
}
+ /* Make device "available" before it becomes accessible via sysfs */
+ ubi_devices[ubi_num] = ubi;
+
err = uif_init(ubi, &ref);
if (err)
goto out_detach;
wake_up_process(ubi->bgt_thread);
spin_unlock(&ubi->wl_lock);
- ubi_devices[ubi_num] = ubi;
ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL);
return ubi_num;
ubi_assert(ref);
uif_close(ubi);
out_detach:
+ ubi_devices[ubi_num] = NULL;
ubi_wl_close(ubi);
ubi_free_internal_volumes(ubi);
vfree(ubi->vtbl);
*/
#include <linux/crc32.h>
+#include <linux/bitmap.h>
#include "ubi.h"
/**
* init_seen - allocate memory for used for debugging.
* @ubi: UBI device description object
*/
-static inline int *init_seen(struct ubi_device *ubi)
+static inline unsigned long *init_seen(struct ubi_device *ubi)
{
- int *ret;
+ unsigned long *ret;
if (!ubi_dbg_chk_fastmap(ubi))
return NULL;
- ret = kcalloc(ubi->peb_count, sizeof(int), GFP_KERNEL);
+ ret = kcalloc(BITS_TO_LONGS(ubi->peb_count), sizeof(unsigned long),
+ GFP_KERNEL);
if (!ret)
return ERR_PTR(-ENOMEM);
* free_seen - free the seen logic integer array.
* @seen: integer array of @ubi->peb_count size
*/
-static inline void free_seen(int *seen)
+static inline void free_seen(unsigned long *seen)
{
kfree(seen);
}
* @pnum: The PEB to be makred as seen
* @seen: integer array of @ubi->peb_count size
*/
-static inline void set_seen(struct ubi_device *ubi, int pnum, int *seen)
+static inline void set_seen(struct ubi_device *ubi, int pnum, unsigned long *seen)
{
if (!ubi_dbg_chk_fastmap(ubi) || !seen)
return;
- seen[pnum] = 1;
+ set_bit(pnum, seen);
}
/**
* @ubi: UBI device description object
* @seen: integer array of @ubi->peb_count size
*/
-static int self_check_seen(struct ubi_device *ubi, int *seen)
+static int self_check_seen(struct ubi_device *ubi, unsigned long *seen)
{
int pnum, ret = 0;
return 0;
for (pnum = 0; pnum < ubi->peb_count; pnum++) {
- if (!seen[pnum] && ubi->lookuptbl[pnum]) {
+ if (test_bit(pnum, seen) && ubi->lookuptbl[pnum]) {
ubi_err(ubi, "self-check failed for PEB %d, fastmap didn't see it", pnum);
ret = -EINVAL;
}
list_for_each_entry(aeb, &ai->free, u.list)
n++;
- ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb)
+ ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb)
ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb)
n++;
return ret;
}
+/**
+ * find_fm_anchor - find the most recent Fastmap superblock (anchor)
+ * @ai: UBI attach info to be filled
+ */
+static int find_fm_anchor(struct ubi_attach_info *ai)
+{
+ int ret = -1;
+ struct ubi_ainf_peb *aeb;
+ unsigned long long max_sqnum = 0;
+
+ list_for_each_entry(aeb, &ai->fastmap, u.list) {
+ if (aeb->vol_id == UBI_FM_SB_VOLUME_ID && aeb->sqnum > max_sqnum) {
+ max_sqnum = aeb->sqnum;
+ ret = aeb->pnum;
+ }
+ }
+
+ return ret;
+}
+
/**
* ubi_scan_fastmap - scan the fastmap.
* @ubi: UBI device object
* @ai: UBI attach info to be filled
- * @fm_anchor: The fastmap starts at this PEB
+ * @scan_ai: UBI attach info from the first 64 PEBs,
+ * used to find the most recent Fastmap data structure
*
* Returns 0 on success, UBI_NO_FASTMAP if no fastmap was found,
* UBI_BAD_FASTMAP if one was found but is not usable.
* < 0 indicates an internal error.
*/
int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
- int fm_anchor)
+ struct ubi_attach_info *scan_ai)
{
struct ubi_fm_sb *fmsb, *fmsb2;
struct ubi_vid_hdr *vh;
struct ubi_ec_hdr *ech;
struct ubi_fastmap_layout *fm;
- int i, used_blocks, pnum, ret = 0;
+ struct ubi_ainf_peb *tmp_aeb, *aeb;
+ int i, used_blocks, pnum, fm_anchor, ret = 0;
size_t fm_size;
__be32 crc, tmp_crc;
unsigned long long sqnum = 0;
+ fm_anchor = find_fm_anchor(scan_ai);
+ if (fm_anchor < 0)
+ return UBI_NO_FASTMAP;
+
+ /* Move all (possible) fastmap blocks into our new attach structure. */
+ list_for_each_entry_safe(aeb, tmp_aeb, &scan_ai->fastmap, u.list)
+ list_move_tail(&aeb->u.list, &ai->fastmap);
+
down_write(&ubi->fm_protect);
memset(ubi->fm_buf, 0, ubi->fm_size);
goto free_hdr;
}
+ if (i == 0 && pnum != fm_anchor) {
+ ubi_err(ubi, "Fastmap anchor PEB mismatch: PEB: %i vs. %i",
+ pnum, fm_anchor);
+ ret = UBI_BAD_FASTMAP;
+ goto free_hdr;
+ }
+
ret = ubi_io_read_ec_hdr(ubi, pnum, ech, 0);
if (ret && ret != UBI_IO_BITFLIPS) {
ubi_err(ubi, "unable to read fastmap block# %i EC (PEB: %i)",
struct rb_node *tmp_rb;
int ret, i, j, free_peb_count, used_peb_count, vol_count;
int scrub_peb_count, erase_peb_count;
- int *seen_pebs = NULL;
+ unsigned long *seen_pebs = NULL;
fm_raw = ubi->fm_buf;
memset(ubi->fm_buf, 0, ubi->fm_size);
struct gluebi_device *gluebi;
int ubi_mode = UBI_READONLY;
- if (!try_module_get(THIS_MODULE))
- return -ENODEV;
-
if (mtd->flags & MTD_WRITEABLE)
ubi_mode = UBI_READWRITE;
ubi_mode);
if (IS_ERR(gluebi->desc)) {
mutex_unlock(&devices_mutex);
- module_put(THIS_MODULE);
return PTR_ERR(gluebi->desc);
}
gluebi->refcnt += 1;
gluebi->refcnt -= 1;
if (gluebi->refcnt == 0)
ubi_close_volume(gluebi->desc);
- module_put(THIS_MODULE);
mutex_unlock(&devices_mutex);
}
p = (char *)vid_hdr - ubi->vid_hdr_shift;
read_err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset,
- ubi->vid_hdr_alsize);
+ ubi->vid_hdr_shift + UBI_VID_HDR_SIZE);
if (read_err && read_err != UBI_IO_BITFLIPS && !mtd_is_eccerr(read_err))
return read_err;
* @erase: list of physical eraseblocks which have to be erased
* @alien: list of physical eraseblocks which should not be used by UBI (e.g.,
* those belonging to "preserve"-compatible internal volumes)
+ * @fastmap: list of physical eraseblocks which relate to fastmap (e.g.,
+ * eraseblocks of the current and not yet erased old fastmap blocks)
* @corr_peb_count: count of PEBs in the @corr list
* @empty_peb_count: count of PEBs which are presumably empty (contain only
* 0xFF bytes)
* @vols_found: number of volumes found
* @highest_vol_id: highest volume ID
* @is_empty: flag indicating whether the MTD device is empty or not
+ * @force_full_scan: flag indicating whether we need to do a full scan and drop
+ all existing Fastmap data structures
* @min_ec: lowest erase counter value
* @max_ec: highest erase counter value
* @max_sqnum: highest sequence number value
struct list_head free;
struct list_head erase;
struct list_head alien;
+ struct list_head fastmap;
int corr_peb_count;
int empty_peb_count;
int alien_peb_count;
int vols_found;
int highest_vol_id;
int is_empty;
+ int force_full_scan;
int min_ec;
int max_ec;
unsigned long long max_sqnum;
size_t ubi_calc_fm_size(struct ubi_device *ubi);
int ubi_update_fastmap(struct ubi_device *ubi);
int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
- int fm_anchor);
+ struct ubi_attach_info *scan_ai);
#else
static inline int ubi_update_fastmap(struct ubi_device *ubi) { return 0; }
#endif
return idx;
}
+/**
+ * ubi_is_fm_vol - check whether a volume ID is a Fastmap volume.
+ * @vol_id: volume ID
+ */
+static inline bool ubi_is_fm_vol(int vol_id)
+{
+ switch (vol_id) {
+ case UBI_FM_SB_VOLUME_ID:
+ case UBI_FM_DATA_VOLUME_ID:
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * ubi_find_fm_block - check whether a PEB is part of the current Fastmap.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock to look for
+ *
+ * This function returns a wear leveling object if @pnum relates to the current
+ * fastmap, @NULL otherwise.
+ */
+static inline struct ubi_wl_entry *ubi_find_fm_block(const struct ubi_device *ubi,
+ int pnum)
+{
+ int i;
+
+ if (ubi->fm) {
+ for (i = 0; i < ubi->fm->used_blocks; i++) {
+ if (ubi->fm->e[i]->pnum == pnum)
+ return ubi->fm->e[i];
+ }
+ }
+
+ return NULL;
+}
+
#endif /* !__UBI_UBI_H__ */
spin_unlock(&ubi->volumes_lock);
}
- /* Change volume table record */
- vtbl_rec = ubi->vtbl[vol_id];
- vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs);
- err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
- if (err)
- goto out_acc;
-
if (pebs < 0) {
for (i = 0; i < -pebs; i++) {
err = ubi_eba_unmap_leb(ubi, vol, reserved_pebs + i);
spin_unlock(&ubi->volumes_lock);
}
+ /*
+ * When we shrink a volume we have to flush all pending (erase) work.
+ * Otherwise it can happen that upon next attach UBI finds a LEB with
+ * lnum > highest_lnum and refuses to attach.
+ */
+ if (pebs < 0) {
+ err = ubi_wl_flush(ubi, vol_id, UBI_ALL);
+ if (err)
+ goto out_acc;
+ }
+
+ /* Change volume table record */
+ vtbl_rec = ubi->vtbl[vol_id];
+ vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs);
+ err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
+ if (err)
+ goto out_acc;
+
vol->reserved_pebs = reserved_pebs;
if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
vol->used_ebs = reserved_pebs;
}
}
- dbg_wl("found %i PEBs", found_pebs);
+ list_for_each_entry(aeb, &ai->fastmap, u.list) {
+ cond_resched();
+
+ e = ubi_find_fm_block(ubi, aeb->pnum);
- if (ubi->fm) {
- ubi_assert(ubi->good_peb_count ==
- found_pebs + ubi->fm->used_blocks);
+ if (e) {
+ ubi_assert(!ubi->lookuptbl[e->pnum]);
+ ubi->lookuptbl[e->pnum] = e;
+ } else {
+ /*
+ * Usually old Fastmap PEBs are scheduled for erasure
+ * and we don't have to care about them but if we face
+ * an power cut before scheduling them we need to
+ * take care of them here.
+ */
+ if (ubi->lookuptbl[aeb->pnum])
+ continue;
+
+ e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
+ if (!e)
+ goto out_free;
- for (i = 0; i < ubi->fm->used_blocks; i++) {
- e = ubi->fm->e[i];
+ e->pnum = aeb->pnum;
+ e->ec = aeb->ec;
+ ubi_assert(!ubi->lookuptbl[e->pnum]);
ubi->lookuptbl[e->pnum] = e;
+ if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) {
+ wl_entry_destroy(ubi, e);
+ goto out_free;
+ }
}
+
+ found_pebs++;
}
- else
- ubi_assert(ubi->good_peb_count == found_pebs);
+
+ dbg_wl("found %i PEBs", found_pebs);
+
+ ubi_assert(ubi->good_peb_count == found_pebs);
reserved_pebs = WL_RESERVED_PEBS;
ubi_fastmap_init(ubi, &reserved_pebs);
enum CPL_error {
CPL_ERR_NONE = 0,
+ CPL_ERR_TCAM_PARITY = 1,
+ CPL_ERR_TCAM_MISS = 2,
CPL_ERR_TCAM_FULL = 3,
CPL_ERR_BAD_LENGTH = 15,
CPL_ERR_BAD_ROUTE = 18,
#define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98
#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0
#define QUERY_DEV_CAP_ETH_BACKPL_OFFSET 0x9c
+#define QUERY_DEV_CAP_DIAG_RPRT_PER_PORT 0x9c
#define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d
#define QUERY_DEV_CAP_VXLAN 0x9e
#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP;
if (field32 & (1 << 7))
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT;
+ MLX4_GET(field32, outbox, QUERY_DEV_CAP_DIAG_RPRT_PER_PORT);
+ if (field32 & (1 << 17))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT;
MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC);
if (field & 1<<6)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN;
MLX4_CMD_NATIVE);
}
+int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier,
+ const u32 offset[],
+ u32 value[], size_t array_len, u8 port)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *outbox;
+ size_t i;
+ int ret;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ outbox = mailbox->buf;
+
+ ret = mlx4_cmd_box(dev, 0, mailbox->dma, port, op_modifier,
+ MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < array_len; i++) {
+ if (offset[i] > MLX4_MAILBOX_SIZE) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ MLX4_GET(value[i], outbox, offset[i]);
+ }
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return ret;
+}
+EXPORT_SYMBOL(mlx4_query_diag_counters);
+
int mlx4_get_phys_port_id(struct mlx4_dev *dev)
{
u8 port;
complete(&srq->free);
}
-static int get_pas_size(void *srqc)
+static int get_pas_size(struct mlx5_srq_attr *in)
{
- u32 log_page_size = MLX5_GET(srqc, srqc, log_page_size) + 12;
- u32 log_srq_size = MLX5_GET(srqc, srqc, log_srq_size);
- u32 log_rq_stride = MLX5_GET(srqc, srqc, log_rq_stride);
- u32 page_offset = MLX5_GET(srqc, srqc, page_offset);
+ u32 log_page_size = in->log_page_size + 12;
+ u32 log_srq_size = in->log_size;
+ u32 log_rq_stride = in->wqe_shift;
+ u32 page_offset = in->page_offset;
u32 po_quanta = 1 << (log_page_size - 6);
u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride);
u32 page_size = 1 << log_page_size;
return rq_num_pas * sizeof(u64);
}
-static void rmpc_srqc_reformat(void *srqc, void *rmpc, bool srqc_to_rmpc)
+static void set_wq(void *wq, struct mlx5_srq_attr *in)
{
- void *wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
-
- if (srqc_to_rmpc) {
- switch (MLX5_GET(srqc, srqc, state)) {
- case MLX5_SRQC_STATE_GOOD:
- MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
- break;
- case MLX5_SRQC_STATE_ERROR:
- MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_ERR);
- break;
- default:
- pr_warn("%s: %d: Unknown srq state = 0x%x\n", __func__,
- __LINE__, MLX5_GET(srqc, srqc, state));
- MLX5_SET(rmpc, rmpc, state, MLX5_GET(srqc, srqc, state));
- }
-
- MLX5_SET(wq, wq, wq_signature, MLX5_GET(srqc, srqc, wq_signature));
- MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(srqc, srqc, log_page_size));
- MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(srqc, srqc, log_rq_stride) + 4);
- MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(srqc, srqc, log_srq_size));
- MLX5_SET(wq, wq, page_offset, MLX5_GET(srqc, srqc, page_offset));
- MLX5_SET(wq, wq, lwm, MLX5_GET(srqc, srqc, lwm));
- MLX5_SET(wq, wq, pd, MLX5_GET(srqc, srqc, pd));
- MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(srqc, srqc, dbr_addr));
- } else {
- switch (MLX5_GET(rmpc, rmpc, state)) {
- case MLX5_RMPC_STATE_RDY:
- MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_GOOD);
- break;
- case MLX5_RMPC_STATE_ERR:
- MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_ERROR);
- break;
- default:
- pr_warn("%s: %d: Unknown rmp state = 0x%x\n",
- __func__, __LINE__,
- MLX5_GET(rmpc, rmpc, state));
- MLX5_SET(srqc, srqc, state,
- MLX5_GET(rmpc, rmpc, state));
- }
-
- MLX5_SET(srqc, srqc, wq_signature, MLX5_GET(wq, wq, wq_signature));
- MLX5_SET(srqc, srqc, log_page_size, MLX5_GET(wq, wq, log_wq_pg_sz));
- MLX5_SET(srqc, srqc, log_rq_stride, MLX5_GET(wq, wq, log_wq_stride) - 4);
- MLX5_SET(srqc, srqc, log_srq_size, MLX5_GET(wq, wq, log_wq_sz));
- MLX5_SET(srqc, srqc, page_offset, MLX5_GET(wq, wq, page_offset));
- MLX5_SET(srqc, srqc, lwm, MLX5_GET(wq, wq, lwm));
- MLX5_SET(srqc, srqc, pd, MLX5_GET(wq, wq, pd));
- MLX5_SET64(srqc, srqc, dbr_addr, MLX5_GET64(wq, wq, dbr_addr));
- }
+ MLX5_SET(wq, wq, wq_signature, !!(in->flags
+ & MLX5_SRQ_FLAG_WQ_SIG));
+ MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size);
+ MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4);
+ MLX5_SET(wq, wq, log_wq_sz, in->log_size);
+ MLX5_SET(wq, wq, page_offset, in->page_offset);
+ MLX5_SET(wq, wq, lwm, in->lwm);
+ MLX5_SET(wq, wq, pd, in->pd);
+ MLX5_SET64(wq, wq, dbr_addr, in->db_record);
+}
+
+static void set_srqc(void *srqc, struct mlx5_srq_attr *in)
+{
+ MLX5_SET(srqc, srqc, wq_signature, !!(in->flags
+ & MLX5_SRQ_FLAG_WQ_SIG));
+ MLX5_SET(srqc, srqc, log_page_size, in->log_page_size);
+ MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift);
+ MLX5_SET(srqc, srqc, log_srq_size, in->log_size);
+ MLX5_SET(srqc, srqc, page_offset, in->page_offset);
+ MLX5_SET(srqc, srqc, lwm, in->lwm);
+ MLX5_SET(srqc, srqc, pd, in->pd);
+ MLX5_SET64(srqc, srqc, dbr_addr, in->db_record);
+ MLX5_SET(srqc, srqc, xrcd, in->xrcd);
+ MLX5_SET(srqc, srqc, cqn, in->cqn);
+}
+
+static void get_wq(void *wq, struct mlx5_srq_attr *in)
+{
+ if (MLX5_GET(wq, wq, wq_signature))
+ in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
+ in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz);
+ in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4;
+ in->log_size = MLX5_GET(wq, wq, log_wq_sz);
+ in->page_offset = MLX5_GET(wq, wq, page_offset);
+ in->lwm = MLX5_GET(wq, wq, lwm);
+ in->pd = MLX5_GET(wq, wq, pd);
+ in->db_record = MLX5_GET64(wq, wq, dbr_addr);
+}
+
+static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
+{
+ if (MLX5_GET(srqc, srqc, wq_signature))
+ in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
+ in->log_page_size = MLX5_GET(srqc, srqc, log_page_size);
+ in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride);
+ in->log_size = MLX5_GET(srqc, srqc, log_srq_size);
+ in->page_offset = MLX5_GET(srqc, srqc, page_offset);
+ in->lwm = MLX5_GET(srqc, srqc, lwm);
+ in->pd = MLX5_GET(srqc, srqc, pd);
+ in->db_record = MLX5_GET64(srqc, srqc, dbr_addr);
}
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
EXPORT_SYMBOL(mlx5_core_get_srq);
static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_create_srq_mbox_in *in, int inlen)
+ struct mlx5_srq_attr *in)
{
- struct mlx5_create_srq_mbox_out out;
+ u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
+ void *create_in;
+ void *srqc;
+ void *pas;
+ int pas_size;
+ int inlen;
int err;
- memset(&out, 0, sizeof(out));
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
+ create_in = mlx5_vzalloc(inlen);
+ if (!create_in)
+ return -ENOMEM;
+
+ srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
+ pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
- in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_SRQ);
+ set_srqc(srqc, in);
+ memcpy(pas, in->pas, pas_size);
- err = mlx5_cmd_exec_check_status(dev, (u32 *)in, inlen, (u32 *)(&out),
- sizeof(out));
+ MLX5_SET(create_srq_in, create_in, opcode,
+ MLX5_CMD_OP_CREATE_SRQ);
- srq->srqn = be32_to_cpu(out.srqn) & 0xffffff;
+ err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out,
+ sizeof(create_out));
+ kvfree(create_in);
+ if (!err)
+ srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
return err;
}
static int destroy_srq_cmd(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq)
{
- struct mlx5_destroy_srq_mbox_in in;
- struct mlx5_destroy_srq_mbox_out out;
+ u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
+ u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
- memset(&in, 0, sizeof(in));
- memset(&out, 0, sizeof(out));
- in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ);
- in.srqn = cpu_to_be32(srq->srqn);
+ MLX5_SET(destroy_srq_in, srq_in, opcode,
+ MLX5_CMD_OP_DESTROY_SRQ);
+ MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
- return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in),
- (u32 *)(&out), sizeof(out));
+ return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in),
+ srq_out, sizeof(srq_out));
}
static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
u16 lwm, int is_srq)
{
- struct mlx5_arm_srq_mbox_in in;
- struct mlx5_arm_srq_mbox_out out;
-
- memset(&in, 0, sizeof(in));
- memset(&out, 0, sizeof(out));
+ /* arm_srq structs missing using identical xrc ones */
+ u32 srq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
+ u32 srq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
- in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ);
- in.hdr.opmod = cpu_to_be16(!!is_srq);
- in.srqn = cpu_to_be32(srq->srqn);
- in.lwm = cpu_to_be16(lwm);
+ MLX5_SET(arm_xrc_srq_in, srq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
+ MLX5_SET(arm_xrc_srq_in, srq_in, xrc_srqn, srq->srqn);
+ MLX5_SET(arm_xrc_srq_in, srq_in, lwm, lwm);
- return mlx5_cmd_exec_check_status(dev, (u32 *)(&in),
- sizeof(in), (u32 *)(&out),
- sizeof(out));
+ return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in),
+ srq_out, sizeof(srq_out));
}
static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_query_srq_mbox_out *out)
+ struct mlx5_srq_attr *out)
{
- struct mlx5_query_srq_mbox_in in;
+ u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
+ u32 *srq_out;
+ void *srqc;
+ int err;
- memset(&in, 0, sizeof(in));
+ srq_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_srq_out));
+ if (!srq_out)
+ return -ENOMEM;
- in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ);
- in.srqn = cpu_to_be32(srq->srqn);
+ MLX5_SET(query_srq_in, srq_in, opcode,
+ MLX5_CMD_OP_QUERY_SRQ);
+ MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
+ err = mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in),
+ srq_out,
+ MLX5_ST_SZ_BYTES(query_srq_out));
+ if (err)
+ goto out;
- return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in),
- (u32 *)out, sizeof(*out));
+ srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry);
+ get_srqc(srqc, out);
+ if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+out:
+ kvfree(srq_out);
+ return err;
}
static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq,
- struct mlx5_create_srq_mbox_in *in,
- int srq_inlen)
+ struct mlx5_srq_attr *in)
{
u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
void *create_in;
- void *srqc;
void *xrc_srqc;
void *pas;
int pas_size;
int inlen;
int err;
- srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry);
- pas_size = get_pas_size(srqc);
+ pas_size = get_pas_size(in);
inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
create_in = mlx5_vzalloc(inlen);
if (!create_in)
xrc_srq_context_entry);
pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
- memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc));
+ set_srqc(xrc_srqc, in);
+ MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
memcpy(pas, in->pas, pas_size);
MLX5_SET(create_xrc_srq_in, create_in, opcode,
MLX5_CMD_OP_CREATE_XRC_SRQ);
static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq,
- struct mlx5_query_srq_mbox_out *out)
+ struct mlx5_srq_attr *out)
{
u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)];
u32 *xrcsrq_out;
- void *srqc;
void *xrc_srqc;
int err;
xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out,
xrc_srq_context_entry);
- srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry);
- memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc));
+ get_srqc(xrc_srqc, out);
+ if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
out:
kvfree(xrcsrq_out);
}
static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_create_srq_mbox_in *in, int srq_inlen)
+ struct mlx5_srq_attr *in)
{
void *create_in;
void *rmpc;
- void *srqc;
+ void *wq;
int pas_size;
int inlen;
int err;
- srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry);
- pas_size = get_pas_size(srqc);
+ pas_size = get_pas_size(in);
inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
create_in = mlx5_vzalloc(inlen);
if (!create_in)
return -ENOMEM;
rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
+ wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+ MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+ set_wq(wq, in);
memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
- rmpc_srqc_reformat(srqc, rmpc, true);
err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
}
static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_query_srq_mbox_out *out)
+ struct mlx5_srq_attr *out)
{
u32 *rmp_out;
void *rmpc;
- void *srqc;
int err;
rmp_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_rmp_out));
if (err)
goto out;
- srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry);
rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context);
- rmpc_srqc_reformat(srqc, rmpc, false);
+ get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out);
+ if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
out:
kvfree(rmp_out);
static int create_srq_split(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq,
- struct mlx5_create_srq_mbox_in *in,
- int inlen, int is_xrc)
+ struct mlx5_srq_attr *in)
{
if (!dev->issi)
- return create_srq_cmd(dev, srq, in, inlen);
+ return create_srq_cmd(dev, srq, in);
else if (srq->common.res == MLX5_RES_XSRQ)
- return create_xrc_srq_cmd(dev, srq, in, inlen);
+ return create_xrc_srq_cmd(dev, srq, in);
else
- return create_rmp_cmd(dev, srq, in, inlen);
+ return create_rmp_cmd(dev, srq, in);
}
static int destroy_srq_split(struct mlx5_core_dev *dev,
}
int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_create_srq_mbox_in *in, int inlen,
- int is_xrc)
+ struct mlx5_srq_attr *in)
{
int err;
struct mlx5_srq_table *table = &dev->priv.srq_table;
- srq->common.res = is_xrc ? MLX5_RES_XSRQ : MLX5_RES_SRQ;
+ if (in->type == IB_SRQT_XRC)
+ srq->common.res = MLX5_RES_XSRQ;
+ else
+ srq->common.res = MLX5_RES_SRQ;
- err = create_srq_split(dev, srq, in, inlen, is_xrc);
+ err = create_srq_split(dev, srq, in);
if (err)
return err;
EXPORT_SYMBOL(mlx5_core_destroy_srq);
int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_query_srq_mbox_out *out)
+ struct mlx5_srq_attr *out)
{
if (!dev->issi)
return query_srq_cmd(dev, srq, out);
return err;
}
+EXPORT_SYMBOL(mlx5_core_create_rq);
int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen)
{
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_destroy_rq);
int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
{
return err;
}
+EXPORT_SYMBOL(mlx5_core_create_rqt);
int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
int inlen)
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_destroy_rqt);
if (bridge->is_going_away)
return;
+ if (bridge->pci_dev)
+ pm_runtime_get_sync(&bridge->pci_dev->dev);
+
list_for_each_entry(slot, &bridge->slots, node) {
struct pci_bus *bus = slot->bus;
struct pci_dev *dev, *tmp;
disable_slot(slot);
}
}
+
+ if (bridge->pci_dev)
+ pm_runtime_put(&bridge->pci_dev->dev);
}
/*
#include <linux/delay.h>
#include <linux/acpi.h>
#include <linux/freezer.h>
+#include <linux/kmod.h>
#include <linux/kthread.h>
#include <asm/page.h>
config RTC_LIB
bool
+config RTC_MC146818_LIB
+ bool
+ select RTC_LIB
+
menuconfig RTC_CLASS
bool "Real Time Clock"
default n
will be called rtc-em3027.
config RTC_DRV_RV8803
- tristate "Micro Crystal RV8803"
+ tristate "Micro Crystal RV8803, Epson RX8900"
help
- If you say yes here you get support for the Micro Crystal
- RV8803 RTC chips.
+ If you say yes here you get support for the Micro Crystal RV8803 and
+ Epson RX8900 RTC chips.
This driver can also be built as a module. If so, the module
will be called rtc-rv8803.
This driver can also be built as a module. If so, the module
will be called rtc-ds1390.
+config RTC_DRV_MAX6916
+ tristate "Maxim MAX6916"
+ help
+ If you say yes here you will get support for the
+ Maxim MAX6916 SPI RTC chip.
+
+ This driver only supports the RTC feature, and not other chip
+ features such as alarms.
+
+ This driver can also be built as a module. If so, the module
+ will be called rtc-max6916.
+
config RTC_DRV_R9701
tristate "Epson RTC-9701JE"
help
config RTC_DRV_CMOS
tristate "PC-style 'CMOS'"
- depends on X86 || ARM || M32R || PPC || MIPS || SPARC64
+ depends on X86 || ARM || M32R || PPC || MIPS || SPARC64 || MN10300
default y if X86
+ select RTC_MC146818_LIB
help
Say "yes" here to get direct support for the real time clock
found in every PC or ACPI-based system, and some other boards.
config RTC_DRV_ALPHA
bool "Alpha PC-style CMOS"
depends on ALPHA
+ select RTC_MC146818_LIB
default y
help
Direct support for the real-time clock found on every Alpha
obj-$(CONFIG_RTC_HCTOSYS) += hctosys.o
obj-$(CONFIG_RTC_SYSTOHC) += systohc.o
obj-$(CONFIG_RTC_CLASS) += rtc-core.o
+obj-$(CONFIG_RTC_MC146818_LIB) += rtc-mc146818-lib.o
rtc-core-y := class.o interface.o
ifdef CONFIG_RTC_DRV_EFI
obj-$(CONFIG_RTC_DRV_M48T86) += rtc-m48t86.o
obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o
obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o
+obj-$(CONFIG_RTC_DRV_MAX6916) += rtc-max6916.o
obj-$(CONFIG_RTC_DRV_MAX77686) += rtc-max77686.o
obj-$(CONFIG_RTC_DRV_MAX8907) += rtc-max8907.o
obj-$(CONFIG_RTC_DRV_MAX8925) += rtc-max8925.o
else if (!rtc->ops->read_alarm)
err = -EINVAL;
else {
- memset(alarm, 0, sizeof(struct rtc_wkalrm));
+ alarm->enabled = 0;
+ alarm->pending = 0;
+ alarm->time.tm_sec = -1;
+ alarm->time.tm_min = -1;
+ alarm->time.tm_hour = -1;
+ alarm->time.tm_mday = -1;
+ alarm->time.tm_mon = -1;
+ alarm->time.tm_year = -1;
+ alarm->time.tm_wday = -1;
+ alarm->time.tm_yday = -1;
+ alarm->time.tm_isdst = -1;
err = rtc->ops->read_alarm(rtc->dev.parent, alarm);
}
rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time);
rtc->aie_timer.period = ktime_set(0, 0);
- /* Alarm has to be enabled & in the futrure for us to enqueue it */
+ /* Alarm has to be enabled & in the future for us to enqueue it */
if (alarm->enabled && (rtc_tm_to_ktime(now).tv64 <
rtc->aie_timer.node.expires.tv64)) {
}
EXPORT_SYMBOL_GPL(rtc_initialize_alarm);
-
-
int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled)
{
int err = mutex_lock_interruptible(&rtc->ops_lock);
*/
static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
{
+ struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue);
+ struct rtc_time tm;
+ ktime_t now;
+
timer->enabled = 1;
+ __rtc_read_time(rtc, &tm);
+ now = rtc_tm_to_ktime(tm);
+
+ /* Skip over expired timers */
+ while (next) {
+ if (next->expires.tv64 >= now.tv64)
+ break;
+ next = timerqueue_iterate_next(next);
+ }
+
timerqueue_add(&rtc->timerqueue, &timer->node);
- if (&timer->node == timerqueue_getnext(&rtc->timerqueue)) {
+ if (!next) {
struct rtc_wkalrm alarm;
int err;
alarm.time = rtc_ktime_to_tm(timer->node.expires);
return err;
}
- err = devm_add_action(&client->dev, rtc_calib_remove_sysfs_group,
- &client->dev);
- if (err) {
- rtc_calib_remove_sysfs_group(&client->dev);
+ err = devm_add_action_or_reset(&client->dev,
+ rtc_calib_remove_sysfs_group,
+ &client->dev);
+ if (err)
dev_err(&client->dev,
"Failed to add sysfs cleanup action: %d\n",
err);
- return err;
- }
- return 0;
+ return err;
}
static int abx80x_remove(struct i2c_client *client)
.remove = asm9260_rtc_remove,
.driver = {
.name = "asm9260-rtc",
- .owner = THIS_MODULE,
.of_match_table = asm9260_dt_ids,
},
};
if (!rtc)
return -ENOMEM;
+ spin_lock_init(&rtc->lock);
rtc->irq = irq;
/* platform setup code should have handled this; sigh */
#include <linux/of_platform.h>
/* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */
-#include <asm-generic/rtc.h>
+#include <linux/mc146818rtc.h>
struct cmos_rtc {
struct rtc_device *rtc;
static int cmos_read_time(struct device *dev, struct rtc_time *t)
{
/* REVISIT: if the clock has a "century" register, use
- * that instead of the heuristic in get_rtc_time().
+ * that instead of the heuristic in mc146818_get_time().
* That'll make Y3K compatility (year > 2070) easy!
*/
- get_rtc_time(t);
+ mc146818_get_time(t);
return 0;
}
* takes effect exactly 500ms after we write the register.
* (Also queueing and other delays before we get this far.)
*/
- return set_rtc_time(t);
+ return mc146818_set_time(t);
}
static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
* Some also support day and month, for alarms up to a year in
* the future.
*/
- t->time.tm_mday = -1;
- t->time.tm_mon = -1;
spin_lock_irq(&rtc_lock);
t->time.tm_sec = CMOS_READ(RTC_SECONDS_ALARM);
}
}
}
- t->time.tm_year = -1;
t->enabled = !!(rtc_control & RTC_AIE);
t->pending = 0;
address_space = 64;
#elif defined(__i386__) || defined(__x86_64__) || defined(__arm__) \
|| defined(__sparc__) || defined(__mips__) \
- || defined(__powerpc__)
+ || defined(__powerpc__) || defined(CONFIG_MN10300)
address_space = 128;
#else
#warning Assuming 128 bytes of RTC+NVRAM address space, not 64 bytes.
if (val)
CMOS_WRITE(be32_to_cpup(val), RTC_FREQ_SELECT);
- get_rtc_time(&time);
+ cmos_read_time(&pdev->dev, &time);
ret = rtc_valid_tm(&time);
if (ret) {
struct rtc_time def_time = {
.tm_year = 1,
.tm_mday = 1,
};
- set_rtc_time(&def_time);
+ cmos_set_time(&pdev->dev, &def_time);
}
}
#else
rtc_tm->tm_mday = v[0][2] & DA9052_RTC_DAY;
rtc_tm->tm_hour = v[0][1] & DA9052_RTC_HOUR;
rtc_tm->tm_min = v[0][0] & DA9052_RTC_MIN;
+ rtc_tm->tm_sec = 0;
ret = rtc_valid_tm(rtc_tm);
return ret;
rtc_tm->tm_mday = v[2] & DA9055_RTC_ALM_DAY;
rtc_tm->tm_hour = v[1] & DA9055_RTC_ALM_HOUR;
rtc_tm->tm_min = v[0] & DA9055_RTC_ALM_MIN;
+ rtc_tm->tm_sec = 0;
return rtc_valid_tm(rtc_tm);
}
u8 day0, day1;
unsigned long flags;
+ alm->time.tm_sec = 0;
+
spin_lock_irqsave(&davinci_rtc_lock, flags);
davinci_rtcss_calendar_wait(davinci_rtc);
#include <linux/rtc.h>
#include <linux/platform_device.h>
#include <linux/bcd.h>
-#include <linux/ds1286.h>
+#include <linux/rtc/ds1286.h>
#include <linux/io.h>
#include <linux/slab.h>
alm->time.tm_sec = bcd2bin(buf[DS1305_SEC]);
alm->time.tm_min = bcd2bin(buf[DS1305_MIN]);
alm->time.tm_hour = bcd2hour(buf[DS1305_HOUR]);
- alm->time.tm_mday = -1;
- alm->time.tm_mon = -1;
- alm->time.tm_year = -1;
- /* next three fields are unused by Linux */
- alm->time.tm_wday = -1;
- alm->time.tm_mday = -1;
- alm->time.tm_isdst = -1;
return 0;
}
t->time.tm_min = bcd2bin(ds1307->regs[1] & 0x7f);
t->time.tm_hour = bcd2bin(ds1307->regs[2] & 0x3f);
t->time.tm_mday = bcd2bin(ds1307->regs[3] & 0x3f);
- t->time.tm_mon = -1;
- t->time.tm_year = -1;
- t->time.tm_wday = -1;
- t->time.tm_yday = -1;
- t->time.tm_isdst = -1;
/* ... and status */
t->enabled = !!(ds1307->regs[7] & DS1337_BIT_A1IE);
* Alarm support for mcp794xx devices.
*/
+#define MCP794XX_REG_WEEKDAY 0x3
+#define MCP794XX_REG_WEEKDAY_WDAY_MASK 0x7
#define MCP794XX_REG_CONTROL 0x07
# define MCP794XX_BIT_ALM0_EN 0x10
# define MCP794XX_BIT_ALM1_EN 0x20
{
struct ds1307 *ds1307;
int err = -ENODEV;
- int tmp;
+ int tmp, wday;
struct chip_desc *chip = &chips[id->driver_data];
struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
bool want_irq = false;
bool ds1307_can_wakeup_device = false;
unsigned char *buf;
struct ds1307_platform_data *pdata = dev_get_platdata(&client->dev);
+ struct rtc_time tm;
+ unsigned long timestamp;
+
irq_handler_t irq_handler = ds1307_irq;
static const int bbsqi_bitpos[] = {
bin2bcd(tmp));
}
+ /*
+ * Some IPs have weekday reset value = 0x1 which might not correct
+ * hence compute the wday using the current date/month/year values
+ */
+ ds1307_get_time(&client->dev, &tm);
+ wday = tm.tm_wday;
+ timestamp = rtc_tm_to_time64(&tm);
+ rtc_time64_to_tm(timestamp, &tm);
+
+ /*
+ * Check if reset wday is different from the computed wday
+ * If different then set the wday which we computed using
+ * timestamp
+ */
+ if (wday != tm.tm_wday) {
+ wday = i2c_smbus_read_byte_data(client, MCP794XX_REG_WEEKDAY);
+ wday = wday & ~MCP794XX_REG_WEEKDAY_WDAY_MASK;
+ wday = wday | (tm.tm_wday + 1);
+ i2c_smbus_write_byte_data(client, MCP794XX_REG_WEEKDAY, wday);
+ }
+
if (want_irq) {
device_set_wakeup_capable(&client->dev, true);
set_bit(HAS_ALARM, &ds1307->flags);
alarm->time.tm_hour = priv->alarm_hour < 0 ? 0 : priv->alarm_hour;
alarm->time.tm_mday = priv->alarm_mday < 0 ? 0 : priv->alarm_mday;
- alarm->time.tm_mon = -1;
- alarm->time.tm_year = -1;
- alarm->time.tm_wday = -1;
- alarm->time.tm_yday = -1;
- alarm->time.tm_isdst = -1;
-
out:
mutex_unlock(&priv->mutex);
return res;
return (val & bin_mask);
}
+/**
+ * s1685_rtc_check_mday - check validity of the day of month.
+ * @rtc: pointer to the ds1685 rtc structure.
+ * @mday: day of month.
+ *
+ * Returns -EDOM if the day of month is not within 1..31 range.
+ */
+static inline int
+ds1685_rtc_check_mday(struct ds1685_priv *rtc, u8 mday)
+{
+ if (rtc->bcd_mode) {
+ if (mday < 0x01 || mday > 0x31 || (mday & 0x0f) > 0x09)
+ return -EDOM;
+ } else {
+ if (mday < 1 || mday > 31)
+ return -EDOM;
+ }
+ return 0;
+}
+
/**
* ds1685_rtc_switch_to_bank0 - switch the rtc to bank 0.
* @rtc: pointer to the ds1685 rtc structure.
struct platform_device *pdev = to_platform_device(dev);
struct ds1685_priv *rtc = platform_get_drvdata(pdev);
u8 seconds, minutes, hours, mday, ctrlb, ctrlc;
+ int ret;
/* Fetch the alarm info from the RTC alarm registers. */
ds1685_rtc_begin_data_access(rtc);
ctrlc = rtc->read(rtc, RTC_CTRL_C);
ds1685_rtc_end_data_access(rtc);
- /* Check month date. */
- if (!(mday >= 1) && (mday <= 31))
- return -EDOM;
+ /* Check the month date for validity. */
+ ret = ds1685_rtc_check_mday(rtc, mday);
+ if (ret)
+ return ret;
/*
* Check the three alarm bytes.
*
* The Linux RTC system doesn't support the "don't care" capability
* of this RTC chip. We check for it anyways in case support is
- * added in the future.
+ * added in the future and only assign when we care.
*/
- if (unlikely(seconds >= 0xc0))
- alrm->time.tm_sec = -1;
- else
+ if (likely(seconds < 0xc0))
alrm->time.tm_sec = ds1685_rtc_bcd2bin(rtc, seconds,
RTC_SECS_BCD_MASK,
RTC_SECS_BIN_MASK);
- if (unlikely(minutes >= 0xc0))
- alrm->time.tm_min = -1;
- else
+ if (likely(minutes < 0xc0))
alrm->time.tm_min = ds1685_rtc_bcd2bin(rtc, minutes,
RTC_MINS_BCD_MASK,
RTC_MINS_BIN_MASK);
- if (unlikely(hours >= 0xc0))
- alrm->time.tm_hour = -1;
- else
+ if (likely(hours < 0xc0))
alrm->time.tm_hour = ds1685_rtc_bcd2bin(rtc, hours,
RTC_HRS_24_BCD_MASK,
RTC_HRS_24_BIN_MASK);
/* Write the data to rtc_wkalrm. */
alrm->time.tm_mday = ds1685_rtc_bcd2bin(rtc, mday, RTC_MDAY_BCD_MASK,
RTC_MDAY_BIN_MASK);
- alrm->time.tm_mon = -1;
- alrm->time.tm_year = -1;
- alrm->time.tm_wday = -1;
- alrm->time.tm_yday = -1;
- alrm->time.tm_isdst = -1;
alrm->enabled = !!(ctrlb & RTC_CTRL_B_AIE);
alrm->pending = !!(ctrlc & RTC_CTRL_C_AF);
struct platform_device *pdev = to_platform_device(dev);
struct ds1685_priv *rtc = platform_get_drvdata(pdev);
u8 ctrlb, seconds, minutes, hours, mday;
+ int ret;
/* Fetch the alarm info and convert to BCD. */
seconds = ds1685_rtc_bin2bcd(rtc, alrm->time.tm_sec,
RTC_MDAY_BCD_MASK);
/* Check the month date for validity. */
- if (!(mday >= 1) && (mday <= 31))
- return -EDOM;
+ ret = ds1685_rtc_check_mday(rtc, mday);
+ if (ret)
+ return ret;
/*
* Check the three alarm bytes.
#include <linux/rtc.h>
#include <linux/types.h>
#include <linux/bcd.h>
-#include <linux/rtc-ds2404.h>
+#include <linux/platform_data/rtc-ds2404.h>
#include <linux/delay.h>
#include <linux/gpio.h>
#include <linux/slab.h>
alarm->time.tm_hour = bcd2bin(buf[2] & 0x7F);
alarm->time.tm_mday = bcd2bin(buf[3] & 0x7F);
- alarm->time.tm_mon = -1;
- alarm->time.tm_year = -1;
- alarm->time.tm_wday = -1;
- alarm->time.tm_yday = -1;
- alarm->time.tm_isdst = -1;
-
alarm->enabled = !!(control & DS3232_REG_CR_A1IE);
alarm->pending = !!(stat & DS3232_REG_SR_A1F);
static int __init efi_rtc_probe(struct platform_device *dev)
{
struct rtc_device *rtc;
+ efi_time_t eft;
+ efi_time_cap_t cap;
+
+ /* First check if the RTC is usable */
+ if (efi.get_time(&eft, &cap) != EFI_SUCCESS)
+ return -ENODEV;
rtc = devm_rtc_device_register(&dev->dev, "rtc-efi", &efi_rtc_ops,
THIS_MODULE);
#include <linux/platform_device.h>
#include <linux/rtc.h>
-#if defined(CONFIG_M68K) || defined(CONFIG_PARISC) || \
- defined(CONFIG_PPC) || defined(CONFIG_SUPERH32)
-#include <asm/rtc.h>
-
-static int generic_get_time(struct device *dev, struct rtc_time *tm)
-{
- unsigned int ret = get_rtc_time(tm);
-
- if (ret & RTC_BATT_BAD)
- return -EOPNOTSUPP;
-
- return rtc_valid_tm(tm);
-}
-
-static int generic_set_time(struct device *dev, struct rtc_time *tm)
-{
- if (set_rtc_time(tm) < 0)
- return -EOPNOTSUPP;
-
- return 0;
-}
-
-static const struct rtc_class_ops generic_rtc_ops = {
- .read_time = generic_get_time,
- .set_time = generic_set_time,
-};
-#else
-#define generic_rtc_ops *(struct rtc_class_ops*)NULL
-#endif
-
static int __init generic_rtc_probe(struct platform_device *dev)
{
struct rtc_device *rtc;
- const struct rtc_class_ops *ops;
-
- ops = dev_get_platdata(&dev->dev);
- if (!ops)
- ops = &generic_rtc_ops;
+ const struct rtc_class_ops *ops = dev_get_platdata(&dev->dev);
rtc = devm_rtc_device_register(&dev->dev, "rtc-generic",
ops, THIS_MODULE);
return ret;
/* The alarm only has a minute accuracy */
- alm_tm->tm_sec = -1;
+ alm_tm->tm_sec = 0;
alm_tm->tm_min = (buf[0] & HYM8563_ALM_BIT_DISABLE) ?
-1 :
-1 :
bcd2bin(buf[3] & HYM8563_WEEKDAY_MASK);
- alm_tm->tm_mon = -1;
- alm_tm->tm_year = -1;
-
ret = i2c_smbus_read_byte_data(client, HYM8563_CTL2);
if (ret < 0)
return ret;
static int isl12057_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
{
struct isl12057_rtc_data *data = dev_get_drvdata(dev);
- struct rtc_time rtc_tm, *alarm_tm = &alarm->time;
- unsigned long rtc_secs, alarm_secs;
+ struct rtc_time *alarm_tm = &alarm->time;
u8 regs[ISL12057_A1_SEC_LEN];
unsigned int ir;
int ret;
alarm_tm->tm_min = bcd2bin(regs[1] & 0x7f);
alarm_tm->tm_hour = bcd2bin(regs[2] & 0x3f);
alarm_tm->tm_mday = bcd2bin(regs[3] & 0x3f);
- alarm_tm->tm_wday = -1;
-
- /*
- * The alarm section does not store year/month. We use the ones in rtc
- * section as a basis and increment month and then year if needed to get
- * alarm after current time.
- */
- ret = _isl12057_rtc_read_time(dev, &rtc_tm);
- if (ret)
- goto err_unlock;
-
- alarm_tm->tm_year = rtc_tm.tm_year;
- alarm_tm->tm_mon = rtc_tm.tm_mon;
-
- ret = rtc_tm_to_time(&rtc_tm, &rtc_secs);
- if (ret)
- goto err_unlock;
-
- ret = rtc_tm_to_time(alarm_tm, &alarm_secs);
- if (ret)
- goto err_unlock;
-
- if (alarm_secs < rtc_secs) {
- if (alarm_tm->tm_mon == 11) {
- alarm_tm->tm_mon = 0;
- alarm_tm->tm_year += 1;
- } else {
- alarm_tm->tm_mon += 1;
- }
- }
ret = regmap_read(data->regmap, ISL12057_REG_INT, &ir);
if (ret) {
retval = i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON, flags);
if (retval < 0) {
- dev_info(dev, "Unable to enable alarm IRQ %d\n", retval);
+ dev_err(dev, "Unable to enable alarm IRQ %d\n", retval);
return retval;
}
return 0;
alrm->time.tm_sec = bcd2bin(alarmvals[4] & 0x7f);
alrm->time.tm_min = bcd2bin(alarmvals[3] & 0x7f);
alrm->time.tm_hour = bcd2bin(alarmvals[2] & 0x3f);
- alrm->time.tm_wday = -1;
alrm->time.tm_mday = bcd2bin(alarmvals[1] & 0x3f);
alrm->time.tm_mon = bcd2bin(alarmvals[0] & 0x3f);
- alrm->time.tm_year = -1;
alrm->enabled = !!(alarmvals[0] & M41T80_ALMON_AFE);
alrm->pending = (flags & M41T80_FLAGS_AF) && alrm->enabled;
.proc = m41t80_rtc_proc,
};
+#ifdef CONFIG_PM_SLEEP
+static int m41t80_suspend(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+
+ if (client->irq >= 0 && device_may_wakeup(dev))
+ enable_irq_wake(client->irq);
+
+ return 0;
+}
+
+static int m41t80_resume(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+
+ if (client->irq >= 0 && device_may_wakeup(dev))
+ disable_irq_wake(client->irq);
+
+ return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(m41t80_pm, m41t80_suspend, m41t80_resume);
+
static ssize_t flags_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return rc;
}
- rc = devm_add_action(&client->dev, m41t80_remove_sysfs_group,
- &client->dev);
+ rc = devm_add_action_or_reset(&client->dev, m41t80_remove_sysfs_group,
+ &client->dev);
if (rc) {
- m41t80_remove_sysfs_group(&client->dev);
dev_err(&client->dev,
"Failed to add sysfs cleanup action: %d\n", rc);
return rc;
static struct i2c_driver m41t80_driver = {
.driver = {
.name = "rtc-m41t80",
+ .pm = &m41t80_pm,
},
.probe = m41t80_probe,
.remove = m41t80_remove,
#include <linux/module.h>
#include <linux/rtc.h>
#include <linux/platform_device.h>
-#include <linux/m48t86.h>
+#include <linux/platform_data/rtc-m48t86.h>
#include <linux/bcd.h>
#define M48T86_REG_SEC 0x00
--- /dev/null
+/* rtc-max6916.c
+ *
+ * Driver for MAXIM max6916 Low Current, SPI Compatible
+ * Real Time Clock
+ *
+ * Author : Venkat Prashanth B U <venkat.prashanth2498@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+#include <linux/spi/spi.h>
+#include <linux/bcd.h>
+
+/* Registers in max6916 rtc */
+
+#define MAX6916_SECONDS_REG 0x01
+#define MAX6916_MINUTES_REG 0x02
+#define MAX6916_HOURS_REG 0x03
+#define MAX6916_DATE_REG 0x04
+#define MAX6916_MONTH_REG 0x05
+#define MAX6916_DAY_REG 0x06
+#define MAX6916_YEAR_REG 0x07
+#define MAX6916_CONTROL_REG 0x08
+#define MAX6916_STATUS_REG 0x0C
+#define MAX6916_CLOCK_BURST 0x3F
+
+static int max6916_read_reg(struct device *dev, unsigned char address,
+ unsigned char *data)
+{
+ struct spi_device *spi = to_spi_device(dev);
+
+ *data = address | 0x80;
+
+ return spi_write_then_read(spi, data, 1, data, 1);
+}
+
+static int max6916_write_reg(struct device *dev, unsigned char address,
+ unsigned char data)
+{
+ struct spi_device *spi = to_spi_device(dev);
+ unsigned char buf[2];
+
+ buf[0] = address & 0x7F;
+ buf[1] = data;
+
+ return spi_write_then_read(spi, buf, 2, NULL, 0);
+}
+
+static int max6916_read_time(struct device *dev, struct rtc_time *dt)
+{
+ struct spi_device *spi = to_spi_device(dev);
+ int err;
+ unsigned char buf[8];
+
+ buf[0] = MAX6916_CLOCK_BURST | 0x80;
+
+ err = spi_write_then_read(spi, buf, 1, buf, 8);
+
+ if (err)
+ return err;
+
+ dt->tm_sec = bcd2bin(buf[0]);
+ dt->tm_min = bcd2bin(buf[1]);
+ dt->tm_hour = bcd2bin(buf[2] & 0x3F);
+ dt->tm_mday = bcd2bin(buf[3]);
+ dt->tm_mon = bcd2bin(buf[4]) - 1;
+ dt->tm_wday = bcd2bin(buf[5]) - 1;
+ dt->tm_year = bcd2bin(buf[6]) + 100;
+
+ return rtc_valid_tm(dt);
+}
+
+static int max6916_set_time(struct device *dev, struct rtc_time *dt)
+{
+ struct spi_device *spi = to_spi_device(dev);
+ unsigned char buf[9];
+
+ if (dt->tm_year < 100 || dt->tm_year > 199) {
+ dev_err(&spi->dev, "Year must be between 2000 and 2099. It's %d.\n",
+ dt->tm_year + 1900);
+ return -EINVAL;
+ }
+
+ buf[0] = MAX6916_CLOCK_BURST & 0x7F;
+ buf[1] = bin2bcd(dt->tm_sec);
+ buf[2] = bin2bcd(dt->tm_min);
+ buf[3] = (bin2bcd(dt->tm_hour) & 0X3F);
+ buf[4] = bin2bcd(dt->tm_mday);
+ buf[5] = bin2bcd(dt->tm_mon + 1);
+ buf[6] = bin2bcd(dt->tm_wday + 1);
+ buf[7] = bin2bcd(dt->tm_year % 100);
+ buf[8] = bin2bcd(0x00);
+
+ /* write the rtc settings */
+ return spi_write_then_read(spi, buf, 9, NULL, 0);
+}
+
+static const struct rtc_class_ops max6916_rtc_ops = {
+ .read_time = max6916_read_time,
+ .set_time = max6916_set_time,
+};
+
+static int max6916_probe(struct spi_device *spi)
+{
+ struct rtc_device *rtc;
+ unsigned char data;
+ int res;
+
+ /* spi setup with max6916 in mode 3 and bits per word as 8 */
+ spi->mode = SPI_MODE_3;
+ spi->bits_per_word = 8;
+ spi_setup(spi);
+
+ /* RTC Settings */
+ res = max6916_read_reg(&spi->dev, MAX6916_SECONDS_REG, &data);
+ if (res)
+ return res;
+
+ /* Disable the write protect of rtc */
+ max6916_read_reg(&spi->dev, MAX6916_CONTROL_REG, &data);
+ data = data & ~(1 << 7);
+ max6916_write_reg(&spi->dev, MAX6916_CONTROL_REG, data);
+
+ /*Enable oscillator,disable oscillator stop flag, glitch filter*/
+ max6916_read_reg(&spi->dev, MAX6916_STATUS_REG, &data);
+ data = data & 0x1B;
+ max6916_write_reg(&spi->dev, MAX6916_STATUS_REG, data);
+
+ /* display the settings */
+ max6916_read_reg(&spi->dev, MAX6916_CONTROL_REG, &data);
+ dev_info(&spi->dev, "MAX6916 RTC CTRL Reg = 0x%02x\n", data);
+
+ max6916_read_reg(&spi->dev, MAX6916_STATUS_REG, &data);
+ dev_info(&spi->dev, "MAX6916 RTC Status Reg = 0x%02x\n", data);
+
+ rtc = devm_rtc_device_register(&spi->dev, "max6916",
+ &max6916_rtc_ops, THIS_MODULE);
+ if (IS_ERR(rtc))
+ return PTR_ERR(rtc);
+
+ spi_set_drvdata(spi, rtc);
+
+ return 0;
+}
+
+static struct spi_driver max6916_driver = {
+ .driver = {
+ .name = "max6916",
+ },
+ .probe = max6916_probe,
+};
+module_spi_driver(max6916_driver);
+
+MODULE_DESCRIPTION("MAX6916 SPI RTC DRIVER");
+MODULE_AUTHOR("Venkat Prashanth B U <venkat.prashanth2498@gmail.com>");
+MODULE_LICENSE("GPL v2");
--- /dev/null
+#include <linux/bcd.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/mc146818rtc.h>
+
+#ifdef CONFIG_ACPI
+#include <linux/acpi.h>
+#endif
+
+/*
+ * Returns true if a clock update is in progress
+ */
+static inline unsigned char mc146818_is_updating(void)
+{
+ unsigned char uip;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ return uip;
+}
+
+unsigned int mc146818_get_time(struct rtc_time *time)
+{
+ unsigned char ctrl;
+ unsigned long flags;
+ unsigned char century = 0;
+
+#ifdef CONFIG_MACH_DECSTATION
+ unsigned int real_year;
+#endif
+
+ /*
+ * read RTC once any update in progress is done. The update
+ * can take just over 2ms. We wait 20ms. There is no need to
+ * to poll-wait (up to 1s - eeccch) for the falling edge of RTC_UIP.
+ * If you need to know *exactly* when a second has started, enable
+ * periodic update complete interrupts, (via ioctl) and then
+ * immediately read /dev/rtc which will block until you get the IRQ.
+ * Once the read clears, read the RTC time (again via ioctl). Easy.
+ */
+ if (mc146818_is_updating())
+ mdelay(20);
+
+ /*
+ * Only the values that we read from the RTC are set. We leave
+ * tm_wday, tm_yday and tm_isdst untouched. Even though the
+ * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated
+ * by the RTC when initially set to a non-zero value.
+ */
+ spin_lock_irqsave(&rtc_lock, flags);
+ time->tm_sec = CMOS_READ(RTC_SECONDS);
+ time->tm_min = CMOS_READ(RTC_MINUTES);
+ time->tm_hour = CMOS_READ(RTC_HOURS);
+ time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
+ time->tm_mon = CMOS_READ(RTC_MONTH);
+ time->tm_year = CMOS_READ(RTC_YEAR);
+#ifdef CONFIG_MACH_DECSTATION
+ real_year = CMOS_READ(RTC_DEC_YEAR);
+#endif
+#ifdef CONFIG_ACPI
+ if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
+ acpi_gbl_FADT.century)
+ century = CMOS_READ(acpi_gbl_FADT.century);
+#endif
+ ctrl = CMOS_READ(RTC_CONTROL);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
+ if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+ {
+ time->tm_sec = bcd2bin(time->tm_sec);
+ time->tm_min = bcd2bin(time->tm_min);
+ time->tm_hour = bcd2bin(time->tm_hour);
+ time->tm_mday = bcd2bin(time->tm_mday);
+ time->tm_mon = bcd2bin(time->tm_mon);
+ time->tm_year = bcd2bin(time->tm_year);
+ century = bcd2bin(century);
+ }
+
+#ifdef CONFIG_MACH_DECSTATION
+ time->tm_year += real_year - 72;
+#endif
+
+ if (century)
+ time->tm_year += (century - 19) * 100;
+
+ /*
+ * Account for differences between how the RTC uses the values
+ * and how they are defined in a struct rtc_time;
+ */
+ if (time->tm_year <= 69)
+ time->tm_year += 100;
+
+ time->tm_mon--;
+
+ return RTC_24H;
+}
+EXPORT_SYMBOL_GPL(mc146818_get_time);
+
+/* Set the current date and time in the real time clock. */
+int mc146818_set_time(struct rtc_time *time)
+{
+ unsigned long flags;
+ unsigned char mon, day, hrs, min, sec;
+ unsigned char save_control, save_freq_select;
+ unsigned int yrs;
+#ifdef CONFIG_MACH_DECSTATION
+ unsigned int real_yrs, leap_yr;
+#endif
+ unsigned char century = 0;
+
+ yrs = time->tm_year;
+ mon = time->tm_mon + 1; /* tm_mon starts at zero */
+ day = time->tm_mday;
+ hrs = time->tm_hour;
+ min = time->tm_min;
+ sec = time->tm_sec;
+
+ if (yrs > 255) /* They are unsigned */
+ return -EINVAL;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+#ifdef CONFIG_MACH_DECSTATION
+ real_yrs = yrs;
+ leap_yr = ((!((yrs + 1900) % 4) && ((yrs + 1900) % 100)) ||
+ !((yrs + 1900) % 400));
+ yrs = 72;
+
+ /*
+ * We want to keep the year set to 73 until March
+ * for non-leap years, so that Feb, 29th is handled
+ * correctly.
+ */
+ if (!leap_yr && mon < 3) {
+ real_yrs--;
+ yrs = 73;
+ }
+#endif
+
+#ifdef CONFIG_ACPI
+ if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
+ acpi_gbl_FADT.century) {
+ century = (yrs + 1900) / 100;
+ yrs %= 100;
+ }
+#endif
+
+ /* These limits and adjustments are independent of
+ * whether the chip is in binary mode or not.
+ */
+ if (yrs > 169) {
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ return -EINVAL;
+ }
+
+ if (yrs >= 100)
+ yrs -= 100;
+
+ if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
+ || RTC_ALWAYS_BCD) {
+ sec = bin2bcd(sec);
+ min = bin2bcd(min);
+ hrs = bin2bcd(hrs);
+ day = bin2bcd(day);
+ mon = bin2bcd(mon);
+ yrs = bin2bcd(yrs);
+ century = bin2bcd(century);
+ }
+
+ save_control = CMOS_READ(RTC_CONTROL);
+ CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+ CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
+
+#ifdef CONFIG_MACH_DECSTATION
+ CMOS_WRITE(real_yrs, RTC_DEC_YEAR);
+#endif
+ CMOS_WRITE(yrs, RTC_YEAR);
+ CMOS_WRITE(mon, RTC_MONTH);
+ CMOS_WRITE(day, RTC_DAY_OF_MONTH);
+ CMOS_WRITE(hrs, RTC_HOURS);
+ CMOS_WRITE(min, RTC_MINUTES);
+ CMOS_WRITE(sec, RTC_SECONDS);
+#ifdef CONFIG_ACPI
+ if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
+ acpi_gbl_FADT.century)
+ CMOS_WRITE(century, acpi_gbl_FADT.century);
+#endif
+
+ CMOS_WRITE(save_control, RTC_CONTROL);
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mc146818_set_time);
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
+#include <linux/mc146818rtc.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sfi.h>
-#include <asm-generic/rtc.h>
#include <asm/intel_scu_ipc.h>
#include <asm/intel-mid.h>
#include <asm/intel_mid_vrtc.h>
if (mrst->irq <= 0)
return -EIO;
- /* Basic alarms only support hour, minute, and seconds fields.
- * Some also support day and month, for alarms up to a year in
- * the future.
- */
- t->time.tm_mday = -1;
- t->time.tm_mon = -1;
- t->time.tm_year = -1;
-
/* vRTC only supports binary mode */
spin_lock_irq(&rtc_lock);
t->time.tm_sec = vrtc_cmos_read(RTC_SECONDS_ALARM);
#define CD_TMR_TE BIT(3) /* Countdown timer enable */
/* PCF2123_REG_OFFSET BITS */
-#define OFFSET_SIGN_BIT BIT(6) /* 2's complement sign bit */
+#define OFFSET_SIGN_BIT 6 /* 2's complement sign bit */
#define OFFSET_COARSE BIT(7) /* Coarse mode offset */
#define OFFSET_STEP (2170) /* Offset step in parts per billion */
if (reg & OFFSET_COARSE)
reg <<= 1; /* multiply by 2 and sign extend */
else
- reg |= (reg & OFFSET_SIGN_BIT) << 1; /* sign extend only */
+ reg = sign_extend32(reg, OFFSET_SIGN_BIT);
*offset = ((long)reg) * OFFSET_STEP;
#include <linux/rtc.h>
#include <linux/module.h>
+/*
+ * Information for this driver was pulled from the following datasheets.
+ *
+ * http://www.nxp.com/documents/data_sheet/PCF85063A.pdf
+ * http://www.nxp.com/documents/data_sheet/PCF85063TP.pdf
+ *
+ * PCF85063A -- Rev. 6 — 18 November 2015
+ * PCF85063TP -- Rev. 4 — 6 May 2015
+*/
+
#define PCF85063_REG_CTRL1 0x00 /* status */
#define PCF85063_REG_CTRL1_STOP BIT(5)
#define PCF85063_REG_CTRL2 0x01
return 0;
}
-/*
- * In the routines that deal directly with the pcf85063 hardware, we use
- * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
- */
+static int pcf85063_start_clock(struct i2c_client *client, u8 ctrl1)
+{
+ s32 ret;
+
+ /* start the clock */
+ ctrl1 &= PCF85063_REG_CTRL1_STOP;
+
+ ret = i2c_smbus_write_byte_data(client, PCF85063_REG_CTRL1, ctrl1);
+ if (ret < 0) {
+ dev_err(&client->dev, "Failing to start the clock\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm)
{
int rc;
tm->tm_wday = regs[4] & 0x07;
tm->tm_mon = bcd2bin(regs[5] & 0x1F) - 1; /* rtc mn 1-12 */
tm->tm_year = bcd2bin(regs[6]);
- if (tm->tm_year < 70)
- tm->tm_year += 100; /* assume we are in 1970...2069 */
+ tm->tm_year += 100;
return rtc_valid_tm(tm);
}
static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm)
{
int rc;
- u8 regs[8];
+ u8 regs[7];
+ u8 ctrl1;
+
+ if ((tm->tm_year < 100) || (tm->tm_year > 199))
+ return -EINVAL;
/*
* to accurately set the time, reset the divider chain and keep it in
* reset state until all time/date registers are written
*/
- rc = pcf85063_stop_clock(client, ®s[7]);
+ rc = pcf85063_stop_clock(client, &ctrl1);
if (rc != 0)
return rc;
regs[5] = bin2bcd(tm->tm_mon + 1);
/* year and century */
- regs[6] = bin2bcd(tm->tm_year % 100);
-
- /*
- * after all time/date registers are written, let the 'address auto
- * increment' feature wrap around and write register CTRL1 to re-enable
- * the clock divider chain again
- */
- regs[7] &= ~PCF85063_REG_CTRL1_STOP;
+ regs[6] = bin2bcd(tm->tm_year - 100);
/* write all registers at once */
rc = i2c_smbus_write_i2c_block_data(client, PCF85063_REG_SC,
return rc;
}
+ /*
+ * Write the control register as a separate action since the size of
+ * the register space is different between the PCF85063TP and
+ * PCF85063A devices. The rollover point can not be used.
+ */
+ rc = pcf85063_start_clock(client, ctrl1);
+ if (rc != 0)
+ return rc;
+
return 0;
}
"%s: raw data is min=%02x, hr=%02x, mday=%02x, wday=%02x\n",
__func__, buf[0], buf[1], buf[2], buf[3]);
+ tm->time.tm_sec = 0;
tm->time.tm_min = bcd2bin(buf[0] & 0x7F);
tm->time.tm_hour = bcd2bin(buf[1] & 0x3F);
tm->time.tm_mday = bcd2bin(buf[2] & 0x3F);
tm->time.tm_wday = bcd2bin(buf[3] & 0x7);
- tm->time.tm_mon = -1;
- tm->time.tm_year = -1;
- tm->time.tm_yday = -1;
- tm->time.tm_isdst = -1;
err = pcf8563_get_alarm_mode(client, &tm->enabled, &tm->pending);
if (err < 0)
return ret;
}
+ alm->time.tm_sec = 0;
alm->time.tm_min = bcd2bin(alarm_data[0]);
alm->time.tm_hour = bcd2bin(alarm_data[1]);
alm->time.tm_mday = bcd2bin(alarm_data[2]);
t->time.tm_sec = 0;
t->time.tm_min = bcd2bin(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f);
t->time.tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C_REG_ALARM_A_HOURS]);
- t->time.tm_mday = -1;
- t->time.tm_mon = -1;
- t->time.tm_year = -1;
- t->time.tm_wday = -1;
- t->time.tm_yday = -1;
- t->time.tm_isdst = -1;
/* ... and status */
t->enabled = !!(rs5c->regs[RS5C_REG_CTRL1] & RS5C_CTRL1_AALE);
#include <linux/bcd.h>
#include <linux/bitops.h>
+#include <linux/log2.h>
#include <linux/i2c.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rtc.h>
+#define RV8803_I2C_TRY_COUNT 4
+
#define RV8803_SEC 0x00
#define RV8803_MIN 0x01
#define RV8803_HOUR 0x02
u8 ctrl;
};
+static int rv8803_read_reg(const struct i2c_client *client, u8 reg)
+{
+ int try = RV8803_I2C_TRY_COUNT;
+ s32 ret;
+
+ /*
+ * There is a 61µs window during which the RTC does not acknowledge I2C
+ * transfers. In that case, ensure that there are multiple attempts.
+ */
+ do
+ ret = i2c_smbus_read_byte_data(client, reg);
+ while ((ret == -ENXIO || ret == -EIO) && --try);
+ if (ret < 0)
+ dev_err(&client->dev, "Unable to read register 0x%02x\n", reg);
+
+ return ret;
+}
+
+static int rv8803_read_regs(const struct i2c_client *client,
+ u8 reg, u8 count, u8 *values)
+{
+ int try = RV8803_I2C_TRY_COUNT;
+ s32 ret;
+
+ do
+ ret = i2c_smbus_read_i2c_block_data(client, reg, count, values);
+ while ((ret == -ENXIO || ret == -EIO) && --try);
+ if (ret != count) {
+ dev_err(&client->dev,
+ "Unable to read registers 0x%02x..0x%02x\n",
+ reg, reg + count - 1);
+ return ret < 0 ? ret : -EIO;
+ }
+
+ return 0;
+}
+
+static int rv8803_write_reg(const struct i2c_client *client, u8 reg, u8 value)
+{
+ int try = RV8803_I2C_TRY_COUNT;
+ s32 ret;
+
+ do
+ ret = i2c_smbus_write_byte_data(client, reg, value);
+ while ((ret == -ENXIO || ret == -EIO) && --try);
+ if (ret)
+ dev_err(&client->dev, "Unable to write register 0x%02x\n", reg);
+
+ return ret;
+}
+
+static int rv8803_write_regs(const struct i2c_client *client,
+ u8 reg, u8 count, const u8 *values)
+{
+ int try = RV8803_I2C_TRY_COUNT;
+ s32 ret;
+
+ do
+ ret = i2c_smbus_write_i2c_block_data(client, reg, count,
+ values);
+ while ((ret == -ENXIO || ret == -EIO) && --try);
+ if (ret)
+ dev_err(&client->dev,
+ "Unable to write registers 0x%02x..0x%02x\n",
+ reg, reg + count - 1);
+
+ return ret;
+}
+
static irqreturn_t rv8803_handle_irq(int irq, void *dev_id)
{
struct i2c_client *client = dev_id;
struct rv8803_data *rv8803 = i2c_get_clientdata(client);
unsigned long events = 0;
- int flags, try = 0;
+ int flags;
mutex_lock(&rv8803->flags_lock);
- do {
- flags = i2c_smbus_read_byte_data(client, RV8803_FLAG);
- try++;
- } while ((flags == -ENXIO) && (try < 3));
+ flags = rv8803_read_reg(client, RV8803_FLAG);
if (flags <= 0) {
mutex_unlock(&rv8803->flags_lock);
return IRQ_NONE;
if (events) {
rtc_update_irq(rv8803->rtc, 1, events);
- i2c_smbus_write_byte_data(client, RV8803_FLAG, flags);
- i2c_smbus_write_byte_data(rv8803->client, RV8803_CTRL,
- rv8803->ctrl);
+ rv8803_write_reg(client, RV8803_FLAG, flags);
+ rv8803_write_reg(rv8803->client, RV8803_CTRL, rv8803->ctrl);
}
mutex_unlock(&rv8803->flags_lock);
u8 *date = date1;
int ret, flags;
- flags = i2c_smbus_read_byte_data(rv8803->client, RV8803_FLAG);
+ flags = rv8803_read_reg(rv8803->client, RV8803_FLAG);
if (flags < 0)
return flags;
return -EINVAL;
}
- ret = i2c_smbus_read_i2c_block_data(rv8803->client, RV8803_SEC,
- 7, date);
- if (ret != 7)
- return ret < 0 ? ret : -EIO;
+ ret = rv8803_read_regs(rv8803->client, RV8803_SEC, 7, date);
+ if (ret)
+ return ret;
if ((date1[RV8803_SEC] & 0x7f) == bin2bcd(59)) {
- ret = i2c_smbus_read_i2c_block_data(rv8803->client, RV8803_SEC,
- 7, date2);
- if (ret != 7)
- return ret < 0 ? ret : -EIO;
+ ret = rv8803_read_regs(rv8803->client, RV8803_SEC, 7, date2);
+ if (ret)
+ return ret;
if ((date2[RV8803_SEC] & 0x7f) != bin2bcd(59))
date = date2;
tm->tm_sec = bcd2bin(date[RV8803_SEC] & 0x7f);
tm->tm_min = bcd2bin(date[RV8803_MIN] & 0x7f);
tm->tm_hour = bcd2bin(date[RV8803_HOUR] & 0x3f);
- tm->tm_wday = ffs(date[RV8803_WEEK] & 0x7f);
+ tm->tm_wday = ilog2(date[RV8803_WEEK] & 0x7f);
tm->tm_mday = bcd2bin(date[RV8803_DAY] & 0x3f);
tm->tm_mon = bcd2bin(date[RV8803_MONTH] & 0x1f) - 1;
tm->tm_year = bcd2bin(date[RV8803_YEAR]) + 100;
- return rtc_valid_tm(tm);
+ return 0;
}
static int rv8803_set_time(struct device *dev, struct rtc_time *tm)
{
struct rv8803_data *rv8803 = dev_get_drvdata(dev);
u8 date[7];
- int flags, ret;
+ int ctrl, flags, ret;
if ((tm->tm_year < 100) || (tm->tm_year > 199))
return -EINVAL;
+ ctrl = rv8803_read_reg(rv8803->client, RV8803_CTRL);
+ if (ctrl < 0)
+ return ctrl;
+
+ /* Stop the clock */
+ ret = rv8803_write_reg(rv8803->client, RV8803_CTRL,
+ ctrl | RV8803_CTRL_RESET);
+ if (ret)
+ return ret;
+
date[RV8803_SEC] = bin2bcd(tm->tm_sec);
date[RV8803_MIN] = bin2bcd(tm->tm_min);
date[RV8803_HOUR] = bin2bcd(tm->tm_hour);
date[RV8803_MONTH] = bin2bcd(tm->tm_mon + 1);
date[RV8803_YEAR] = bin2bcd(tm->tm_year - 100);
- ret = i2c_smbus_write_i2c_block_data(rv8803->client, RV8803_SEC,
- 7, date);
- if (ret < 0)
+ ret = rv8803_write_regs(rv8803->client, RV8803_SEC, 7, date);
+ if (ret)
+ return ret;
+
+ /* Restart the clock */
+ ret = rv8803_write_reg(rv8803->client, RV8803_CTRL,
+ ctrl & ~RV8803_CTRL_RESET);
+ if (ret)
return ret;
mutex_lock(&rv8803->flags_lock);
- flags = i2c_smbus_read_byte_data(rv8803->client, RV8803_FLAG);
+ flags = rv8803_read_reg(rv8803->client, RV8803_FLAG);
if (flags < 0) {
mutex_unlock(&rv8803->flags_lock);
return flags;
}
- ret = i2c_smbus_write_byte_data(rv8803->client, RV8803_FLAG,
- flags & ~RV8803_FLAG_V2F);
+ ret = rv8803_write_reg(rv8803->client, RV8803_FLAG,
+ flags & ~(RV8803_FLAG_V1F | RV8803_FLAG_V2F));
mutex_unlock(&rv8803->flags_lock);
u8 alarmvals[3];
int flags, ret;
- ret = i2c_smbus_read_i2c_block_data(client, RV8803_ALARM_MIN,
- 3, alarmvals);
- if (ret != 3)
- return ret < 0 ? ret : -EIO;
+ ret = rv8803_read_regs(client, RV8803_ALARM_MIN, 3, alarmvals);
+ if (ret)
+ return ret;
- flags = i2c_smbus_read_byte_data(client, RV8803_FLAG);
+ flags = rv8803_read_reg(client, RV8803_FLAG);
if (flags < 0)
return flags;
alrm->time.tm_sec = 0;
alrm->time.tm_min = bcd2bin(alarmvals[0] & 0x7f);
alrm->time.tm_hour = bcd2bin(alarmvals[1] & 0x3f);
- alrm->time.tm_wday = -1;
alrm->time.tm_mday = bcd2bin(alarmvals[2] & 0x3f);
- alrm->time.tm_mon = -1;
- alrm->time.tm_year = -1;
alrm->enabled = !!(rv8803->ctrl & RV8803_CTRL_AIE);
alrm->pending = (flags & RV8803_FLAG_AF) && alrm->enabled;
mutex_lock(&rv8803->flags_lock);
- ret = i2c_smbus_read_i2c_block_data(client, RV8803_FLAG, 2, ctrl);
- if (ret != 2) {
+ ret = rv8803_read_regs(client, RV8803_FLAG, 2, ctrl);
+ if (ret) {
mutex_unlock(&rv8803->flags_lock);
- return ret < 0 ? ret : -EIO;
+ return ret;
}
alarmvals[0] = bin2bcd(alrm->time.tm_min);
if (rv8803->ctrl & (RV8803_CTRL_AIE | RV8803_CTRL_UIE)) {
rv8803->ctrl &= ~(RV8803_CTRL_AIE | RV8803_CTRL_UIE);
- err = i2c_smbus_write_byte_data(rv8803->client, RV8803_CTRL,
- rv8803->ctrl);
+ err = rv8803_write_reg(rv8803->client, RV8803_CTRL,
+ rv8803->ctrl);
if (err) {
mutex_unlock(&rv8803->flags_lock);
return err;
}
ctrl[1] &= ~RV8803_FLAG_AF;
- err = i2c_smbus_write_byte_data(rv8803->client, RV8803_FLAG, ctrl[1]);
+ err = rv8803_write_reg(rv8803->client, RV8803_FLAG, ctrl[1]);
mutex_unlock(&rv8803->flags_lock);
if (err)
return err;
- err = i2c_smbus_write_i2c_block_data(rv8803->client, RV8803_ALARM_MIN,
- 3, alarmvals);
+ err = rv8803_write_regs(rv8803->client, RV8803_ALARM_MIN, 3, alarmvals);
if (err)
return err;
if (rv8803->rtc->aie_timer.enabled)
rv8803->ctrl |= RV8803_CTRL_AIE;
- err = i2c_smbus_write_byte_data(rv8803->client, RV8803_CTRL,
- rv8803->ctrl);
+ err = rv8803_write_reg(rv8803->client, RV8803_CTRL,
+ rv8803->ctrl);
if (err)
return err;
}
}
mutex_lock(&rv8803->flags_lock);
- flags = i2c_smbus_read_byte_data(client, RV8803_FLAG);
+ flags = rv8803_read_reg(client, RV8803_FLAG);
if (flags < 0) {
mutex_unlock(&rv8803->flags_lock);
return flags;
}
flags &= ~(RV8803_FLAG_AF | RV8803_FLAG_UF);
- err = i2c_smbus_write_byte_data(client, RV8803_FLAG, flags);
+ err = rv8803_write_reg(client, RV8803_FLAG, flags);
mutex_unlock(&rv8803->flags_lock);
if (err)
return err;
if (ctrl != rv8803->ctrl) {
rv8803->ctrl = ctrl;
- err = i2c_smbus_write_byte_data(client, RV8803_CTRL,
- rv8803->ctrl);
+ err = rv8803_write_reg(client, RV8803_CTRL, rv8803->ctrl);
if (err)
return err;
}
switch (cmd) {
case RTC_VL_READ:
- flags = i2c_smbus_read_byte_data(client, RV8803_FLAG);
+ flags = rv8803_read_reg(client, RV8803_FLAG);
if (flags < 0)
return flags;
case RTC_VL_CLR:
mutex_lock(&rv8803->flags_lock);
- flags = i2c_smbus_read_byte_data(client, RV8803_FLAG);
+ flags = rv8803_read_reg(client, RV8803_FLAG);
if (flags < 0) {
mutex_unlock(&rv8803->flags_lock);
return flags;
}
flags &= ~(RV8803_FLAG_V1F | RV8803_FLAG_V2F);
- ret = i2c_smbus_write_byte_data(client, RV8803_FLAG, flags);
+ ret = rv8803_write_reg(client, RV8803_FLAG, flags);
mutex_unlock(&rv8803->flags_lock);
- if (ret < 0)
+ if (ret)
return ret;
return 0;
struct i2c_client *client = to_i2c_client(dev);
int ret;
- ret = i2c_smbus_write_byte_data(client, RV8803_RAM, buf[0]);
- if (ret < 0)
+ ret = rv8803_write_reg(client, RV8803_RAM, buf[0]);
+ if (ret)
return ret;
return 1;
struct i2c_client *client = to_i2c_client(dev);
int ret;
- ret = i2c_smbus_read_byte_data(client, RV8803_RAM);
+ ret = rv8803_read_reg(client, RV8803_RAM);
if (ret < 0)
return ret;
{
struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
struct rv8803_data *rv8803;
- int err, flags, try = 0;
+ int err, flags;
if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA |
I2C_FUNC_SMBUS_I2C_BLOCK)) {
rv8803->client = client;
i2c_set_clientdata(client, rv8803);
- /*
- * There is a 60µs window where the RTC may not reply on the i2c bus in
- * that case, the transfer is not ACKed. In that case, ensure there are
- * multiple attempts.
- */
- do {
- flags = i2c_smbus_read_byte_data(client, RV8803_FLAG);
- try++;
- } while ((flags == -ENXIO) && (try < 3));
-
+ flags = rv8803_read_reg(client, RV8803_FLAG);
if (flags < 0)
return flags;
return PTR_ERR(rv8803->rtc);
}
- try = 0;
- do {
- err = i2c_smbus_write_byte_data(rv8803->client, RV8803_EXT,
- RV8803_EXT_WADA);
- try++;
- } while ((err == -ENXIO) && (try < 3));
+ err = rv8803_write_reg(rv8803->client, RV8803_EXT, RV8803_EXT_WADA);
if (err)
return err;
t->time.tm_min = bcd2bin(alarmvals[0] & 0x7f);
t->time.tm_hour = bcd2bin(alarmvals[1] & 0x3f);
- if (alarmvals[2] & RX8010_ALARM_AE)
- t->time.tm_mday = -1;
- else
+ if (!(alarmvals[2] & RX8010_ALARM_AE))
t->time.tm_mday = bcd2bin(alarmvals[2] & 0x7f);
- t->time.tm_wday = -1;
- t->time.tm_mon = -1;
- t->time.tm_year = -1;
-
t->enabled = !!(rx8010->ctrlreg & RX8010_CTRL_AIE);
t->pending = (flagreg & RX8010_FLAG_AF) && t->enabled;
t->time.tm_hour = bcd2bin(ald[1] & 0x1f) % 12
+ (ald[1] & 0x20 ? 12 : 0);
- t->time.tm_wday = -1;
- t->time.tm_mday = -1;
- t->time.tm_mon = -1;
- t->time.tm_year = -1;
-
dev_dbg(dev, "%s: date: %ds %dm %dh %dmd %dm %dy\n",
__func__,
t->time.tm_sec, t->time.tm_min, t->time.tm_hour,
#include <linux/bitrev.h>
#include <linux/bcd.h>
#include <linux/slab.h>
+#include <linux/delay.h>
#define S35390A_CMD_STATUS1 0
#define S35390A_CMD_STATUS2 1
#define S35390A_ALRM_BYTE_HOURS 1
#define S35390A_ALRM_BYTE_MINS 2
+/* flags for STATUS1 */
#define S35390A_FLAG_POC 0x01
#define S35390A_FLAG_BLD 0x02
+#define S35390A_FLAG_INT2 0x04
#define S35390A_FLAG_24H 0x40
#define S35390A_FLAG_RESET 0x80
+
+/* flag for STATUS2 */
#define S35390A_FLAG_TEST 0x01
#define S35390A_INT2_MODE_MASK 0xF0
return 0;
}
-static int s35390a_reset(struct s35390a *s35390a)
+/*
+ * Returns <0 on error, 0 if rtc is setup fine and 1 if the chip was reset.
+ * To keep the information if an irq is pending, pass the value read from
+ * STATUS1 to the caller.
+ */
+static int s35390a_reset(struct s35390a *s35390a, char *status1)
{
- char buf[1];
-
- if (s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf)) < 0)
- return -EIO;
-
- if (!(buf[0] & (S35390A_FLAG_POC | S35390A_FLAG_BLD)))
+ char buf;
+ int ret;
+ unsigned initcount = 0;
+
+ ret = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, status1, 1);
+ if (ret < 0)
+ return ret;
+
+ if (*status1 & S35390A_FLAG_POC)
+ /*
+ * Do not communicate for 0.5 seconds since the power-on
+ * detection circuit is in operation.
+ */
+ msleep(500);
+ else if (!(*status1 & S35390A_FLAG_BLD))
+ /*
+ * If both POC and BLD are unset everything is fine.
+ */
return 0;
- buf[0] |= (S35390A_FLAG_RESET | S35390A_FLAG_24H);
- buf[0] &= 0xf0;
- return s35390a_set_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf));
+ /*
+ * At least one of POC and BLD are set, so reinitialise chip. Keeping
+ * this information in the hardware to know later that the time isn't
+ * valid is unfortunately not possible because POC and BLD are cleared
+ * on read. So the reset is best done now.
+ *
+ * The 24H bit is kept over reset, so set it already here.
+ */
+initialize:
+ *status1 = S35390A_FLAG_24H;
+ buf = S35390A_FLAG_RESET | S35390A_FLAG_24H;
+ ret = s35390a_set_reg(s35390a, S35390A_CMD_STATUS1, &buf, 1);
+
+ if (ret < 0)
+ return ret;
+
+ ret = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, &buf, 1);
+ if (ret < 0)
+ return ret;
+
+ if (buf & (S35390A_FLAG_POC | S35390A_FLAG_BLD)) {
+ /* Try up to five times to reset the chip */
+ if (initcount < 5) {
+ ++initcount;
+ goto initialize;
+ } else
+ return -EIO;
+ }
+
+ return 1;
}
static int s35390a_disable_test_mode(struct s35390a *s35390a)
alm->time.tm_min, alm->time.tm_hour, alm->time.tm_mday,
alm->time.tm_mon, alm->time.tm_year, alm->time.tm_wday);
- /* disable interrupt */
+ /* disable interrupt (which deasserts the irq line) */
err = s35390a_set_reg(s35390a, S35390A_CMD_STATUS2, &sts, sizeof(sts));
if (err < 0)
return err;
- /* clear pending interrupt, if any */
+ /* clear pending interrupt (in STATUS1 only), if any */
err = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, &sts, sizeof(sts));
if (err < 0)
return err;
if (alm->time.tm_wday != -1)
buf[S35390A_ALRM_BYTE_WDAY] = bin2bcd(alm->time.tm_wday) | 0x80;
+ else
+ buf[S35390A_ALRM_BYTE_WDAY] = 0;
buf[S35390A_ALRM_BYTE_HOURS] = s35390a_hr2reg(s35390a,
alm->time.tm_hour) | 0x80;
if (err < 0)
return err;
- if (bitrev8(sts) != S35390A_INT2_MODE_ALARM)
- return -EINVAL;
+ if ((bitrev8(sts) & S35390A_INT2_MODE_MASK) != S35390A_INT2_MODE_ALARM) {
+ /*
+ * When the alarm isn't enabled, the register to configure
+ * the alarm time isn't accessible.
+ */
+ alm->enabled = 0;
+ return 0;
+ } else {
+ alm->enabled = 1;
+ }
err = s35390a_get_reg(s35390a, S35390A_CMD_INT2_REG1, buf, sizeof(buf));
if (err < 0)
return err;
/* This chip returns the bits of each byte in reverse order */
- for (i = 0; i < 3; ++i) {
+ for (i = 0; i < 3; ++i)
buf[i] = bitrev8(buf[i]);
- buf[i] &= ~0x80;
- }
- alm->time.tm_wday = bcd2bin(buf[S35390A_ALRM_BYTE_WDAY]);
- alm->time.tm_hour = s35390a_reg2hr(s35390a,
- buf[S35390A_ALRM_BYTE_HOURS]);
- alm->time.tm_min = bcd2bin(buf[S35390A_ALRM_BYTE_MINS]);
+ /*
+ * B0 of the three matching registers is an enable flag. Iff it is set
+ * the configured value is used for matching.
+ */
+ if (buf[S35390A_ALRM_BYTE_WDAY] & 0x80)
+ alm->time.tm_wday =
+ bcd2bin(buf[S35390A_ALRM_BYTE_WDAY] & ~0x80);
+
+ if (buf[S35390A_ALRM_BYTE_HOURS] & 0x80)
+ alm->time.tm_hour =
+ s35390a_reg2hr(s35390a,
+ buf[S35390A_ALRM_BYTE_HOURS] & ~0x80);
+
+ if (buf[S35390A_ALRM_BYTE_MINS] & 0x80)
+ alm->time.tm_min = bcd2bin(buf[S35390A_ALRM_BYTE_MINS] & ~0x80);
+
+ /* alarm triggers always at s=0 */
+ alm->time.tm_sec = 0;
dev_dbg(&client->dev, "%s: alm is mins=%d, hours=%d, wday=%d\n",
__func__, alm->time.tm_min, alm->time.tm_hour,
static int s35390a_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
- int err;
+ int err, err_reset;
unsigned int i;
struct s35390a *s35390a;
struct rtc_time tm;
- char buf[1];
+ char buf, status1;
if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
err = -ENODEV;
}
}
- err = s35390a_reset(s35390a);
- if (err < 0) {
+ err_reset = s35390a_reset(s35390a, &status1);
+ if (err_reset < 0) {
+ err = err_reset;
dev_err(&client->dev, "error resetting chip\n");
goto exit_dummy;
}
- err = s35390a_disable_test_mode(s35390a);
- if (err < 0) {
- dev_err(&client->dev, "error disabling test mode\n");
- goto exit_dummy;
- }
-
- err = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf));
- if (err < 0) {
- dev_err(&client->dev, "error checking 12/24 hour mode\n");
- goto exit_dummy;
- }
- if (buf[0] & S35390A_FLAG_24H)
+ if (status1 & S35390A_FLAG_24H)
s35390a->twentyfourhour = 1;
else
s35390a->twentyfourhour = 0;
- if (s35390a_get_datetime(client, &tm) < 0)
+ if (status1 & S35390A_FLAG_INT2) {
+ /* disable alarm (and maybe test mode) */
+ buf = 0;
+ err = s35390a_set_reg(s35390a, S35390A_CMD_STATUS2, &buf, 1);
+ if (err < 0) {
+ dev_err(&client->dev, "error disabling alarm");
+ goto exit_dummy;
+ }
+ } else {
+ err = s35390a_disable_test_mode(s35390a);
+ if (err < 0) {
+ dev_err(&client->dev, "error disabling test mode\n");
+ goto exit_dummy;
+ }
+ }
+
+ if (err_reset > 0 || s35390a_get_datetime(client, &tm) < 0)
dev_warn(&client->dev, "clock needs to be set\n");
device_set_wakeup_capable(&client->dev, 1);
err = PTR_ERR(s35390a->rtc);
goto exit_dummy;
}
+
+ if (status1 & S35390A_FLAG_INT2)
+ rtc_update_irq(s35390a->rtc, 1, RTC_AF);
+
return 0;
exit_dummy:
if (!is_power_of_2(freq))
return -EINVAL;
+ s3c_rtc_enable_clk(info);
spin_lock_irq(&info->pie_lock);
if (info->data->set_freq)
info->data->set_freq(info, freq);
spin_unlock_irq(&info->pie_lock);
+ s3c_rtc_disable_clk(info);
return 0;
}
/* decode the alarm enable field */
if (alm_en & S3C2410_RTCALM_SECEN)
alm_tm->tm_sec = bcd2bin(alm_tm->tm_sec);
- else
- alm_tm->tm_sec = -1;
if (alm_en & S3C2410_RTCALM_MINEN)
alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
- else
- alm_tm->tm_min = -1;
if (alm_en & S3C2410_RTCALM_HOUREN)
alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
- else
- alm_tm->tm_hour = -1;
if (alm_en & S3C2410_RTCALM_DAYEN)
alm_tm->tm_mday = bcd2bin(alm_tm->tm_mday);
- else
- alm_tm->tm_mday = -1;
if (alm_en & S3C2410_RTCALM_MONEN) {
alm_tm->tm_mon = bcd2bin(alm_tm->tm_mon);
alm_tm->tm_mon -= 1;
- } else {
- alm_tm->tm_mon = -1;
}
if (alm_en & S3C2410_RTCALM_YEAREN)
alm_tm->tm_year = bcd2bin(alm_tm->tm_year);
- else
- alm_tm->tm_year = -1;
return 0;
}
s3c_rtc_setfreq(info, 1);
- s3c_rtc_disable_clk(info);
-
return 0;
err_nortc:
tm->tm_mon = sh_rtc_read_alarm_value(rtc, RMONAR);
if (tm->tm_mon > 0)
tm->tm_mon -= 1; /* RTC is 1-12, tm_mon is 0-11 */
- tm->tm_year = 0xffff;
wkalrm->enabled = (readb(rtc->regbase + RCR1) & RCR1_AIE) ? 1 : 0;
writeb(bin2bcd(value) | AR_ENB, rtc->regbase + reg_off);
}
-static int sh_rtc_check_alarm(struct rtc_time *tm)
-{
- /*
- * The original rtc says anything > 0xc0 is "don't care" or "match
- * all" - most users use 0xff but rtc-dev uses -1 for the same thing.
- * The original rtc doesn't support years - some things use -1 and
- * some 0xffff. We use -1 to make out tests easier.
- */
- if (tm->tm_year == 0xffff)
- tm->tm_year = -1;
- if (tm->tm_mon >= 0xff)
- tm->tm_mon = -1;
- if (tm->tm_mday >= 0xff)
- tm->tm_mday = -1;
- if (tm->tm_wday >= 0xff)
- tm->tm_wday = -1;
- if (tm->tm_hour >= 0xff)
- tm->tm_hour = -1;
- if (tm->tm_min >= 0xff)
- tm->tm_min = -1;
- if (tm->tm_sec >= 0xff)
- tm->tm_sec = -1;
-
- if (tm->tm_year > 9999 ||
- tm->tm_mon >= 12 ||
- tm->tm_mday == 0 || tm->tm_mday >= 32 ||
- tm->tm_wday >= 7 ||
- tm->tm_hour >= 24 ||
- tm->tm_min >= 60 ||
- tm->tm_sec >= 60)
- return -EINVAL;
-
- return 0;
-}
-
static int sh_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
{
struct platform_device *pdev = to_platform_device(dev);
struct sh_rtc *rtc = platform_get_drvdata(pdev);
unsigned int rcr1;
struct rtc_time *tm = &wkalrm->time;
- int mon, err;
-
- err = sh_rtc_check_alarm(tm);
- if (unlikely(err < 0))
- return err;
+ int mon;
spin_lock_irq(&rtc->lock);
if (sec == 0) {
/* alarm is disabled. */
alarm->enabled = 0;
- alarm->time.tm_mon = -1;
- alarm->time.tm_mday = -1;
- alarm->time.tm_year = -1;
- alarm->time.tm_hour = -1;
- alarm->time.tm_min = -1;
- alarm->time.tm_sec = -1;
} else {
/* alarm is enabled. */
alarm->enabled = 1;
#include <linux/rtc.h>
#include <linux/types.h>
#include <linux/bcd.h>
-#include <linux/rtc-v3020.h>
+#include <linux/platform_data/rtc-v3020.h>
#include <linux/delay.h>
#include <linux/gpio.h>
#include <linux/slab.h>
To compile this driver as a module, choose M here: the
module will be called ibmvscsi.
+config SCSI_IBMVSCSIS
+ tristate "IBM Virtual SCSI Server support"
+ depends on PPC_PSERIES && TARGET_CORE && SCSI && PCI
+ help
+ This is the IBM POWER Virtual SCSI Target Server
+ This driver uses the SRP protocol for communication betwen servers
+ guest and/or the host that run on the same server.
+ More information on VSCSI protocol can be found at www.power.org
+
+ The userspace configuration needed to initialize the driver can be
+ be found here:
+
+ https://github.com/powervm/ibmvscsis/wiki/Configuration
+
+ To compile this driver as a module, choose M here: the
+ module will be called ibmvscsis.
+
config SCSI_IBMVFC
tristate "IBM Virtual FC support"
depends on PPC_PSERIES && SCSI
obj-$(CONFIG_SCSI_NSP32) += nsp32.o
obj-$(CONFIG_SCSI_IPR) += ipr.o
obj-$(CONFIG_SCSI_IBMVSCSI) += ibmvscsi/
+obj-$(CONFIG_SCSI_IBMVSCSIS) += ibmvscsi_tgt/
obj-$(CONFIG_SCSI_IBMVFC) += ibmvscsi/
obj-$(CONFIG_SCSI_HPTIOP) += hptiop.o
obj-$(CONFIG_SCSI_STEX) += stex.o
#include <linux/list.h>
#include <linux/types.h>
-#include "viosrp.h"
+#include <scsi/viosrp.h>
#define IBMVFC_NAME "ibmvfc"
#define IBMVFC_DRIVER_VERSION "1.0.11"
#include <linux/list.h>
#include <linux/completion.h>
#include <linux/interrupt.h>
-#include "viosrp.h"
+#include <scsi/viosrp.h>
struct scsi_cmnd;
struct Scsi_Host;
+++ /dev/null
-/*****************************************************************************/
-/* srp.h -- SCSI RDMA Protocol definitions */
-/* */
-/* Written By: Colin Devilbis, IBM Corporation */
-/* */
-/* Copyright (C) 2003 IBM Corporation */
-/* */
-/* This program is free software; you can redistribute it and/or modify */
-/* it under the terms of the GNU General Public License as published by */
-/* the Free Software Foundation; either version 2 of the License, or */
-/* (at your option) any later version. */
-/* */
-/* This program is distributed in the hope that it will be useful, */
-/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
-/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
-/* GNU General Public License for more details. */
-/* */
-/* You should have received a copy of the GNU General Public License */
-/* along with this program; if not, write to the Free Software */
-/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-/* */
-/* */
-/* This file contains structures and definitions for IBM RPA (RS/6000 */
-/* platform architecture) implementation of the SRP (SCSI RDMA Protocol) */
-/* standard. SRP is used on IBM iSeries and pSeries platforms to send SCSI */
-/* commands between logical partitions. */
-/* */
-/* SRP Information Units (IUs) are sent on a "Command/Response Queue" (CRQ) */
-/* between partitions. The definitions in this file are architected, */
-/* and cannot be changed without breaking compatibility with other versions */
-/* of Linux and other operating systems (AIX, OS/400) that talk this protocol*/
-/* between logical partitions */
-/*****************************************************************************/
-#ifndef VIOSRP_H
-#define VIOSRP_H
-#include <scsi/srp.h>
-
-#define SRP_VERSION "16.a"
-#define SRP_MAX_IU_LEN 256
-#define SRP_MAX_LOC_LEN 32
-
-union srp_iu {
- struct srp_login_req login_req;
- struct srp_login_rsp login_rsp;
- struct srp_login_rej login_rej;
- struct srp_i_logout i_logout;
- struct srp_t_logout t_logout;
- struct srp_tsk_mgmt tsk_mgmt;
- struct srp_cmd cmd;
- struct srp_rsp rsp;
- u8 reserved[SRP_MAX_IU_LEN];
-};
-
-enum viosrp_crq_headers {
- VIOSRP_CRQ_FREE = 0x00,
- VIOSRP_CRQ_CMD_RSP = 0x80,
- VIOSRP_CRQ_INIT_RSP = 0xC0,
- VIOSRP_CRQ_XPORT_EVENT = 0xFF
-};
-
-enum viosrp_crq_init_formats {
- VIOSRP_CRQ_INIT = 0x01,
- VIOSRP_CRQ_INIT_COMPLETE = 0x02
-};
-
-enum viosrp_crq_formats {
- VIOSRP_SRP_FORMAT = 0x01,
- VIOSRP_MAD_FORMAT = 0x02,
- VIOSRP_OS400_FORMAT = 0x03,
- VIOSRP_AIX_FORMAT = 0x04,
- VIOSRP_LINUX_FORMAT = 0x05,
- VIOSRP_INLINE_FORMAT = 0x06
-};
-
-enum viosrp_crq_status {
- VIOSRP_OK = 0x0,
- VIOSRP_NONRECOVERABLE_ERR = 0x1,
- VIOSRP_VIOLATES_MAX_XFER = 0x2,
- VIOSRP_PARTNER_PANIC = 0x3,
- VIOSRP_DEVICE_BUSY = 0x8,
- VIOSRP_ADAPTER_FAIL = 0x10,
- VIOSRP_OK2 = 0x99,
-};
-
-struct viosrp_crq {
- u8 valid; /* used by RPA */
- u8 format; /* SCSI vs out-of-band */
- u8 reserved;
- u8 status; /* non-scsi failure? (e.g. DMA failure) */
- __be16 timeout; /* in seconds */
- __be16 IU_length; /* in bytes */
- __be64 IU_data_ptr; /* the TCE for transferring data */
-};
-
-/* MADs are Management requests above and beyond the IUs defined in the SRP
- * standard.
- */
-enum viosrp_mad_types {
- VIOSRP_EMPTY_IU_TYPE = 0x01,
- VIOSRP_ERROR_LOG_TYPE = 0x02,
- VIOSRP_ADAPTER_INFO_TYPE = 0x03,
- VIOSRP_CAPABILITIES_TYPE = 0x05,
- VIOSRP_ENABLE_FAST_FAIL = 0x08,
-};
-
-enum viosrp_mad_status {
- VIOSRP_MAD_SUCCESS = 0x00,
- VIOSRP_MAD_NOT_SUPPORTED = 0xF1,
- VIOSRP_MAD_FAILED = 0xF7,
-};
-
-enum viosrp_capability_type {
- MIGRATION_CAPABILITIES = 0x01,
- RESERVATION_CAPABILITIES = 0x02,
-};
-
-enum viosrp_capability_support {
- SERVER_DOES_NOT_SUPPORTS_CAP = 0x0,
- SERVER_SUPPORTS_CAP = 0x01,
- SERVER_CAP_DATA = 0x02,
-};
-
-enum viosrp_reserve_type {
- CLIENT_RESERVE_SCSI_2 = 0x01,
-};
-
-enum viosrp_capability_flag {
- CLIENT_MIGRATED = 0x01,
- CLIENT_RECONNECT = 0x02,
- CAP_LIST_SUPPORTED = 0x04,
- CAP_LIST_DATA = 0x08,
-};
-
-/*
- * Common MAD header
- */
-struct mad_common {
- __be32 type;
- __be16 status;
- __be16 length;
- __be64 tag;
-};
-
-/*
- * All SRP (and MAD) requests normally flow from the
- * client to the server. There is no way for the server to send
- * an asynchronous message back to the client. The Empty IU is used
- * to hang out a meaningless request to the server so that it can respond
- * asynchrouously with something like a SCSI AER
- */
-struct viosrp_empty_iu {
- struct mad_common common;
- __be64 buffer;
- __be32 port;
-};
-
-struct viosrp_error_log {
- struct mad_common common;
- __be64 buffer;
-};
-
-struct viosrp_adapter_info {
- struct mad_common common;
- __be64 buffer;
-};
-
-struct viosrp_fast_fail {
- struct mad_common common;
-};
-
-struct viosrp_capabilities {
- struct mad_common common;
- __be64 buffer;
-};
-
-struct mad_capability_common {
- __be32 cap_type;
- __be16 length;
- __be16 server_support;
-};
-
-struct mad_reserve_cap {
- struct mad_capability_common common;
- __be32 type;
-};
-
-struct mad_migration_cap {
- struct mad_capability_common common;
- __be32 ecl;
-};
-
-struct capabilities{
- __be32 flags;
- char name[SRP_MAX_LOC_LEN];
- char loc[SRP_MAX_LOC_LEN];
- struct mad_migration_cap migration;
- struct mad_reserve_cap reserve;
-};
-
-union mad_iu {
- struct viosrp_empty_iu empty_iu;
- struct viosrp_error_log error_log;
- struct viosrp_adapter_info adapter_info;
- struct viosrp_fast_fail fast_fail;
- struct viosrp_capabilities capabilities;
-};
-
-union viosrp_iu {
- union srp_iu srp;
- union mad_iu mad;
-};
-
-struct mad_adapter_info_data {
- char srp_version[8];
- char partition_name[96];
- __be32 partition_number;
-#define SRP_MAD_VERSION_1 1
- __be32 mad_version;
-#define SRP_MAD_OS_LINUX 2
-#define SRP_MAD_OS_AIX 3
- __be32 os_type;
- __be32 port_max_txu[8]; /* per-port maximum transfer */
-};
-
-#endif
--- /dev/null
+obj-$(CONFIG_SCSI_IBMVSCSIS) += ibmvscsis.o
+
+ibmvscsis-y := libsrp.o ibmvscsi_tgt.o
--- /dev/null
+/*******************************************************************************
+ * IBM Virtual SCSI Target Driver
+ * Copyright (C) 2003-2005 Dave Boutcher (boutcher@us.ibm.com) IBM Corp.
+ * Santiago Leon (santil@us.ibm.com) IBM Corp.
+ * Linda Xie (lxie@us.ibm.com) IBM Corp.
+ *
+ * Copyright (C) 2005-2011 FUJITA Tomonori <tomof@acm.org>
+ * Copyright (C) 2010 Nicholas A. Bellinger <nab@kernel.org>
+ *
+ * Authors: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
+ * Authors: Michael Cyr <mikecyr@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ ****************************************************************************/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/string.h>
+
+#include <target/target_core_base.h>
+#include <target/target_core_fabric.h>
+
+#include <asm/hvcall.h>
+#include <asm/vio.h>
+
+#include <scsi/viosrp.h>
+
+#include "ibmvscsi_tgt.h"
+
+#define IBMVSCSIS_VERSION "v0.2"
+
+#define INITIAL_SRP_LIMIT 800
+#define DEFAULT_MAX_SECTORS 256
+
+static uint max_vdma_size = MAX_H_COPY_RDMA;
+
+static char system_id[SYS_ID_NAME_LEN] = "";
+static char partition_name[PARTITION_NAMELEN] = "UNKNOWN";
+static uint partition_number = -1;
+
+/* Adapter list and lock to control it */
+static DEFINE_SPINLOCK(ibmvscsis_dev_lock);
+static LIST_HEAD(ibmvscsis_dev_list);
+
+static long ibmvscsis_parse_command(struct scsi_info *vscsi,
+ struct viosrp_crq *crq);
+
+static void ibmvscsis_adapter_idle(struct scsi_info *vscsi);
+
+static void ibmvscsis_determine_resid(struct se_cmd *se_cmd,
+ struct srp_rsp *rsp)
+{
+ u32 residual_count = se_cmd->residual_count;
+
+ if (!residual_count)
+ return;
+
+ if (se_cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) {
+ if (se_cmd->data_direction == DMA_TO_DEVICE) {
+ /* residual data from an underflow write */
+ rsp->flags = SRP_RSP_FLAG_DOUNDER;
+ rsp->data_out_res_cnt = cpu_to_be32(residual_count);
+ } else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
+ /* residual data from an underflow read */
+ rsp->flags = SRP_RSP_FLAG_DIUNDER;
+ rsp->data_in_res_cnt = cpu_to_be32(residual_count);
+ }
+ } else if (se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT) {
+ if (se_cmd->data_direction == DMA_TO_DEVICE) {
+ /* residual data from an overflow write */
+ rsp->flags = SRP_RSP_FLAG_DOOVER;
+ rsp->data_out_res_cnt = cpu_to_be32(residual_count);
+ } else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
+ /* residual data from an overflow read */
+ rsp->flags = SRP_RSP_FLAG_DIOVER;
+ rsp->data_in_res_cnt = cpu_to_be32(residual_count);
+ }
+ }
+}
+
+/**
+ * connection_broken() - Determine if the connection to the client is good
+ * @vscsi: Pointer to our adapter structure
+ *
+ * This function attempts to send a ping MAD to the client. If the call to
+ * queue the request returns H_CLOSED then the connection has been broken
+ * and the function returns TRUE.
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt or Process environment
+ */
+static bool connection_broken(struct scsi_info *vscsi)
+{
+ struct viosrp_crq *crq;
+ u64 buffer[2] = { 0, 0 };
+ long h_return_code;
+ bool rc = false;
+
+ /* create a PING crq */
+ crq = (struct viosrp_crq *)&buffer;
+ crq->valid = VALID_CMD_RESP_EL;
+ crq->format = MESSAGE_IN_CRQ;
+ crq->status = PING;
+
+ h_return_code = h_send_crq(vscsi->dds.unit_id,
+ cpu_to_be64(buffer[MSG_HI]),
+ cpu_to_be64(buffer[MSG_LOW]));
+
+ pr_debug("connection_broken: rc %ld\n", h_return_code);
+
+ if (h_return_code == H_CLOSED)
+ rc = true;
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_unregister_command_q() - Helper Function-Unregister Command Queue
+ * @vscsi: Pointer to our adapter structure
+ *
+ * This function calls h_free_q then frees the interrupt bit etc.
+ * It must release the lock before doing so because of the time it can take
+ * for h_free_crq in PHYP
+ * NOTE: the caller must make sure that state and or flags will prevent
+ * interrupt handler from scheduling work.
+ * NOTE: anyone calling this function may need to set the CRQ_CLOSED flag
+ * we can't do it here, because we don't have the lock
+ *
+ * EXECUTION ENVIRONMENT:
+ * Process level
+ */
+static long ibmvscsis_unregister_command_q(struct scsi_info *vscsi)
+{
+ long qrc;
+ long rc = ADAPT_SUCCESS;
+ int ticks = 0;
+
+ do {
+ qrc = h_free_crq(vscsi->dds.unit_id);
+ switch (qrc) {
+ case H_SUCCESS:
+ break;
+
+ case H_HARDWARE:
+ case H_PARAMETER:
+ dev_err(&vscsi->dev, "unregister_command_q: error from h_free_crq %ld\n",
+ qrc);
+ rc = ERROR;
+ break;
+
+ case H_BUSY:
+ case H_LONG_BUSY_ORDER_1_MSEC:
+ /* msleep not good for small values */
+ usleep_range(1000, 2000);
+ ticks += 1;
+ break;
+ case H_LONG_BUSY_ORDER_10_MSEC:
+ usleep_range(10000, 20000);
+ ticks += 10;
+ break;
+ case H_LONG_BUSY_ORDER_100_MSEC:
+ msleep(100);
+ ticks += 100;
+ break;
+ case H_LONG_BUSY_ORDER_1_SEC:
+ ssleep(1);
+ ticks += 1000;
+ break;
+ case H_LONG_BUSY_ORDER_10_SEC:
+ ssleep(10);
+ ticks += 10000;
+ break;
+ case H_LONG_BUSY_ORDER_100_SEC:
+ ssleep(100);
+ ticks += 100000;
+ break;
+ default:
+ dev_err(&vscsi->dev, "unregister_command_q: unknown error %ld from h_free_crq\n",
+ qrc);
+ rc = ERROR;
+ break;
+ }
+
+ /*
+ * dont wait more then 300 seconds
+ * ticks are in milliseconds more or less
+ */
+ if (ticks > 300000 && qrc != H_SUCCESS) {
+ rc = ERROR;
+ dev_err(&vscsi->dev, "Excessive wait for h_free_crq\n");
+ }
+ } while (qrc != H_SUCCESS && rc == ADAPT_SUCCESS);
+
+ pr_debug("Freeing CRQ: phyp rc %ld, rc %ld\n", qrc, rc);
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_delete_client_info() - Helper function to Delete Client Info
+ * @vscsi: Pointer to our adapter structure
+ * @client_closed: True if client closed its queue
+ *
+ * Deletes information specific to the client when the client goes away
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt or Process
+ */
+static void ibmvscsis_delete_client_info(struct scsi_info *vscsi,
+ bool client_closed)
+{
+ vscsi->client_cap = 0;
+
+ /*
+ * Some things we don't want to clear if we're closing the queue,
+ * because some clients don't resend the host handshake when they
+ * get a transport event.
+ */
+ if (client_closed)
+ vscsi->client_data.os_type = 0;
+}
+
+/**
+ * ibmvscsis_free_command_q() - Free Command Queue
+ * @vscsi: Pointer to our adapter structure
+ *
+ * This function calls unregister_command_q, then clears interrupts and
+ * any pending interrupt acknowledgments associated with the command q.
+ * It also clears memory if there is no error.
+ *
+ * PHYP did not meet the PAPR architecture so that we must give up the
+ * lock. This causes a timing hole regarding state change. To close the
+ * hole this routine does accounting on any change that occurred during
+ * the time the lock is not held.
+ * NOTE: must give up and then acquire the interrupt lock, the caller must
+ * make sure that state and or flags will prevent interrupt handler from
+ * scheduling work.
+ *
+ * EXECUTION ENVIRONMENT:
+ * Process level, interrupt lock is held
+ */
+static long ibmvscsis_free_command_q(struct scsi_info *vscsi)
+{
+ int bytes;
+ u32 flags_under_lock;
+ u16 state_under_lock;
+ long rc = ADAPT_SUCCESS;
+
+ if (!(vscsi->flags & CRQ_CLOSED)) {
+ vio_disable_interrupts(vscsi->dma_dev);
+
+ state_under_lock = vscsi->new_state;
+ flags_under_lock = vscsi->flags;
+ vscsi->phyp_acr_state = 0;
+ vscsi->phyp_acr_flags = 0;
+
+ spin_unlock_bh(&vscsi->intr_lock);
+ rc = ibmvscsis_unregister_command_q(vscsi);
+ spin_lock_bh(&vscsi->intr_lock);
+
+ if (state_under_lock != vscsi->new_state)
+ vscsi->phyp_acr_state = vscsi->new_state;
+
+ vscsi->phyp_acr_flags = ((~flags_under_lock) & vscsi->flags);
+
+ if (rc == ADAPT_SUCCESS) {
+ bytes = vscsi->cmd_q.size * PAGE_SIZE;
+ memset(vscsi->cmd_q.base_addr, 0, bytes);
+ vscsi->cmd_q.index = 0;
+ vscsi->flags |= CRQ_CLOSED;
+
+ ibmvscsis_delete_client_info(vscsi, false);
+ }
+
+ pr_debug("free_command_q: flags 0x%x, state 0x%hx, acr_flags 0x%x, acr_state 0x%hx\n",
+ vscsi->flags, vscsi->state, vscsi->phyp_acr_flags,
+ vscsi->phyp_acr_state);
+ }
+ return rc;
+}
+
+/**
+ * ibmvscsis_cmd_q_dequeue() - Get valid Command element
+ * @mask: Mask to use in case index wraps
+ * @current_index: Current index into command queue
+ * @base_addr: Pointer to start of command queue
+ *
+ * Returns a pointer to a valid command element or NULL, if the command
+ * queue is empty
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt environment, interrupt lock held
+ */
+static struct viosrp_crq *ibmvscsis_cmd_q_dequeue(uint mask,
+ uint *current_index,
+ struct viosrp_crq *base_addr)
+{
+ struct viosrp_crq *ptr;
+
+ ptr = base_addr + *current_index;
+
+ if (ptr->valid) {
+ *current_index = (*current_index + 1) & mask;
+ dma_rmb();
+ } else {
+ ptr = NULL;
+ }
+
+ return ptr;
+}
+
+/**
+ * ibmvscsis_send_init_message() - send initialize message to the client
+ * @vscsi: Pointer to our adapter structure
+ * @format: Which Init Message format to send
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt environment interrupt lock held
+ */
+static long ibmvscsis_send_init_message(struct scsi_info *vscsi, u8 format)
+{
+ struct viosrp_crq *crq;
+ u64 buffer[2] = { 0, 0 };
+ long rc;
+
+ crq = (struct viosrp_crq *)&buffer;
+ crq->valid = VALID_INIT_MSG;
+ crq->format = format;
+ rc = h_send_crq(vscsi->dds.unit_id, cpu_to_be64(buffer[MSG_HI]),
+ cpu_to_be64(buffer[MSG_LOW]));
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_check_init_msg() - Check init message valid
+ * @vscsi: Pointer to our adapter structure
+ * @format: Pointer to return format of Init Message, if any.
+ * Set to UNUSED_FORMAT if no Init Message in queue.
+ *
+ * Checks if an initialize message was queued by the initiatior
+ * after the queue was created and before the interrupt was enabled.
+ *
+ * EXECUTION ENVIRONMENT:
+ * Process level only, interrupt lock held
+ */
+static long ibmvscsis_check_init_msg(struct scsi_info *vscsi, uint *format)
+{
+ struct viosrp_crq *crq;
+ long rc = ADAPT_SUCCESS;
+
+ crq = ibmvscsis_cmd_q_dequeue(vscsi->cmd_q.mask, &vscsi->cmd_q.index,
+ vscsi->cmd_q.base_addr);
+ if (!crq) {
+ *format = (uint)UNUSED_FORMAT;
+ } else if (crq->valid == VALID_INIT_MSG && crq->format == INIT_MSG) {
+ *format = (uint)INIT_MSG;
+ crq->valid = INVALIDATE_CMD_RESP_EL;
+ dma_rmb();
+
+ /*
+ * the caller has ensured no initialize message was
+ * sent after the queue was
+ * created so there should be no other message on the queue.
+ */
+ crq = ibmvscsis_cmd_q_dequeue(vscsi->cmd_q.mask,
+ &vscsi->cmd_q.index,
+ vscsi->cmd_q.base_addr);
+ if (crq) {
+ *format = (uint)(crq->format);
+ rc = ERROR;
+ crq->valid = INVALIDATE_CMD_RESP_EL;
+ dma_rmb();
+ }
+ } else {
+ *format = (uint)(crq->format);
+ rc = ERROR;
+ crq->valid = INVALIDATE_CMD_RESP_EL;
+ dma_rmb();
+ }
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_establish_new_q() - Establish new CRQ queue
+ * @vscsi: Pointer to our adapter structure
+ * @new_state: New state being established after resetting the queue
+ *
+ * Must be called with interrupt lock held.
+ */
+static long ibmvscsis_establish_new_q(struct scsi_info *vscsi, uint new_state)
+{
+ long rc = ADAPT_SUCCESS;
+ uint format;
+
+ vscsi->flags &= PRESERVE_FLAG_FIELDS;
+ vscsi->rsp_q_timer.timer_pops = 0;
+ vscsi->debit = 0;
+ vscsi->credit = 0;
+
+ rc = vio_enable_interrupts(vscsi->dma_dev);
+ if (rc) {
+ pr_warn("reset_queue: failed to enable interrupts, rc %ld\n",
+ rc);
+ return rc;
+ }
+
+ rc = ibmvscsis_check_init_msg(vscsi, &format);
+ if (rc) {
+ dev_err(&vscsi->dev, "reset_queue: check_init_msg failed, rc %ld\n",
+ rc);
+ return rc;
+ }
+
+ if (format == UNUSED_FORMAT && new_state == WAIT_CONNECTION) {
+ rc = ibmvscsis_send_init_message(vscsi, INIT_MSG);
+ switch (rc) {
+ case H_SUCCESS:
+ case H_DROPPED:
+ case H_CLOSED:
+ rc = ADAPT_SUCCESS;
+ break;
+
+ case H_PARAMETER:
+ case H_HARDWARE:
+ break;
+
+ default:
+ vscsi->state = UNDEFINED;
+ rc = H_HARDWARE;
+ break;
+ }
+ }
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_reset_queue() - Reset CRQ Queue
+ * @vscsi: Pointer to our adapter structure
+ * @new_state: New state to establish after resetting the queue
+ *
+ * This function calls h_free_q and then calls h_reg_q and does all
+ * of the bookkeeping to get us back to where we can communicate.
+ *
+ * Actually, we don't always call h_free_crq. A problem was discovered
+ * where one partition would close and reopen his queue, which would
+ * cause his partner to get a transport event, which would cause him to
+ * close and reopen his queue, which would cause the original partition
+ * to get a transport event, etc., etc. To prevent this, we don't
+ * actually close our queue if the client initiated the reset, (i.e.
+ * either we got a transport event or we have detected that the client's
+ * queue is gone)
+ *
+ * EXECUTION ENVIRONMENT:
+ * Process environment, called with interrupt lock held
+ */
+static void ibmvscsis_reset_queue(struct scsi_info *vscsi, uint new_state)
+{
+ int bytes;
+ long rc = ADAPT_SUCCESS;
+
+ pr_debug("reset_queue: flags 0x%x\n", vscsi->flags);
+
+ /* don't reset, the client did it for us */
+ if (vscsi->flags & (CLIENT_FAILED | TRANS_EVENT)) {
+ vscsi->flags &= PRESERVE_FLAG_FIELDS;
+ vscsi->rsp_q_timer.timer_pops = 0;
+ vscsi->debit = 0;
+ vscsi->credit = 0;
+ vscsi->state = new_state;
+ vio_enable_interrupts(vscsi->dma_dev);
+ } else {
+ rc = ibmvscsis_free_command_q(vscsi);
+ if (rc == ADAPT_SUCCESS) {
+ vscsi->state = new_state;
+
+ bytes = vscsi->cmd_q.size * PAGE_SIZE;
+ rc = h_reg_crq(vscsi->dds.unit_id,
+ vscsi->cmd_q.crq_token, bytes);
+ if (rc == H_CLOSED || rc == H_SUCCESS) {
+ rc = ibmvscsis_establish_new_q(vscsi,
+ new_state);
+ }
+
+ if (rc != ADAPT_SUCCESS) {
+ pr_debug("reset_queue: reg_crq rc %ld\n", rc);
+
+ vscsi->state = ERR_DISCONNECTED;
+ vscsi->flags |= RESPONSE_Q_DOWN;
+ ibmvscsis_free_command_q(vscsi);
+ }
+ } else {
+ vscsi->state = ERR_DISCONNECTED;
+ vscsi->flags |= RESPONSE_Q_DOWN;
+ }
+ }
+}
+
+/**
+ * ibmvscsis_free_cmd_resources() - Free command resources
+ * @vscsi: Pointer to our adapter structure
+ * @cmd: Command which is not longer in use
+ *
+ * Must be called with interrupt lock held.
+ */
+static void ibmvscsis_free_cmd_resources(struct scsi_info *vscsi,
+ struct ibmvscsis_cmd *cmd)
+{
+ struct iu_entry *iue = cmd->iue;
+
+ switch (cmd->type) {
+ case TASK_MANAGEMENT:
+ case SCSI_CDB:
+ /*
+ * When the queue goes down this value is cleared, so it
+ * cannot be cleared in this general purpose function.
+ */
+ if (vscsi->debit)
+ vscsi->debit -= 1;
+ break;
+ case ADAPTER_MAD:
+ vscsi->flags &= ~PROCESSING_MAD;
+ break;
+ case UNSET_TYPE:
+ break;
+ default:
+ dev_err(&vscsi->dev, "free_cmd_resources unknown type %d\n",
+ cmd->type);
+ break;
+ }
+
+ cmd->iue = NULL;
+ list_add_tail(&cmd->list, &vscsi->free_cmd);
+ srp_iu_put(iue);
+
+ if (list_empty(&vscsi->active_q) && list_empty(&vscsi->schedule_q) &&
+ list_empty(&vscsi->waiting_rsp) && (vscsi->flags & WAIT_FOR_IDLE)) {
+ vscsi->flags &= ~WAIT_FOR_IDLE;
+ complete(&vscsi->wait_idle);
+ }
+}
+
+/**
+ * ibmvscsis_disconnect() - Helper function to disconnect
+ * @work: Pointer to work_struct, gives access to our adapter structure
+ *
+ * An error has occurred or the driver received a Transport event,
+ * and the driver is requesting that the command queue be de-registered
+ * in a safe manner. If there is no outstanding I/O then we can stop the
+ * queue. If we are restarting the queue it will be reflected in the
+ * the state of the adapter.
+ *
+ * EXECUTION ENVIRONMENT:
+ * Process environment
+ */
+static void ibmvscsis_disconnect(struct work_struct *work)
+{
+ struct scsi_info *vscsi = container_of(work, struct scsi_info,
+ proc_work);
+ u16 new_state;
+ bool wait_idle = false;
+ long rc = ADAPT_SUCCESS;
+
+ spin_lock_bh(&vscsi->intr_lock);
+ new_state = vscsi->new_state;
+ vscsi->new_state = 0;
+
+ pr_debug("disconnect: flags 0x%x, state 0x%hx\n", vscsi->flags,
+ vscsi->state);
+
+ /*
+ * check which state we are in and see if we
+ * should transitition to the new state
+ */
+ switch (vscsi->state) {
+ /* Should never be called while in this state. */
+ case NO_QUEUE:
+ /*
+ * Can never transition from this state;
+ * igonore errors and logout.
+ */
+ case UNCONFIGURING:
+ break;
+
+ /* can transition from this state to UNCONFIGURING */
+ case ERR_DISCONNECT:
+ if (new_state == UNCONFIGURING)
+ vscsi->state = new_state;
+ break;
+
+ /*
+ * Can transition from this state to to unconfiguring
+ * or err disconnect.
+ */
+ case ERR_DISCONNECT_RECONNECT:
+ switch (new_state) {
+ case UNCONFIGURING:
+ case ERR_DISCONNECT:
+ vscsi->state = new_state;
+ break;
+
+ case WAIT_IDLE:
+ break;
+ default:
+ break;
+ }
+ break;
+
+ /* can transition from this state to UNCONFIGURING */
+ case ERR_DISCONNECTED:
+ if (new_state == UNCONFIGURING)
+ vscsi->state = new_state;
+ break;
+
+ /*
+ * If this is a transition into an error state.
+ * a client is attempting to establish a connection
+ * and has violated the RPA protocol.
+ * There can be nothing pending on the adapter although
+ * there can be requests in the command queue.
+ */
+ case WAIT_ENABLED:
+ case PART_UP_WAIT_ENAB:
+ switch (new_state) {
+ case ERR_DISCONNECT:
+ vscsi->flags |= RESPONSE_Q_DOWN;
+ vscsi->state = new_state;
+ vscsi->flags &= ~(SCHEDULE_DISCONNECT |
+ DISCONNECT_SCHEDULED);
+ ibmvscsis_free_command_q(vscsi);
+ break;
+ case ERR_DISCONNECT_RECONNECT:
+ ibmvscsis_reset_queue(vscsi, WAIT_ENABLED);
+ break;
+
+ /* should never happen */
+ case WAIT_IDLE:
+ rc = ERROR;
+ dev_err(&vscsi->dev, "disconnect: invalid state %d for WAIT_IDLE\n",
+ vscsi->state);
+ break;
+ }
+ break;
+
+ case WAIT_IDLE:
+ switch (new_state) {
+ case ERR_DISCONNECT:
+ case ERR_DISCONNECT_RECONNECT:
+ vscsi->state = new_state;
+ break;
+ }
+ break;
+
+ /*
+ * Initiator has not done a successful srp login
+ * or has done a successful srp logout ( adapter was not
+ * busy). In the first case there can be responses queued
+ * waiting for space on the initiators response queue (MAD)
+ * The second case the adapter is idle. Assume the worse case,
+ * i.e. the second case.
+ */
+ case WAIT_CONNECTION:
+ case CONNECTED:
+ case SRP_PROCESSING:
+ wait_idle = true;
+ vscsi->state = new_state;
+ break;
+
+ /* can transition from this state to UNCONFIGURING */
+ case UNDEFINED:
+ if (new_state == UNCONFIGURING)
+ vscsi->state = new_state;
+ break;
+ default:
+ break;
+ }
+
+ if (wait_idle) {
+ pr_debug("disconnect start wait, active %d, sched %d\n",
+ (int)list_empty(&vscsi->active_q),
+ (int)list_empty(&vscsi->schedule_q));
+ if (!list_empty(&vscsi->active_q) ||
+ !list_empty(&vscsi->schedule_q)) {
+ vscsi->flags |= WAIT_FOR_IDLE;
+ pr_debug("disconnect flags 0x%x\n", vscsi->flags);
+ /*
+ * This routine is can not be called with the interrupt
+ * lock held.
+ */
+ spin_unlock_bh(&vscsi->intr_lock);
+ wait_for_completion(&vscsi->wait_idle);
+ spin_lock_bh(&vscsi->intr_lock);
+ }
+ pr_debug("disconnect stop wait\n");
+
+ ibmvscsis_adapter_idle(vscsi);
+ }
+
+ spin_unlock_bh(&vscsi->intr_lock);
+}
+
+/**
+ * ibmvscsis_post_disconnect() - Schedule the disconnect
+ * @vscsi: Pointer to our adapter structure
+ * @new_state: State to move to after disconnecting
+ * @flag_bits: Flags to turn on in adapter structure
+ *
+ * If it's already been scheduled, then see if we need to "upgrade"
+ * the new state (if the one passed in is more "severe" than the
+ * previous one).
+ *
+ * PRECONDITION:
+ * interrupt lock is held
+ */
+static void ibmvscsis_post_disconnect(struct scsi_info *vscsi, uint new_state,
+ uint flag_bits)
+{
+ uint state;
+
+ /* check the validity of the new state */
+ switch (new_state) {
+ case UNCONFIGURING:
+ case ERR_DISCONNECT:
+ case ERR_DISCONNECT_RECONNECT:
+ case WAIT_IDLE:
+ break;
+
+ default:
+ dev_err(&vscsi->dev, "post_disconnect: Invalid new state %d\n",
+ new_state);
+ return;
+ }
+
+ vscsi->flags |= flag_bits;
+
+ pr_debug("post_disconnect: new_state 0x%x, flag_bits 0x%x, vscsi->flags 0x%x, state %hx\n",
+ new_state, flag_bits, vscsi->flags, vscsi->state);
+
+ if (!(vscsi->flags & (DISCONNECT_SCHEDULED | SCHEDULE_DISCONNECT))) {
+ vscsi->flags |= SCHEDULE_DISCONNECT;
+ vscsi->new_state = new_state;
+
+ INIT_WORK(&vscsi->proc_work, ibmvscsis_disconnect);
+ (void)queue_work(vscsi->work_q, &vscsi->proc_work);
+ } else {
+ if (vscsi->new_state)
+ state = vscsi->new_state;
+ else
+ state = vscsi->state;
+
+ switch (state) {
+ case NO_QUEUE:
+ case UNCONFIGURING:
+ break;
+
+ case ERR_DISCONNECTED:
+ case ERR_DISCONNECT:
+ case UNDEFINED:
+ if (new_state == UNCONFIGURING)
+ vscsi->new_state = new_state;
+ break;
+
+ case ERR_DISCONNECT_RECONNECT:
+ switch (new_state) {
+ case UNCONFIGURING:
+ case ERR_DISCONNECT:
+ vscsi->new_state = new_state;
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case WAIT_ENABLED:
+ case PART_UP_WAIT_ENAB:
+ case WAIT_IDLE:
+ case WAIT_CONNECTION:
+ case CONNECTED:
+ case SRP_PROCESSING:
+ vscsi->new_state = new_state;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ pr_debug("Leaving post_disconnect: flags 0x%x, new_state 0x%x\n",
+ vscsi->flags, vscsi->new_state);
+}
+
+/**
+ * ibmvscsis_trans_event() - Handle a Transport Event
+ * @vscsi: Pointer to our adapter structure
+ * @crq: Pointer to CRQ entry containing the Transport Event
+ *
+ * Do the logic to close the I_T nexus. This function may not
+ * behave to specification.
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt, interrupt lock held
+ */
+static long ibmvscsis_trans_event(struct scsi_info *vscsi,
+ struct viosrp_crq *crq)
+{
+ long rc = ADAPT_SUCCESS;
+
+ pr_debug("trans_event: format %d, flags 0x%x, state 0x%hx\n",
+ (int)crq->format, vscsi->flags, vscsi->state);
+
+ switch (crq->format) {
+ case MIGRATED:
+ case PARTNER_FAILED:
+ case PARTNER_DEREGISTER:
+ ibmvscsis_delete_client_info(vscsi, true);
+ break;
+
+ default:
+ rc = ERROR;
+ dev_err(&vscsi->dev, "trans_event: invalid format %d\n",
+ (uint)crq->format);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT,
+ RESPONSE_Q_DOWN);
+ break;
+ }
+
+ if (rc == ADAPT_SUCCESS) {
+ switch (vscsi->state) {
+ case NO_QUEUE:
+ case ERR_DISCONNECTED:
+ case UNDEFINED:
+ break;
+
+ case UNCONFIGURING:
+ vscsi->flags |= (RESPONSE_Q_DOWN | TRANS_EVENT);
+ break;
+
+ case WAIT_ENABLED:
+ break;
+
+ case WAIT_CONNECTION:
+ break;
+
+ case CONNECTED:
+ ibmvscsis_post_disconnect(vscsi, WAIT_IDLE,
+ (RESPONSE_Q_DOWN |
+ TRANS_EVENT));
+ break;
+
+ case PART_UP_WAIT_ENAB:
+ vscsi->state = WAIT_ENABLED;
+ break;
+
+ case SRP_PROCESSING:
+ if ((vscsi->debit > 0) ||
+ !list_empty(&vscsi->schedule_q) ||
+ !list_empty(&vscsi->waiting_rsp) ||
+ !list_empty(&vscsi->active_q)) {
+ pr_debug("debit %d, sched %d, wait %d, active %d\n",
+ vscsi->debit,
+ (int)list_empty(&vscsi->schedule_q),
+ (int)list_empty(&vscsi->waiting_rsp),
+ (int)list_empty(&vscsi->active_q));
+ pr_warn("connection lost with outstanding work\n");
+ } else {
+ pr_debug("trans_event: SRP Processing, but no outstanding work\n");
+ }
+
+ ibmvscsis_post_disconnect(vscsi, WAIT_IDLE,
+ (RESPONSE_Q_DOWN |
+ TRANS_EVENT));
+ break;
+
+ case ERR_DISCONNECT:
+ case ERR_DISCONNECT_RECONNECT:
+ case WAIT_IDLE:
+ vscsi->flags |= (RESPONSE_Q_DOWN | TRANS_EVENT);
+ break;
+ }
+ }
+
+ rc = vscsi->flags & SCHEDULE_DISCONNECT;
+
+ pr_debug("Leaving trans_event: flags 0x%x, state 0x%hx, rc %ld\n",
+ vscsi->flags, vscsi->state, rc);
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_poll_cmd_q() - Poll Command Queue
+ * @vscsi: Pointer to our adapter structure
+ *
+ * Called to handle command elements that may have arrived while
+ * interrupts were disabled.
+ *
+ * EXECUTION ENVIRONMENT:
+ * intr_lock must be held
+ */
+static void ibmvscsis_poll_cmd_q(struct scsi_info *vscsi)
+{
+ struct viosrp_crq *crq;
+ long rc;
+ bool ack = true;
+ volatile u8 valid;
+
+ pr_debug("poll_cmd_q: flags 0x%x, state 0x%hx, q index %ud\n",
+ vscsi->flags, vscsi->state, vscsi->cmd_q.index);
+
+ rc = vscsi->flags & SCHEDULE_DISCONNECT;
+ crq = vscsi->cmd_q.base_addr + vscsi->cmd_q.index;
+ valid = crq->valid;
+ dma_rmb();
+
+ while (valid) {
+poll_work:
+ vscsi->cmd_q.index =
+ (vscsi->cmd_q.index + 1) & vscsi->cmd_q.mask;
+
+ if (!rc) {
+ rc = ibmvscsis_parse_command(vscsi, crq);
+ } else {
+ if ((uint)crq->valid == VALID_TRANS_EVENT) {
+ /*
+ * must service the transport layer events even
+ * in an error state, dont break out until all
+ * the consecutive transport events have been
+ * processed
+ */
+ rc = ibmvscsis_trans_event(vscsi, crq);
+ } else if (vscsi->flags & TRANS_EVENT) {
+ /*
+ * if a tranport event has occurred leave
+ * everything but transport events on the queue
+ */
+ pr_debug("poll_cmd_q, ignoring\n");
+
+ /*
+ * need to decrement the queue index so we can
+ * look at the elment again
+ */
+ if (vscsi->cmd_q.index)
+ vscsi->cmd_q.index -= 1;
+ else
+ /*
+ * index is at 0 it just wrapped.
+ * have it index last element in q
+ */
+ vscsi->cmd_q.index = vscsi->cmd_q.mask;
+ break;
+ }
+ }
+
+ crq->valid = INVALIDATE_CMD_RESP_EL;
+
+ crq = vscsi->cmd_q.base_addr + vscsi->cmd_q.index;
+ valid = crq->valid;
+ dma_rmb();
+ }
+
+ if (!rc) {
+ if (ack) {
+ vio_enable_interrupts(vscsi->dma_dev);
+ ack = false;
+ pr_debug("poll_cmd_q, reenabling interrupts\n");
+ }
+ valid = crq->valid;
+ dma_rmb();
+ if (valid)
+ goto poll_work;
+ }
+
+ pr_debug("Leaving poll_cmd_q: rc %ld\n", rc);
+}
+
+/**
+ * ibmvscsis_free_cmd_qs() - Free elements in queue
+ * @vscsi: Pointer to our adapter structure
+ *
+ * Free all of the elements on all queues that are waiting for
+ * whatever reason.
+ *
+ * PRECONDITION:
+ * Called with interrupt lock held
+ */
+static void ibmvscsis_free_cmd_qs(struct scsi_info *vscsi)
+{
+ struct ibmvscsis_cmd *cmd, *nxt;
+
+ pr_debug("free_cmd_qs: waiting_rsp empty %d, timer starter %d\n",
+ (int)list_empty(&vscsi->waiting_rsp),
+ vscsi->rsp_q_timer.started);
+
+ list_for_each_entry_safe(cmd, nxt, &vscsi->waiting_rsp, list) {
+ list_del(&cmd->list);
+ ibmvscsis_free_cmd_resources(vscsi, cmd);
+ }
+}
+
+/**
+ * ibmvscsis_get_free_cmd() - Get free command from list
+ * @vscsi: Pointer to our adapter structure
+ *
+ * Must be called with interrupt lock held.
+ */
+static struct ibmvscsis_cmd *ibmvscsis_get_free_cmd(struct scsi_info *vscsi)
+{
+ struct ibmvscsis_cmd *cmd = NULL;
+ struct iu_entry *iue;
+
+ iue = srp_iu_get(&vscsi->target);
+ if (iue) {
+ cmd = list_first_entry_or_null(&vscsi->free_cmd,
+ struct ibmvscsis_cmd, list);
+ if (cmd) {
+ list_del(&cmd->list);
+ cmd->iue = iue;
+ cmd->type = UNSET_TYPE;
+ memset(&cmd->se_cmd, 0, sizeof(cmd->se_cmd));
+ } else {
+ srp_iu_put(iue);
+ }
+ }
+
+ return cmd;
+}
+
+/**
+ * ibmvscsis_adapter_idle() - Helper function to handle idle adapter
+ * @vscsi: Pointer to our adapter structure
+ *
+ * This function is called when the adapter is idle when the driver
+ * is attempting to clear an error condition.
+ * The adapter is considered busy if any of its cmd queues
+ * are non-empty. This function can be invoked
+ * from the off level disconnect function.
+ *
+ * EXECUTION ENVIRONMENT:
+ * Process environment called with interrupt lock held
+ */
+static void ibmvscsis_adapter_idle(struct scsi_info *vscsi)
+{
+ int free_qs = false;
+
+ pr_debug("adapter_idle: flags 0x%x, state 0x%hx\n", vscsi->flags,
+ vscsi->state);
+
+ /* Only need to free qs if we're disconnecting from client */
+ if (vscsi->state != WAIT_CONNECTION || vscsi->flags & TRANS_EVENT)
+ free_qs = true;
+
+ switch (vscsi->state) {
+ case ERR_DISCONNECT_RECONNECT:
+ ibmvscsis_reset_queue(vscsi, WAIT_CONNECTION);
+ pr_debug("adapter_idle, disc_rec: flags 0x%x\n", vscsi->flags);
+ break;
+
+ case ERR_DISCONNECT:
+ ibmvscsis_free_command_q(vscsi);
+ vscsi->flags &= ~DISCONNECT_SCHEDULED;
+ vscsi->flags |= RESPONSE_Q_DOWN;
+ vscsi->state = ERR_DISCONNECTED;
+ pr_debug("adapter_idle, disc: flags 0x%x, state 0x%hx\n",
+ vscsi->flags, vscsi->state);
+ break;
+
+ case WAIT_IDLE:
+ vscsi->rsp_q_timer.timer_pops = 0;
+ vscsi->debit = 0;
+ vscsi->credit = 0;
+ if (vscsi->flags & TRANS_EVENT) {
+ vscsi->state = WAIT_CONNECTION;
+ vscsi->flags &= PRESERVE_FLAG_FIELDS;
+ } else {
+ vscsi->state = CONNECTED;
+ vscsi->flags &= ~DISCONNECT_SCHEDULED;
+ }
+
+ pr_debug("adapter_idle, wait: flags 0x%x, state 0x%hx\n",
+ vscsi->flags, vscsi->state);
+ ibmvscsis_poll_cmd_q(vscsi);
+ break;
+
+ case ERR_DISCONNECTED:
+ vscsi->flags &= ~DISCONNECT_SCHEDULED;
+ pr_debug("adapter_idle, disconnected: flags 0x%x, state 0x%hx\n",
+ vscsi->flags, vscsi->state);
+ break;
+
+ default:
+ dev_err(&vscsi->dev, "adapter_idle: in invalid state %d\n",
+ vscsi->state);
+ break;
+ }
+
+ if (free_qs)
+ ibmvscsis_free_cmd_qs(vscsi);
+
+ /*
+ * There is a timing window where we could lose a disconnect request.
+ * The known path to this window occurs during the DISCONNECT_RECONNECT
+ * case above: reset_queue calls free_command_q, which will release the
+ * interrupt lock. During that time, a new post_disconnect call can be
+ * made with a "more severe" state (DISCONNECT or UNCONFIGURING).
+ * Because the DISCONNECT_SCHEDULED flag is already set, post_disconnect
+ * will only set the new_state. Now free_command_q reacquires the intr
+ * lock and clears the DISCONNECT_SCHEDULED flag (using PRESERVE_FLAG_
+ * FIELDS), and the disconnect is lost. This is particularly bad when
+ * the new disconnect was for UNCONFIGURING, since the unconfigure hangs
+ * forever.
+ * Fix is that free command queue sets acr state and acr flags if there
+ * is a change under the lock
+ * note free command queue writes to this state it clears it
+ * before releasing the lock, different drivers call the free command
+ * queue different times so dont initialize above
+ */
+ if (vscsi->phyp_acr_state != 0) {
+ /*
+ * set any bits in flags that may have been cleared by
+ * a call to free command queue in switch statement
+ * or reset queue
+ */
+ vscsi->flags |= vscsi->phyp_acr_flags;
+ ibmvscsis_post_disconnect(vscsi, vscsi->phyp_acr_state, 0);
+ vscsi->phyp_acr_state = 0;
+ vscsi->phyp_acr_flags = 0;
+
+ pr_debug("adapter_idle: flags 0x%x, state 0x%hx, acr_flags 0x%x, acr_state 0x%hx\n",
+ vscsi->flags, vscsi->state, vscsi->phyp_acr_flags,
+ vscsi->phyp_acr_state);
+ }
+
+ pr_debug("Leaving adapter_idle: flags 0x%x, state 0x%hx, new_state 0x%x\n",
+ vscsi->flags, vscsi->state, vscsi->new_state);
+}
+
+/**
+ * ibmvscsis_copy_crq_packet() - Copy CRQ Packet
+ * @vscsi: Pointer to our adapter structure
+ * @cmd: Pointer to command element to use to process the request
+ * @crq: Pointer to CRQ entry containing the request
+ *
+ * Copy the srp information unit from the hosted
+ * partition using remote dma
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt, interrupt lock held
+ */
+static long ibmvscsis_copy_crq_packet(struct scsi_info *vscsi,
+ struct ibmvscsis_cmd *cmd,
+ struct viosrp_crq *crq)
+{
+ struct iu_entry *iue = cmd->iue;
+ long rc = 0;
+ u16 len;
+
+ len = be16_to_cpu(crq->IU_length);
+ if ((len > SRP_MAX_IU_LEN) || (len == 0)) {
+ dev_err(&vscsi->dev, "copy_crq: Invalid len %d passed", len);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ return SRP_VIOLATION;
+ }
+
+ rc = h_copy_rdma(len, vscsi->dds.window[REMOTE].liobn,
+ be64_to_cpu(crq->IU_data_ptr),
+ vscsi->dds.window[LOCAL].liobn, iue->sbuf->dma);
+
+ switch (rc) {
+ case H_SUCCESS:
+ cmd->init_time = mftb();
+ iue->remote_token = crq->IU_data_ptr;
+ iue->iu_len = len;
+ pr_debug("copy_crq: ioba 0x%llx, init_time 0x%llx\n",
+ be64_to_cpu(crq->IU_data_ptr), cmd->init_time);
+ break;
+ case H_PERMISSION:
+ if (connection_broken(vscsi))
+ ibmvscsis_post_disconnect(vscsi,
+ ERR_DISCONNECT_RECONNECT,
+ (RESPONSE_Q_DOWN |
+ CLIENT_FAILED));
+ else
+ ibmvscsis_post_disconnect(vscsi,
+ ERR_DISCONNECT_RECONNECT, 0);
+
+ dev_err(&vscsi->dev, "copy_crq: h_copy_rdma failed, rc %ld\n",
+ rc);
+ break;
+ case H_DEST_PARM:
+ case H_SOURCE_PARM:
+ default:
+ dev_err(&vscsi->dev, "copy_crq: h_copy_rdma failed, rc %ld\n",
+ rc);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ break;
+ }
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_adapter_info - Service an Adapter Info MAnagement Data gram
+ * @vscsi: Pointer to our adapter structure
+ * @iue: Information Unit containing the Adapter Info MAD request
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt adpater lock is held
+ */
+static long ibmvscsis_adapter_info(struct scsi_info *vscsi,
+ struct iu_entry *iue)
+{
+ struct viosrp_adapter_info *mad = &vio_iu(iue)->mad.adapter_info;
+ struct mad_adapter_info_data *info;
+ uint flag_bits = 0;
+ dma_addr_t token;
+ long rc;
+
+ mad->common.status = cpu_to_be16(VIOSRP_MAD_SUCCESS);
+
+ if (be16_to_cpu(mad->common.length) > sizeof(*info)) {
+ mad->common.status = cpu_to_be16(VIOSRP_MAD_FAILED);
+ return 0;
+ }
+
+ info = dma_alloc_coherent(&vscsi->dma_dev->dev, sizeof(*info), &token,
+ GFP_KERNEL);
+ if (!info) {
+ dev_err(&vscsi->dev, "bad dma_alloc_coherent %p\n",
+ iue->target);
+ mad->common.status = cpu_to_be16(VIOSRP_MAD_FAILED);
+ return 0;
+ }
+
+ /* Get remote info */
+ rc = h_copy_rdma(be16_to_cpu(mad->common.length),
+ vscsi->dds.window[REMOTE].liobn,
+ be64_to_cpu(mad->buffer),
+ vscsi->dds.window[LOCAL].liobn, token);
+
+ if (rc != H_SUCCESS) {
+ if (rc == H_PERMISSION) {
+ if (connection_broken(vscsi))
+ flag_bits = (RESPONSE_Q_DOWN | CLIENT_FAILED);
+ }
+ pr_warn("adapter_info: h_copy_rdma from client failed, rc %ld\n",
+ rc);
+ pr_debug("adapter_info: ioba 0x%llx, flags 0x%x, flag_bits 0x%x\n",
+ be64_to_cpu(mad->buffer), vscsi->flags, flag_bits);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT,
+ flag_bits);
+ goto free_dma;
+ }
+
+ /*
+ * Copy client info, but ignore partition number, which we
+ * already got from phyp - unless we failed to get it from
+ * phyp (e.g. if we're running on a p5 system).
+ */
+ if (vscsi->client_data.partition_number == 0)
+ vscsi->client_data.partition_number =
+ be32_to_cpu(info->partition_number);
+ strncpy(vscsi->client_data.srp_version, info->srp_version,
+ sizeof(vscsi->client_data.srp_version));
+ strncpy(vscsi->client_data.partition_name, info->partition_name,
+ sizeof(vscsi->client_data.partition_name));
+ vscsi->client_data.mad_version = be32_to_cpu(info->mad_version);
+ vscsi->client_data.os_type = be32_to_cpu(info->os_type);
+
+ /* Copy our info */
+ strncpy(info->srp_version, SRP_VERSION,
+ sizeof(info->srp_version));
+ strncpy(info->partition_name, vscsi->dds.partition_name,
+ sizeof(info->partition_name));
+ info->partition_number = cpu_to_be32(vscsi->dds.partition_num);
+ info->mad_version = cpu_to_be32(MAD_VERSION_1);
+ info->os_type = cpu_to_be32(LINUX);
+ memset(&info->port_max_txu[0], 0, sizeof(info->port_max_txu));
+ info->port_max_txu[0] = cpu_to_be32(128 * PAGE_SIZE);
+
+ dma_wmb();
+ rc = h_copy_rdma(sizeof(*info), vscsi->dds.window[LOCAL].liobn,
+ token, vscsi->dds.window[REMOTE].liobn,
+ be64_to_cpu(mad->buffer));
+ switch (rc) {
+ case H_SUCCESS:
+ break;
+
+ case H_SOURCE_PARM:
+ case H_DEST_PARM:
+ case H_PERMISSION:
+ if (connection_broken(vscsi))
+ flag_bits = (RESPONSE_Q_DOWN | CLIENT_FAILED);
+ default:
+ dev_err(&vscsi->dev, "adapter_info: h_copy_rdma to client failed, rc %ld\n",
+ rc);
+ ibmvscsis_post_disconnect(vscsi,
+ ERR_DISCONNECT_RECONNECT,
+ flag_bits);
+ break;
+ }
+
+free_dma:
+ dma_free_coherent(&vscsi->dma_dev->dev, sizeof(*info), info, token);
+ pr_debug("Leaving adapter_info, rc %ld\n", rc);
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_cap_mad() - Service a Capabilities MAnagement Data gram
+ * @vscsi: Pointer to our adapter structure
+ * @iue: Information Unit containing the Capabilities MAD request
+ *
+ * NOTE: if you return an error from this routine you must be
+ * disconnecting or you will cause a hang
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt called with adapter lock held
+ */
+static int ibmvscsis_cap_mad(struct scsi_info *vscsi, struct iu_entry *iue)
+{
+ struct viosrp_capabilities *mad = &vio_iu(iue)->mad.capabilities;
+ struct capabilities *cap;
+ struct mad_capability_common *common;
+ dma_addr_t token;
+ u16 olen, len, status, min_len, cap_len;
+ u32 flag;
+ uint flag_bits = 0;
+ long rc = 0;
+
+ olen = be16_to_cpu(mad->common.length);
+ /*
+ * struct capabilities hardcodes a couple capabilities after the
+ * header, but the capabilities can actually be in any order.
+ */
+ min_len = offsetof(struct capabilities, migration);
+ if ((olen < min_len) || (olen > PAGE_SIZE)) {
+ pr_warn("cap_mad: invalid len %d\n", olen);
+ mad->common.status = cpu_to_be16(VIOSRP_MAD_FAILED);
+ return 0;
+ }
+
+ cap = dma_alloc_coherent(&vscsi->dma_dev->dev, olen, &token,
+ GFP_KERNEL);
+ if (!cap) {
+ dev_err(&vscsi->dev, "bad dma_alloc_coherent %p\n",
+ iue->target);
+ mad->common.status = cpu_to_be16(VIOSRP_MAD_FAILED);
+ return 0;
+ }
+ rc = h_copy_rdma(olen, vscsi->dds.window[REMOTE].liobn,
+ be64_to_cpu(mad->buffer),
+ vscsi->dds.window[LOCAL].liobn, token);
+ if (rc == H_SUCCESS) {
+ strncpy(cap->name, dev_name(&vscsi->dma_dev->dev),
+ SRP_MAX_LOC_LEN);
+
+ len = olen - min_len;
+ status = VIOSRP_MAD_SUCCESS;
+ common = (struct mad_capability_common *)&cap->migration;
+
+ while ((len > 0) && (status == VIOSRP_MAD_SUCCESS) && !rc) {
+ pr_debug("cap_mad: len left %hd, cap type %d, cap len %hd\n",
+ len, be32_to_cpu(common->cap_type),
+ be16_to_cpu(common->length));
+
+ cap_len = be16_to_cpu(common->length);
+ if (cap_len > len) {
+ dev_err(&vscsi->dev, "cap_mad: cap len mismatch with total len\n");
+ status = VIOSRP_MAD_FAILED;
+ break;
+ }
+
+ if (cap_len == 0) {
+ dev_err(&vscsi->dev, "cap_mad: cap len is 0\n");
+ status = VIOSRP_MAD_FAILED;
+ break;
+ }
+
+ switch (common->cap_type) {
+ default:
+ pr_debug("cap_mad: unsupported capability\n");
+ common->server_support = 0;
+ flag = cpu_to_be32((u32)CAP_LIST_SUPPORTED);
+ cap->flags &= ~flag;
+ break;
+ }
+
+ len = len - cap_len;
+ common = (struct mad_capability_common *)
+ ((char *)common + cap_len);
+ }
+
+ mad->common.status = cpu_to_be16(status);
+
+ dma_wmb();
+ rc = h_copy_rdma(olen, vscsi->dds.window[LOCAL].liobn, token,
+ vscsi->dds.window[REMOTE].liobn,
+ be64_to_cpu(mad->buffer));
+
+ if (rc != H_SUCCESS) {
+ pr_debug("cap_mad: failed to copy to client, rc %ld\n",
+ rc);
+
+ if (rc == H_PERMISSION) {
+ if (connection_broken(vscsi))
+ flag_bits = (RESPONSE_Q_DOWN |
+ CLIENT_FAILED);
+ }
+
+ pr_warn("cap_mad: error copying data to client, rc %ld\n",
+ rc);
+ ibmvscsis_post_disconnect(vscsi,
+ ERR_DISCONNECT_RECONNECT,
+ flag_bits);
+ }
+ }
+
+ dma_free_coherent(&vscsi->dma_dev->dev, olen, cap, token);
+
+ pr_debug("Leaving cap_mad, rc %ld, client_cap 0x%x\n",
+ rc, vscsi->client_cap);
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_process_mad() - Service a MAnagement Data gram
+ * @vscsi: Pointer to our adapter structure
+ * @iue: Information Unit containing the MAD request
+ *
+ * Must be called with interrupt lock held.
+ */
+static long ibmvscsis_process_mad(struct scsi_info *vscsi, struct iu_entry *iue)
+{
+ struct mad_common *mad = (struct mad_common *)&vio_iu(iue)->mad;
+ struct viosrp_empty_iu *empty;
+ long rc = ADAPT_SUCCESS;
+
+ switch (be32_to_cpu(mad->type)) {
+ case VIOSRP_EMPTY_IU_TYPE:
+ empty = &vio_iu(iue)->mad.empty_iu;
+ vscsi->empty_iu_id = be64_to_cpu(empty->buffer);
+ vscsi->empty_iu_tag = be64_to_cpu(empty->common.tag);
+ mad->status = cpu_to_be16(VIOSRP_MAD_SUCCESS);
+ break;
+ case VIOSRP_ADAPTER_INFO_TYPE:
+ rc = ibmvscsis_adapter_info(vscsi, iue);
+ break;
+ case VIOSRP_CAPABILITIES_TYPE:
+ rc = ibmvscsis_cap_mad(vscsi, iue);
+ break;
+ case VIOSRP_ENABLE_FAST_FAIL:
+ if (vscsi->state == CONNECTED) {
+ vscsi->fast_fail = true;
+ mad->status = cpu_to_be16(VIOSRP_MAD_SUCCESS);
+ } else {
+ pr_warn("fast fail mad sent after login\n");
+ mad->status = cpu_to_be16(VIOSRP_MAD_FAILED);
+ }
+ break;
+ default:
+ mad->status = cpu_to_be16(VIOSRP_MAD_NOT_SUPPORTED);
+ break;
+ }
+
+ return rc;
+}
+
+/**
+ * srp_snd_msg_failed() - Handle an error when sending a response
+ * @vscsi: Pointer to our adapter structure
+ * @rc: The return code from the h_send_crq command
+ *
+ * Must be called with interrupt lock held.
+ */
+static void srp_snd_msg_failed(struct scsi_info *vscsi, long rc)
+{
+ ktime_t kt;
+
+ if (rc != H_DROPPED) {
+ ibmvscsis_free_cmd_qs(vscsi);
+
+ if (rc == H_CLOSED)
+ vscsi->flags |= CLIENT_FAILED;
+
+ /* don't flag the same problem multiple times */
+ if (!(vscsi->flags & RESPONSE_Q_DOWN)) {
+ vscsi->flags |= RESPONSE_Q_DOWN;
+ if (!(vscsi->state & (ERR_DISCONNECT |
+ ERR_DISCONNECT_RECONNECT |
+ ERR_DISCONNECTED | UNDEFINED))) {
+ dev_err(&vscsi->dev, "snd_msg_failed: setting RESPONSE_Q_DOWN, state 0x%hx, flags 0x%x, rc %ld\n",
+ vscsi->state, vscsi->flags, rc);
+ }
+ ibmvscsis_post_disconnect(vscsi,
+ ERR_DISCONNECT_RECONNECT, 0);
+ }
+ return;
+ }
+
+ /*
+ * The response queue is full.
+ * If the server is processing SRP requests, i.e.
+ * the client has successfully done an
+ * SRP_LOGIN, then it will wait forever for room in
+ * the queue. However if the system admin
+ * is attempting to unconfigure the server then one
+ * or more children will be in a state where
+ * they are being removed. So if there is even one
+ * child being removed then the driver assumes
+ * the system admin is attempting to break the
+ * connection with the client and MAX_TIMER_POPS
+ * is honored.
+ */
+ if ((vscsi->rsp_q_timer.timer_pops < MAX_TIMER_POPS) ||
+ (vscsi->state == SRP_PROCESSING)) {
+ pr_debug("snd_msg_failed: response queue full, flags 0x%x, timer started %d, pops %d\n",
+ vscsi->flags, (int)vscsi->rsp_q_timer.started,
+ vscsi->rsp_q_timer.timer_pops);
+
+ /*
+ * Check if the timer is running; if it
+ * is not then start it up.
+ */
+ if (!vscsi->rsp_q_timer.started) {
+ if (vscsi->rsp_q_timer.timer_pops <
+ MAX_TIMER_POPS) {
+ kt = ktime_set(0, WAIT_NANO_SECONDS);
+ } else {
+ /*
+ * slide the timeslice if the maximum
+ * timer pops have already happened
+ */
+ kt = ktime_set(WAIT_SECONDS, 0);
+ }
+
+ vscsi->rsp_q_timer.started = true;
+ hrtimer_start(&vscsi->rsp_q_timer.timer, kt,
+ HRTIMER_MODE_REL);
+ }
+ } else {
+ /*
+ * TBD: Do we need to worry about this? Need to get
+ * remove working.
+ */
+ /*
+ * waited a long time and it appears the system admin
+ * is bring this driver down
+ */
+ vscsi->flags |= RESPONSE_Q_DOWN;
+ ibmvscsis_free_cmd_qs(vscsi);
+ /*
+ * if the driver is already attempting to disconnect
+ * from the client and has already logged an error
+ * trace this event but don't put it in the error log
+ */
+ if (!(vscsi->state & (ERR_DISCONNECT |
+ ERR_DISCONNECT_RECONNECT |
+ ERR_DISCONNECTED | UNDEFINED))) {
+ dev_err(&vscsi->dev, "client crq full too long\n");
+ ibmvscsis_post_disconnect(vscsi,
+ ERR_DISCONNECT_RECONNECT,
+ 0);
+ }
+ }
+}
+
+/**
+ * ibmvscsis_send_messages() - Send a Response
+ * @vscsi: Pointer to our adapter structure
+ *
+ * Send a response, first checking the waiting queue. Responses are
+ * sent in order they are received. If the response cannot be sent,
+ * because the client queue is full, it stays on the waiting queue.
+ *
+ * PRECONDITION:
+ * Called with interrupt lock held
+ */
+static void ibmvscsis_send_messages(struct scsi_info *vscsi)
+{
+ u64 msg_hi = 0;
+ /* note do not attmempt to access the IU_data_ptr with this pointer
+ * it is not valid
+ */
+ struct viosrp_crq *crq = (struct viosrp_crq *)&msg_hi;
+ struct ibmvscsis_cmd *cmd, *nxt;
+ struct iu_entry *iue;
+ long rc = ADAPT_SUCCESS;
+
+ if (!(vscsi->flags & RESPONSE_Q_DOWN)) {
+ list_for_each_entry_safe(cmd, nxt, &vscsi->waiting_rsp, list) {
+ pr_debug("send_messages cmd %p\n", cmd);
+
+ iue = cmd->iue;
+
+ crq->valid = VALID_CMD_RESP_EL;
+ crq->format = cmd->rsp.format;
+
+ if (cmd->flags & CMD_FAST_FAIL)
+ crq->status = VIOSRP_ADAPTER_FAIL;
+
+ crq->IU_length = cpu_to_be16(cmd->rsp.len);
+
+ rc = h_send_crq(vscsi->dma_dev->unit_address,
+ be64_to_cpu(msg_hi),
+ be64_to_cpu(cmd->rsp.tag));
+
+ pr_debug("send_messages: tag 0x%llx, rc %ld\n",
+ be64_to_cpu(cmd->rsp.tag), rc);
+
+ /* if all ok free up the command element resources */
+ if (rc == H_SUCCESS) {
+ /* some movement has occurred */
+ vscsi->rsp_q_timer.timer_pops = 0;
+ list_del(&cmd->list);
+
+ ibmvscsis_free_cmd_resources(vscsi, cmd);
+ } else {
+ srp_snd_msg_failed(vscsi, rc);
+ break;
+ }
+ }
+
+ if (!rc) {
+ /*
+ * The timer could pop with the queue empty. If
+ * this happens, rc will always indicate a
+ * success; clear the pop count.
+ */
+ vscsi->rsp_q_timer.timer_pops = 0;
+ }
+ } else {
+ ibmvscsis_free_cmd_qs(vscsi);
+ }
+}
+
+/* Called with intr lock held */
+static void ibmvscsis_send_mad_resp(struct scsi_info *vscsi,
+ struct ibmvscsis_cmd *cmd,
+ struct viosrp_crq *crq)
+{
+ struct iu_entry *iue = cmd->iue;
+ struct mad_common *mad = (struct mad_common *)&vio_iu(iue)->mad;
+ uint flag_bits = 0;
+ long rc;
+
+ dma_wmb();
+ rc = h_copy_rdma(sizeof(struct mad_common),
+ vscsi->dds.window[LOCAL].liobn, iue->sbuf->dma,
+ vscsi->dds.window[REMOTE].liobn,
+ be64_to_cpu(crq->IU_data_ptr));
+ if (!rc) {
+ cmd->rsp.format = VIOSRP_MAD_FORMAT;
+ cmd->rsp.len = sizeof(struct mad_common);
+ cmd->rsp.tag = mad->tag;
+ list_add_tail(&cmd->list, &vscsi->waiting_rsp);
+ ibmvscsis_send_messages(vscsi);
+ } else {
+ pr_debug("Error sending mad response, rc %ld\n", rc);
+ if (rc == H_PERMISSION) {
+ if (connection_broken(vscsi))
+ flag_bits = (RESPONSE_Q_DOWN | CLIENT_FAILED);
+ }
+ dev_err(&vscsi->dev, "mad: failed to copy to client, rc %ld\n",
+ rc);
+
+ ibmvscsis_free_cmd_resources(vscsi, cmd);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT,
+ flag_bits);
+ }
+}
+
+/**
+ * ibmvscsis_mad() - Service a MAnagement Data gram.
+ * @vscsi: Pointer to our adapter structure
+ * @crq: Pointer to the CRQ entry containing the MAD request
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt called with adapter lock held
+ */
+static long ibmvscsis_mad(struct scsi_info *vscsi, struct viosrp_crq *crq)
+{
+ struct iu_entry *iue;
+ struct ibmvscsis_cmd *cmd;
+ struct mad_common *mad;
+ long rc = ADAPT_SUCCESS;
+
+ switch (vscsi->state) {
+ /*
+ * We have not exchanged Init Msgs yet, so this MAD was sent
+ * before the last Transport Event; client will not be
+ * expecting a response.
+ */
+ case WAIT_CONNECTION:
+ pr_debug("mad: in Wait Connection state, ignoring MAD, flags %d\n",
+ vscsi->flags);
+ return ADAPT_SUCCESS;
+
+ case SRP_PROCESSING:
+ case CONNECTED:
+ break;
+
+ /*
+ * We should never get here while we're in these states.
+ * Just log an error and get out.
+ */
+ case UNCONFIGURING:
+ case WAIT_IDLE:
+ case ERR_DISCONNECT:
+ case ERR_DISCONNECT_RECONNECT:
+ default:
+ dev_err(&vscsi->dev, "mad: invalid adapter state %d for mad\n",
+ vscsi->state);
+ return ADAPT_SUCCESS;
+ }
+
+ cmd = ibmvscsis_get_free_cmd(vscsi);
+ if (!cmd) {
+ dev_err(&vscsi->dev, "mad: failed to get cmd, debit %d\n",
+ vscsi->debit);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ return ERROR;
+ }
+ iue = cmd->iue;
+ cmd->type = ADAPTER_MAD;
+
+ rc = ibmvscsis_copy_crq_packet(vscsi, cmd, crq);
+ if (!rc) {
+ mad = (struct mad_common *)&vio_iu(iue)->mad;
+
+ pr_debug("mad: type %d\n", be32_to_cpu(mad->type));
+
+ if (be16_to_cpu(mad->length) < 0) {
+ dev_err(&vscsi->dev, "mad: length is < 0\n");
+ ibmvscsis_post_disconnect(vscsi,
+ ERR_DISCONNECT_RECONNECT, 0);
+ rc = SRP_VIOLATION;
+ } else {
+ rc = ibmvscsis_process_mad(vscsi, iue);
+ }
+
+ pr_debug("mad: status %hd, rc %ld\n", be16_to_cpu(mad->status),
+ rc);
+
+ if (!rc)
+ ibmvscsis_send_mad_resp(vscsi, cmd, crq);
+ } else {
+ ibmvscsis_free_cmd_resources(vscsi, cmd);
+ }
+
+ pr_debug("Leaving mad, rc %ld\n", rc);
+ return rc;
+}
+
+/**
+ * ibmvscsis_login_rsp() - Create/copy a login response notice to the client
+ * @vscsi: Pointer to our adapter structure
+ * @cmd: Pointer to the command for the SRP Login request
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt, interrupt lock held
+ */
+static long ibmvscsis_login_rsp(struct scsi_info *vscsi,
+ struct ibmvscsis_cmd *cmd)
+{
+ struct iu_entry *iue = cmd->iue;
+ struct srp_login_rsp *rsp = &vio_iu(iue)->srp.login_rsp;
+ struct format_code *fmt;
+ uint flag_bits = 0;
+ long rc = ADAPT_SUCCESS;
+
+ memset(rsp, 0, sizeof(struct srp_login_rsp));
+
+ rsp->opcode = SRP_LOGIN_RSP;
+ rsp->req_lim_delta = cpu_to_be32(vscsi->request_limit);
+ rsp->tag = cmd->rsp.tag;
+ rsp->max_it_iu_len = cpu_to_be32(SRP_MAX_IU_LEN);
+ rsp->max_ti_iu_len = cpu_to_be32(SRP_MAX_IU_LEN);
+ fmt = (struct format_code *)&rsp->buf_fmt;
+ fmt->buffers = SUPPORTED_FORMATS;
+ vscsi->credit = 0;
+
+ cmd->rsp.len = sizeof(struct srp_login_rsp);
+
+ dma_wmb();
+ rc = h_copy_rdma(cmd->rsp.len, vscsi->dds.window[LOCAL].liobn,
+ iue->sbuf->dma, vscsi->dds.window[REMOTE].liobn,
+ be64_to_cpu(iue->remote_token));
+
+ switch (rc) {
+ case H_SUCCESS:
+ break;
+
+ case H_PERMISSION:
+ if (connection_broken(vscsi))
+ flag_bits = RESPONSE_Q_DOWN | CLIENT_FAILED;
+ dev_err(&vscsi->dev, "login_rsp: error copying to client, rc %ld\n",
+ rc);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT,
+ flag_bits);
+ break;
+ case H_SOURCE_PARM:
+ case H_DEST_PARM:
+ default:
+ dev_err(&vscsi->dev, "login_rsp: error copying to client, rc %ld\n",
+ rc);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ break;
+ }
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_srp_login_rej() - Create/copy a login rejection notice to client
+ * @vscsi: Pointer to our adapter structure
+ * @cmd: Pointer to the command for the SRP Login request
+ * @reason: The reason the SRP Login is being rejected, per SRP protocol
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt, interrupt lock held
+ */
+static long ibmvscsis_srp_login_rej(struct scsi_info *vscsi,
+ struct ibmvscsis_cmd *cmd, u32 reason)
+{
+ struct iu_entry *iue = cmd->iue;
+ struct srp_login_rej *rej = &vio_iu(iue)->srp.login_rej;
+ struct format_code *fmt;
+ uint flag_bits = 0;
+ long rc = ADAPT_SUCCESS;
+
+ memset(rej, 0, sizeof(*rej));
+
+ rej->opcode = SRP_LOGIN_REJ;
+ rej->reason = cpu_to_be32(reason);
+ rej->tag = cmd->rsp.tag;
+ fmt = (struct format_code *)&rej->buf_fmt;
+ fmt->buffers = SUPPORTED_FORMATS;
+
+ cmd->rsp.len = sizeof(*rej);
+
+ dma_wmb();
+ rc = h_copy_rdma(cmd->rsp.len, vscsi->dds.window[LOCAL].liobn,
+ iue->sbuf->dma, vscsi->dds.window[REMOTE].liobn,
+ be64_to_cpu(iue->remote_token));
+
+ switch (rc) {
+ case H_SUCCESS:
+ break;
+ case H_PERMISSION:
+ if (connection_broken(vscsi))
+ flag_bits = RESPONSE_Q_DOWN | CLIENT_FAILED;
+ dev_err(&vscsi->dev, "login_rej: error copying to client, rc %ld\n",
+ rc);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT,
+ flag_bits);
+ break;
+ case H_SOURCE_PARM:
+ case H_DEST_PARM:
+ default:
+ dev_err(&vscsi->dev, "login_rej: error copying to client, rc %ld\n",
+ rc);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ break;
+ }
+
+ return rc;
+}
+
+static int ibmvscsis_make_nexus(struct ibmvscsis_tport *tport)
+{
+ char *name = tport->tport_name;
+ struct ibmvscsis_nexus *nexus;
+ int rc;
+
+ if (tport->ibmv_nexus) {
+ pr_debug("tport->ibmv_nexus already exists\n");
+ return 0;
+ }
+
+ nexus = kzalloc(sizeof(*nexus), GFP_KERNEL);
+ if (!nexus) {
+ pr_err("Unable to allocate struct ibmvscsis_nexus\n");
+ return -ENOMEM;
+ }
+
+ nexus->se_sess = target_alloc_session(&tport->se_tpg, 0, 0,
+ TARGET_PROT_NORMAL, name, nexus,
+ NULL);
+ if (IS_ERR(nexus->se_sess)) {
+ rc = PTR_ERR(nexus->se_sess);
+ goto transport_init_fail;
+ }
+
+ tport->ibmv_nexus = nexus;
+
+ return 0;
+
+transport_init_fail:
+ kfree(nexus);
+ return rc;
+}
+
+static int ibmvscsis_drop_nexus(struct ibmvscsis_tport *tport)
+{
+ struct se_session *se_sess;
+ struct ibmvscsis_nexus *nexus;
+
+ nexus = tport->ibmv_nexus;
+ if (!nexus)
+ return -ENODEV;
+
+ se_sess = nexus->se_sess;
+ if (!se_sess)
+ return -ENODEV;
+
+ /*
+ * Release the SCSI I_T Nexus to the emulated ibmvscsis Target Port
+ */
+ transport_deregister_session(se_sess);
+ tport->ibmv_nexus = NULL;
+ kfree(nexus);
+
+ return 0;
+}
+
+/**
+ * ibmvscsis_srp_login() - Process an SRP Login Request
+ * @vscsi: Pointer to our adapter structure
+ * @cmd: Command element to use to process the SRP Login request
+ * @crq: Pointer to CRQ entry containing the SRP Login request
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt, called with interrupt lock held
+ */
+static long ibmvscsis_srp_login(struct scsi_info *vscsi,
+ struct ibmvscsis_cmd *cmd,
+ struct viosrp_crq *crq)
+{
+ struct iu_entry *iue = cmd->iue;
+ struct srp_login_req *req = &vio_iu(iue)->srp.login_req;
+ struct port_id {
+ __be64 id_extension;
+ __be64 io_guid;
+ } *iport, *tport;
+ struct format_code *fmt;
+ u32 reason = 0x0;
+ long rc = ADAPT_SUCCESS;
+
+ iport = (struct port_id *)req->initiator_port_id;
+ tport = (struct port_id *)req->target_port_id;
+ fmt = (struct format_code *)&req->req_buf_fmt;
+ if (be32_to_cpu(req->req_it_iu_len) > SRP_MAX_IU_LEN)
+ reason = SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE;
+ else if (be32_to_cpu(req->req_it_iu_len) < 64)
+ reason = SRP_LOGIN_REJ_UNABLE_ESTABLISH_CHANNEL;
+ else if ((be64_to_cpu(iport->id_extension) > (MAX_NUM_PORTS - 1)) ||
+ (be64_to_cpu(tport->id_extension) > (MAX_NUM_PORTS - 1)))
+ reason = SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL;
+ else if (req->req_flags & SRP_MULTICHAN_MULTI)
+ reason = SRP_LOGIN_REJ_MULTI_CHANNEL_UNSUPPORTED;
+ else if (fmt->buffers & (~SUPPORTED_FORMATS))
+ reason = SRP_LOGIN_REJ_UNSUPPORTED_DESCRIPTOR_FMT;
+ else if ((fmt->buffers | SUPPORTED_FORMATS) == 0)
+ reason = SRP_LOGIN_REJ_UNSUPPORTED_DESCRIPTOR_FMT;
+
+ if (vscsi->state == SRP_PROCESSING)
+ reason = SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED;
+
+ rc = ibmvscsis_make_nexus(&vscsi->tport);
+ if (rc)
+ reason = SRP_LOGIN_REJ_UNABLE_ESTABLISH_CHANNEL;
+
+ cmd->rsp.format = VIOSRP_SRP_FORMAT;
+ cmd->rsp.tag = req->tag;
+
+ pr_debug("srp_login: reason 0x%x\n", reason);
+
+ if (reason)
+ rc = ibmvscsis_srp_login_rej(vscsi, cmd, reason);
+ else
+ rc = ibmvscsis_login_rsp(vscsi, cmd);
+
+ if (!rc) {
+ if (!reason)
+ vscsi->state = SRP_PROCESSING;
+
+ list_add_tail(&cmd->list, &vscsi->waiting_rsp);
+ ibmvscsis_send_messages(vscsi);
+ } else {
+ ibmvscsis_free_cmd_resources(vscsi, cmd);
+ }
+
+ pr_debug("Leaving srp_login, rc %ld\n", rc);
+ return rc;
+}
+
+/**
+ * ibmvscsis_srp_i_logout() - Helper Function to close I_T Nexus
+ * @vscsi: Pointer to our adapter structure
+ * @cmd: Command element to use to process the Implicit Logout request
+ * @crq: Pointer to CRQ entry containing the Implicit Logout request
+ *
+ * Do the logic to close the I_T nexus. This function may not
+ * behave to specification.
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt, interrupt lock held
+ */
+static long ibmvscsis_srp_i_logout(struct scsi_info *vscsi,
+ struct ibmvscsis_cmd *cmd,
+ struct viosrp_crq *crq)
+{
+ struct iu_entry *iue = cmd->iue;
+ struct srp_i_logout *log_out = &vio_iu(iue)->srp.i_logout;
+ long rc = ADAPT_SUCCESS;
+
+ if ((vscsi->debit > 0) || !list_empty(&vscsi->schedule_q) ||
+ !list_empty(&vscsi->waiting_rsp)) {
+ dev_err(&vscsi->dev, "i_logout: outstanding work\n");
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT, 0);
+ } else {
+ cmd->rsp.format = SRP_FORMAT;
+ cmd->rsp.tag = log_out->tag;
+ cmd->rsp.len = sizeof(struct mad_common);
+ list_add_tail(&cmd->list, &vscsi->waiting_rsp);
+ ibmvscsis_send_messages(vscsi);
+
+ ibmvscsis_post_disconnect(vscsi, WAIT_IDLE, 0);
+ }
+
+ return rc;
+}
+
+/* Called with intr lock held */
+static void ibmvscsis_srp_cmd(struct scsi_info *vscsi, struct viosrp_crq *crq)
+{
+ struct ibmvscsis_cmd *cmd;
+ struct iu_entry *iue;
+ struct srp_cmd *srp;
+ struct srp_tsk_mgmt *tsk;
+ long rc;
+
+ if (vscsi->request_limit - vscsi->debit <= 0) {
+ /* Client has exceeded request limit */
+ dev_err(&vscsi->dev, "Client exceeded the request limit (%d), debit %d\n",
+ vscsi->request_limit, vscsi->debit);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ return;
+ }
+
+ cmd = ibmvscsis_get_free_cmd(vscsi);
+ if (!cmd) {
+ dev_err(&vscsi->dev, "srp_cmd failed to get cmd, debit %d\n",
+ vscsi->debit);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ return;
+ }
+ iue = cmd->iue;
+ srp = &vio_iu(iue)->srp.cmd;
+
+ rc = ibmvscsis_copy_crq_packet(vscsi, cmd, crq);
+ if (rc) {
+ ibmvscsis_free_cmd_resources(vscsi, cmd);
+ return;
+ }
+
+ if (vscsi->state == SRP_PROCESSING) {
+ switch (srp->opcode) {
+ case SRP_LOGIN_REQ:
+ rc = ibmvscsis_srp_login(vscsi, cmd, crq);
+ break;
+
+ case SRP_TSK_MGMT:
+ tsk = &vio_iu(iue)->srp.tsk_mgmt;
+ pr_debug("tsk_mgmt tag: %llu (0x%llx)\n", tsk->tag,
+ tsk->tag);
+ cmd->rsp.tag = tsk->tag;
+ vscsi->debit += 1;
+ cmd->type = TASK_MANAGEMENT;
+ list_add_tail(&cmd->list, &vscsi->schedule_q);
+ queue_work(vscsi->work_q, &cmd->work);
+ break;
+
+ case SRP_CMD:
+ pr_debug("srp_cmd tag: %llu (0x%llx)\n", srp->tag,
+ srp->tag);
+ cmd->rsp.tag = srp->tag;
+ vscsi->debit += 1;
+ cmd->type = SCSI_CDB;
+ /*
+ * We want to keep track of work waiting for
+ * the workqueue.
+ */
+ list_add_tail(&cmd->list, &vscsi->schedule_q);
+ queue_work(vscsi->work_q, &cmd->work);
+ break;
+
+ case SRP_I_LOGOUT:
+ rc = ibmvscsis_srp_i_logout(vscsi, cmd, crq);
+ break;
+
+ case SRP_CRED_RSP:
+ case SRP_AER_RSP:
+ default:
+ ibmvscsis_free_cmd_resources(vscsi, cmd);
+ dev_err(&vscsi->dev, "invalid srp cmd, opcode %d\n",
+ (uint)srp->opcode);
+ ibmvscsis_post_disconnect(vscsi,
+ ERR_DISCONNECT_RECONNECT, 0);
+ break;
+ }
+ } else if (srp->opcode == SRP_LOGIN_REQ && vscsi->state == CONNECTED) {
+ rc = ibmvscsis_srp_login(vscsi, cmd, crq);
+ } else {
+ ibmvscsis_free_cmd_resources(vscsi, cmd);
+ dev_err(&vscsi->dev, "Invalid state %d to handle srp cmd\n",
+ vscsi->state);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ }
+}
+
+/**
+ * ibmvscsis_ping_response() - Respond to a ping request
+ * @vscsi: Pointer to our adapter structure
+ *
+ * Let the client know that the server is alive and waiting on
+ * its native I/O stack.
+ * If any type of error occurs from the call to queue a ping
+ * response then the client is either not accepting or receiving
+ * interrupts. Disconnect with an error.
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt, interrupt lock held
+ */
+static long ibmvscsis_ping_response(struct scsi_info *vscsi)
+{
+ struct viosrp_crq *crq;
+ u64 buffer[2] = { 0, 0 };
+ long rc;
+
+ crq = (struct viosrp_crq *)&buffer;
+ crq->valid = VALID_CMD_RESP_EL;
+ crq->format = (u8)MESSAGE_IN_CRQ;
+ crq->status = PING_RESPONSE;
+
+ rc = h_send_crq(vscsi->dds.unit_id, cpu_to_be64(buffer[MSG_HI]),
+ cpu_to_be64(buffer[MSG_LOW]));
+
+ switch (rc) {
+ case H_SUCCESS:
+ break;
+ case H_CLOSED:
+ vscsi->flags |= CLIENT_FAILED;
+ case H_DROPPED:
+ vscsi->flags |= RESPONSE_Q_DOWN;
+ case H_REMOTE_PARM:
+ dev_err(&vscsi->dev, "ping_response: h_send_crq failed, rc %ld\n",
+ rc);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ break;
+ default:
+ dev_err(&vscsi->dev, "ping_response: h_send_crq returned unknown rc %ld\n",
+ rc);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT, 0);
+ break;
+ }
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_handle_init_compl_msg() - Respond to an Init Complete Message
+ * @vscsi: Pointer to our adapter structure
+ *
+ * Must be called with interrupt lock held.
+ */
+static long ibmvscsis_handle_init_compl_msg(struct scsi_info *vscsi)
+{
+ long rc = ADAPT_SUCCESS;
+
+ switch (vscsi->state) {
+ case NO_QUEUE:
+ case ERR_DISCONNECT:
+ case ERR_DISCONNECT_RECONNECT:
+ case ERR_DISCONNECTED:
+ case UNCONFIGURING:
+ case UNDEFINED:
+ rc = ERROR;
+ break;
+
+ case WAIT_CONNECTION:
+ vscsi->state = CONNECTED;
+ break;
+
+ case WAIT_IDLE:
+ case SRP_PROCESSING:
+ case CONNECTED:
+ case WAIT_ENABLED:
+ case PART_UP_WAIT_ENAB:
+ default:
+ rc = ERROR;
+ dev_err(&vscsi->dev, "init_msg: invalid state %d to get init compl msg\n",
+ vscsi->state);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ break;
+ }
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_handle_init_msg() - Respond to an Init Message
+ * @vscsi: Pointer to our adapter structure
+ *
+ * Must be called with interrupt lock held.
+ */
+static long ibmvscsis_handle_init_msg(struct scsi_info *vscsi)
+{
+ long rc = ADAPT_SUCCESS;
+
+ switch (vscsi->state) {
+ case WAIT_ENABLED:
+ vscsi->state = PART_UP_WAIT_ENAB;
+ break;
+
+ case WAIT_CONNECTION:
+ rc = ibmvscsis_send_init_message(vscsi, INIT_COMPLETE_MSG);
+ switch (rc) {
+ case H_SUCCESS:
+ vscsi->state = CONNECTED;
+ break;
+
+ case H_PARAMETER:
+ dev_err(&vscsi->dev, "init_msg: failed to send, rc %ld\n",
+ rc);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT, 0);
+ break;
+
+ case H_DROPPED:
+ dev_err(&vscsi->dev, "init_msg: failed to send, rc %ld\n",
+ rc);
+ rc = ERROR;
+ ibmvscsis_post_disconnect(vscsi,
+ ERR_DISCONNECT_RECONNECT, 0);
+ break;
+
+ case H_CLOSED:
+ pr_warn("init_msg: failed to send, rc %ld\n", rc);
+ rc = 0;
+ break;
+ }
+ break;
+
+ case UNDEFINED:
+ rc = ERROR;
+ break;
+
+ case UNCONFIGURING:
+ break;
+
+ case PART_UP_WAIT_ENAB:
+ case CONNECTED:
+ case SRP_PROCESSING:
+ case WAIT_IDLE:
+ case NO_QUEUE:
+ case ERR_DISCONNECT:
+ case ERR_DISCONNECT_RECONNECT:
+ case ERR_DISCONNECTED:
+ default:
+ rc = ERROR;
+ dev_err(&vscsi->dev, "init_msg: invalid state %d to get init msg\n",
+ vscsi->state);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ break;
+ }
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_init_msg() - Respond to an init message
+ * @vscsi: Pointer to our adapter structure
+ * @crq: Pointer to CRQ element containing the Init Message
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt, interrupt lock held
+ */
+static long ibmvscsis_init_msg(struct scsi_info *vscsi, struct viosrp_crq *crq)
+{
+ long rc = ADAPT_SUCCESS;
+
+ pr_debug("init_msg: state 0x%hx\n", vscsi->state);
+
+ rc = h_vioctl(vscsi->dds.unit_id, H_GET_PARTNER_INFO,
+ (u64)vscsi->map_ioba | ((u64)PAGE_SIZE << 32), 0, 0, 0,
+ 0);
+ if (rc == H_SUCCESS) {
+ vscsi->client_data.partition_number =
+ be64_to_cpu(*(u64 *)vscsi->map_buf);
+ pr_debug("init_msg, part num %d\n",
+ vscsi->client_data.partition_number);
+ } else {
+ pr_debug("init_msg h_vioctl rc %ld\n", rc);
+ rc = ADAPT_SUCCESS;
+ }
+
+ if (crq->format == INIT_MSG) {
+ rc = ibmvscsis_handle_init_msg(vscsi);
+ } else if (crq->format == INIT_COMPLETE_MSG) {
+ rc = ibmvscsis_handle_init_compl_msg(vscsi);
+ } else {
+ rc = ERROR;
+ dev_err(&vscsi->dev, "init_msg: invalid format %d\n",
+ (uint)crq->format);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ }
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_parse_command() - Parse an element taken from the cmd rsp queue.
+ * @vscsi: Pointer to our adapter structure
+ * @crq: Pointer to CRQ element containing the SRP request
+ *
+ * This function will return success if the command queue element is valid
+ * and the srp iu or MAD request it pointed to was also valid. That does
+ * not mean that an error was not returned to the client.
+ *
+ * EXECUTION ENVIRONMENT:
+ * Interrupt, intr lock held
+ */
+static long ibmvscsis_parse_command(struct scsi_info *vscsi,
+ struct viosrp_crq *crq)
+{
+ long rc = ADAPT_SUCCESS;
+
+ switch (crq->valid) {
+ case VALID_CMD_RESP_EL:
+ switch (crq->format) {
+ case OS400_FORMAT:
+ case AIX_FORMAT:
+ case LINUX_FORMAT:
+ case MAD_FORMAT:
+ if (vscsi->flags & PROCESSING_MAD) {
+ rc = ERROR;
+ dev_err(&vscsi->dev, "parse_command: already processing mad\n");
+ ibmvscsis_post_disconnect(vscsi,
+ ERR_DISCONNECT_RECONNECT,
+ 0);
+ } else {
+ vscsi->flags |= PROCESSING_MAD;
+ rc = ibmvscsis_mad(vscsi, crq);
+ }
+ break;
+
+ case SRP_FORMAT:
+ ibmvscsis_srp_cmd(vscsi, crq);
+ break;
+
+ case MESSAGE_IN_CRQ:
+ if (crq->status == PING)
+ ibmvscsis_ping_response(vscsi);
+ break;
+
+ default:
+ dev_err(&vscsi->dev, "parse_command: invalid format %d\n",
+ (uint)crq->format);
+ ibmvscsis_post_disconnect(vscsi,
+ ERR_DISCONNECT_RECONNECT, 0);
+ break;
+ }
+ break;
+
+ case VALID_TRANS_EVENT:
+ rc = ibmvscsis_trans_event(vscsi, crq);
+ break;
+
+ case VALID_INIT_MSG:
+ rc = ibmvscsis_init_msg(vscsi, crq);
+ break;
+
+ default:
+ dev_err(&vscsi->dev, "parse_command: invalid valid field %d\n",
+ (uint)crq->valid);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ break;
+ }
+
+ /*
+ * Return only what the interrupt handler cares
+ * about. Most errors we keep right on trucking.
+ */
+ rc = vscsi->flags & SCHEDULE_DISCONNECT;
+
+ return rc;
+}
+
+static int read_dma_window(struct scsi_info *vscsi)
+{
+ struct vio_dev *vdev = vscsi->dma_dev;
+ const __be32 *dma_window;
+ const __be32 *prop;
+
+ /* TODO Using of_parse_dma_window would be better, but it doesn't give
+ * a way to read multiple windows without already knowing the size of
+ * a window or the number of windows.
+ */
+ dma_window = (const __be32 *)vio_get_attribute(vdev,
+ "ibm,my-dma-window",
+ NULL);
+ if (!dma_window) {
+ pr_err("Couldn't find ibm,my-dma-window property\n");
+ return -1;
+ }
+
+ vscsi->dds.window[LOCAL].liobn = be32_to_cpu(*dma_window);
+ dma_window++;
+
+ prop = (const __be32 *)vio_get_attribute(vdev, "ibm,#dma-address-cells",
+ NULL);
+ if (!prop) {
+ pr_warn("Couldn't find ibm,#dma-address-cells property\n");
+ dma_window++;
+ } else {
+ dma_window += be32_to_cpu(*prop);
+ }
+
+ prop = (const __be32 *)vio_get_attribute(vdev, "ibm,#dma-size-cells",
+ NULL);
+ if (!prop) {
+ pr_warn("Couldn't find ibm,#dma-size-cells property\n");
+ dma_window++;
+ } else {
+ dma_window += be32_to_cpu(*prop);
+ }
+
+ /* dma_window should point to the second window now */
+ vscsi->dds.window[REMOTE].liobn = be32_to_cpu(*dma_window);
+
+ return 0;
+}
+
+static struct ibmvscsis_tport *ibmvscsis_lookup_port(const char *name)
+{
+ struct ibmvscsis_tport *tport = NULL;
+ struct vio_dev *vdev;
+ struct scsi_info *vscsi;
+
+ spin_lock_bh(&ibmvscsis_dev_lock);
+ list_for_each_entry(vscsi, &ibmvscsis_dev_list, list) {
+ vdev = vscsi->dma_dev;
+ if (!strcmp(dev_name(&vdev->dev), name)) {
+ tport = &vscsi->tport;
+ break;
+ }
+ }
+ spin_unlock_bh(&ibmvscsis_dev_lock);
+
+ return tport;
+}
+
+/**
+ * ibmvscsis_parse_cmd() - Parse SRP Command
+ * @vscsi: Pointer to our adapter structure
+ * @cmd: Pointer to command element with SRP command
+ *
+ * Parse the srp command; if it is valid then submit it to tcm.
+ * Note: The return code does not reflect the status of the SCSI CDB.
+ *
+ * EXECUTION ENVIRONMENT:
+ * Process level
+ */
+static void ibmvscsis_parse_cmd(struct scsi_info *vscsi,
+ struct ibmvscsis_cmd *cmd)
+{
+ struct iu_entry *iue = cmd->iue;
+ struct srp_cmd *srp = (struct srp_cmd *)iue->sbuf->buf;
+ struct ibmvscsis_nexus *nexus;
+ u64 data_len = 0;
+ enum dma_data_direction dir;
+ int attr = 0;
+ int rc = 0;
+
+ nexus = vscsi->tport.ibmv_nexus;
+ /*
+ * additional length in bytes. Note that the SRP spec says that
+ * additional length is in 4-byte words, but technically the
+ * additional length field is only the upper 6 bits of the byte.
+ * The lower 2 bits are reserved. If the lower 2 bits are 0 (as
+ * all reserved fields should be), then interpreting the byte as
+ * an int will yield the length in bytes.
+ */
+ if (srp->add_cdb_len & 0x03) {
+ dev_err(&vscsi->dev, "parse_cmd: reserved bits set in IU\n");
+ spin_lock_bh(&vscsi->intr_lock);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ ibmvscsis_free_cmd_resources(vscsi, cmd);
+ spin_unlock_bh(&vscsi->intr_lock);
+ return;
+ }
+
+ if (srp_get_desc_table(srp, &dir, &data_len)) {
+ dev_err(&vscsi->dev, "0x%llx: parsing SRP descriptor table failed.\n",
+ srp->tag);
+ goto fail;
+ return;
+ }
+
+ cmd->rsp.sol_not = srp->sol_not;
+
+ switch (srp->task_attr) {
+ case SRP_SIMPLE_TASK:
+ attr = TCM_SIMPLE_TAG;
+ break;
+ case SRP_ORDERED_TASK:
+ attr = TCM_ORDERED_TAG;
+ break;
+ case SRP_HEAD_TASK:
+ attr = TCM_HEAD_TAG;
+ break;
+ case SRP_ACA_TASK:
+ attr = TCM_ACA_TAG;
+ break;
+ default:
+ dev_err(&vscsi->dev, "Invalid task attribute %d\n",
+ srp->task_attr);
+ goto fail;
+ }
+
+ cmd->se_cmd.tag = be64_to_cpu(srp->tag);
+
+ spin_lock_bh(&vscsi->intr_lock);
+ list_add_tail(&cmd->list, &vscsi->active_q);
+ spin_unlock_bh(&vscsi->intr_lock);
+
+ srp->lun.scsi_lun[0] &= 0x3f;
+
+ pr_debug("calling submit_cmd, se_cmd %p, lun 0x%llx, cdb 0x%x, attr:%d\n",
+ &cmd->se_cmd, scsilun_to_int(&srp->lun), (int)srp->cdb[0],
+ attr);
+
+ rc = target_submit_cmd(&cmd->se_cmd, nexus->se_sess, srp->cdb,
+ cmd->sense_buf, scsilun_to_int(&srp->lun),
+ data_len, attr, dir, 0);
+ if (rc) {
+ dev_err(&vscsi->dev, "target_submit_cmd failed, rc %d\n", rc);
+ goto fail;
+ }
+ return;
+
+fail:
+ spin_lock_bh(&vscsi->intr_lock);
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ spin_unlock_bh(&vscsi->intr_lock);
+}
+
+/**
+ * ibmvscsis_parse_task() - Parse SRP Task Management Request
+ * @vscsi: Pointer to our adapter structure
+ * @cmd: Pointer to command element with SRP task management request
+ *
+ * Parse the srp task management request; if it is valid then submit it to tcm.
+ * Note: The return code does not reflect the status of the task management
+ * request.
+ *
+ * EXECUTION ENVIRONMENT:
+ * Processor level
+ */
+static void ibmvscsis_parse_task(struct scsi_info *vscsi,
+ struct ibmvscsis_cmd *cmd)
+{
+ struct iu_entry *iue = cmd->iue;
+ struct srp_tsk_mgmt *srp_tsk = &vio_iu(iue)->srp.tsk_mgmt;
+ int tcm_type;
+ u64 tag_to_abort = 0;
+ int rc = 0;
+ struct ibmvscsis_nexus *nexus;
+
+ nexus = vscsi->tport.ibmv_nexus;
+
+ cmd->rsp.sol_not = srp_tsk->sol_not;
+
+ switch (srp_tsk->tsk_mgmt_func) {
+ case SRP_TSK_ABORT_TASK:
+ tcm_type = TMR_ABORT_TASK;
+ tag_to_abort = be64_to_cpu(srp_tsk->task_tag);
+ break;
+ case SRP_TSK_ABORT_TASK_SET:
+ tcm_type = TMR_ABORT_TASK_SET;
+ break;
+ case SRP_TSK_CLEAR_TASK_SET:
+ tcm_type = TMR_CLEAR_TASK_SET;
+ break;
+ case SRP_TSK_LUN_RESET:
+ tcm_type = TMR_LUN_RESET;
+ break;
+ case SRP_TSK_CLEAR_ACA:
+ tcm_type = TMR_CLEAR_ACA;
+ break;
+ default:
+ dev_err(&vscsi->dev, "unknown task mgmt func %d\n",
+ srp_tsk->tsk_mgmt_func);
+ cmd->se_cmd.se_tmr_req->response =
+ TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED;
+ rc = -1;
+ break;
+ }
+
+ if (!rc) {
+ cmd->se_cmd.tag = be64_to_cpu(srp_tsk->tag);
+
+ spin_lock_bh(&vscsi->intr_lock);
+ list_add_tail(&cmd->list, &vscsi->active_q);
+ spin_unlock_bh(&vscsi->intr_lock);
+
+ srp_tsk->lun.scsi_lun[0] &= 0x3f;
+
+ pr_debug("calling submit_tmr, func %d\n",
+ srp_tsk->tsk_mgmt_func);
+ rc = target_submit_tmr(&cmd->se_cmd, nexus->se_sess, NULL,
+ scsilun_to_int(&srp_tsk->lun), srp_tsk,
+ tcm_type, GFP_KERNEL, tag_to_abort, 0);
+ if (rc) {
+ dev_err(&vscsi->dev, "target_submit_tmr failed, rc %d\n",
+ rc);
+ cmd->se_cmd.se_tmr_req->response =
+ TMR_FUNCTION_REJECTED;
+ }
+ }
+
+ if (rc)
+ transport_send_check_condition_and_sense(&cmd->se_cmd, 0, 0);
+}
+
+static void ibmvscsis_scheduler(struct work_struct *work)
+{
+ struct ibmvscsis_cmd *cmd = container_of(work, struct ibmvscsis_cmd,
+ work);
+ struct scsi_info *vscsi = cmd->adapter;
+
+ spin_lock_bh(&vscsi->intr_lock);
+
+ /* Remove from schedule_q */
+ list_del(&cmd->list);
+
+ /* Don't submit cmd if we're disconnecting */
+ if (vscsi->flags & (SCHEDULE_DISCONNECT | DISCONNECT_SCHEDULED)) {
+ ibmvscsis_free_cmd_resources(vscsi, cmd);
+
+ /* ibmvscsis_disconnect might be waiting for us */
+ if (list_empty(&vscsi->active_q) &&
+ list_empty(&vscsi->schedule_q) &&
+ (vscsi->flags & WAIT_FOR_IDLE)) {
+ vscsi->flags &= ~WAIT_FOR_IDLE;
+ complete(&vscsi->wait_idle);
+ }
+
+ spin_unlock_bh(&vscsi->intr_lock);
+ return;
+ }
+
+ spin_unlock_bh(&vscsi->intr_lock);
+
+ switch (cmd->type) {
+ case SCSI_CDB:
+ ibmvscsis_parse_cmd(vscsi, cmd);
+ break;
+ case TASK_MANAGEMENT:
+ ibmvscsis_parse_task(vscsi, cmd);
+ break;
+ default:
+ dev_err(&vscsi->dev, "scheduler, invalid cmd type %d\n",
+ cmd->type);
+ spin_lock_bh(&vscsi->intr_lock);
+ ibmvscsis_free_cmd_resources(vscsi, cmd);
+ spin_unlock_bh(&vscsi->intr_lock);
+ break;
+ }
+}
+
+static int ibmvscsis_alloc_cmds(struct scsi_info *vscsi, int num)
+{
+ struct ibmvscsis_cmd *cmd;
+ int i;
+
+ INIT_LIST_HEAD(&vscsi->free_cmd);
+ vscsi->cmd_pool = kcalloc(num, sizeof(struct ibmvscsis_cmd),
+ GFP_KERNEL);
+ if (!vscsi->cmd_pool)
+ return -ENOMEM;
+
+ for (i = 0, cmd = (struct ibmvscsis_cmd *)vscsi->cmd_pool; i < num;
+ i++, cmd++) {
+ cmd->adapter = vscsi;
+ INIT_WORK(&cmd->work, ibmvscsis_scheduler);
+ list_add_tail(&cmd->list, &vscsi->free_cmd);
+ }
+
+ return 0;
+}
+
+static void ibmvscsis_free_cmds(struct scsi_info *vscsi)
+{
+ kfree(vscsi->cmd_pool);
+ vscsi->cmd_pool = NULL;
+ INIT_LIST_HEAD(&vscsi->free_cmd);
+}
+
+/**
+ * ibmvscsis_service_wait_q() - Service Waiting Queue
+ * @timer: Pointer to timer which has expired
+ *
+ * This routine is called when the timer pops to service the waiting
+ * queue. Elements on the queue have completed, their responses have been
+ * copied to the client, but the client's response queue was full so
+ * the queue message could not be sent. The routine grabs the proper locks
+ * and calls send messages.
+ *
+ * EXECUTION ENVIRONMENT:
+ * called at interrupt level
+ */
+static enum hrtimer_restart ibmvscsis_service_wait_q(struct hrtimer *timer)
+{
+ struct timer_cb *p_timer = container_of(timer, struct timer_cb, timer);
+ struct scsi_info *vscsi = container_of(p_timer, struct scsi_info,
+ rsp_q_timer);
+
+ spin_lock_bh(&vscsi->intr_lock);
+ p_timer->timer_pops += 1;
+ p_timer->started = false;
+ ibmvscsis_send_messages(vscsi);
+ spin_unlock_bh(&vscsi->intr_lock);
+
+ return HRTIMER_NORESTART;
+}
+
+static long ibmvscsis_alloctimer(struct scsi_info *vscsi)
+{
+ struct timer_cb *p_timer;
+
+ p_timer = &vscsi->rsp_q_timer;
+ hrtimer_init(&p_timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+ p_timer->timer.function = ibmvscsis_service_wait_q;
+ p_timer->started = false;
+ p_timer->timer_pops = 0;
+
+ return ADAPT_SUCCESS;
+}
+
+static void ibmvscsis_freetimer(struct scsi_info *vscsi)
+{
+ struct timer_cb *p_timer;
+
+ p_timer = &vscsi->rsp_q_timer;
+
+ (void)hrtimer_cancel(&p_timer->timer);
+
+ p_timer->started = false;
+ p_timer->timer_pops = 0;
+}
+
+static irqreturn_t ibmvscsis_interrupt(int dummy, void *data)
+{
+ struct scsi_info *vscsi = data;
+
+ vio_disable_interrupts(vscsi->dma_dev);
+ tasklet_schedule(&vscsi->work_task);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * ibmvscsis_check_q() - Helper function to Check Init Message Valid
+ * @vscsi: Pointer to our adapter structure
+ *
+ * Checks if a initialize message was queued by the initiatior
+ * while the timing window was open. This function is called from
+ * probe after the CRQ is created and interrupts are enabled.
+ * It would only be used by adapters who wait for some event before
+ * completing the init handshake with the client. For ibmvscsi, this
+ * event is waiting for the port to be enabled.
+ *
+ * EXECUTION ENVIRONMENT:
+ * Process level only, interrupt lock held
+ */
+static long ibmvscsis_check_q(struct scsi_info *vscsi)
+{
+ uint format;
+ long rc;
+
+ rc = ibmvscsis_check_init_msg(vscsi, &format);
+ if (rc)
+ ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0);
+ else if (format == UNUSED_FORMAT)
+ vscsi->state = WAIT_ENABLED;
+ else
+ vscsi->state = PART_UP_WAIT_ENAB;
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_enable_change_state() - Set new state based on enabled status
+ * @vscsi: Pointer to our adapter structure
+ *
+ * This function determines our new state now that we are enabled. This
+ * may involve sending an Init Complete message to the client.
+ *
+ * Must be called with interrupt lock held.
+ */
+static long ibmvscsis_enable_change_state(struct scsi_info *vscsi)
+{
+ long rc = ADAPT_SUCCESS;
+
+handle_state_change:
+ switch (vscsi->state) {
+ case WAIT_ENABLED:
+ rc = ibmvscsis_send_init_message(vscsi, INIT_MSG);
+ switch (rc) {
+ case H_SUCCESS:
+ case H_DROPPED:
+ case H_CLOSED:
+ vscsi->state = WAIT_CONNECTION;
+ rc = ADAPT_SUCCESS;
+ break;
+
+ case H_PARAMETER:
+ break;
+
+ case H_HARDWARE:
+ break;
+
+ default:
+ vscsi->state = UNDEFINED;
+ rc = H_HARDWARE;
+ break;
+ }
+ break;
+ case PART_UP_WAIT_ENAB:
+ rc = ibmvscsis_send_init_message(vscsi, INIT_COMPLETE_MSG);
+ switch (rc) {
+ case H_SUCCESS:
+ vscsi->state = CONNECTED;
+ rc = ADAPT_SUCCESS;
+ break;
+
+ case H_DROPPED:
+ case H_CLOSED:
+ vscsi->state = WAIT_ENABLED;
+ goto handle_state_change;
+
+ case H_PARAMETER:
+ break;
+
+ case H_HARDWARE:
+ break;
+
+ default:
+ rc = H_HARDWARE;
+ break;
+ }
+ break;
+
+ case WAIT_CONNECTION:
+ case WAIT_IDLE:
+ case SRP_PROCESSING:
+ case CONNECTED:
+ rc = ADAPT_SUCCESS;
+ break;
+ /* should not be able to get here */
+ case UNCONFIGURING:
+ rc = ERROR;
+ vscsi->state = UNDEFINED;
+ break;
+
+ /* driver should never allow this to happen */
+ case ERR_DISCONNECT:
+ case ERR_DISCONNECT_RECONNECT:
+ default:
+ dev_err(&vscsi->dev, "in invalid state %d during enable_change_state\n",
+ vscsi->state);
+ rc = ADAPT_SUCCESS;
+ break;
+ }
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_create_command_q() - Create Command Queue
+ * @vscsi: Pointer to our adapter structure
+ * @num_cmds: Currently unused. In the future, may be used to determine
+ * the size of the CRQ.
+ *
+ * Allocates memory for command queue maps remote memory into an ioba
+ * initializes the command response queue
+ *
+ * EXECUTION ENVIRONMENT:
+ * Process level only
+ */
+static long ibmvscsis_create_command_q(struct scsi_info *vscsi, int num_cmds)
+{
+ long rc = 0;
+ int pages;
+ struct vio_dev *vdev = vscsi->dma_dev;
+
+ /* We might support multiple pages in the future, but just 1 for now */
+ pages = 1;
+
+ vscsi->cmd_q.size = pages;
+
+ vscsi->cmd_q.base_addr =
+ (struct viosrp_crq *)get_zeroed_page(GFP_KERNEL);
+ if (!vscsi->cmd_q.base_addr)
+ return -ENOMEM;
+
+ vscsi->cmd_q.mask = ((uint)pages * CRQ_PER_PAGE) - 1;
+
+ vscsi->cmd_q.crq_token = dma_map_single(&vdev->dev,
+ vscsi->cmd_q.base_addr,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&vdev->dev, vscsi->cmd_q.crq_token)) {
+ free_page((unsigned long)vscsi->cmd_q.base_addr);
+ return -ENOMEM;
+ }
+
+ rc = h_reg_crq(vscsi->dds.unit_id, vscsi->cmd_q.crq_token, PAGE_SIZE);
+ if (rc) {
+ if (rc == H_CLOSED) {
+ vscsi->state = WAIT_ENABLED;
+ rc = 0;
+ } else {
+ dma_unmap_single(&vdev->dev, vscsi->cmd_q.crq_token,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ free_page((unsigned long)vscsi->cmd_q.base_addr);
+ rc = -ENODEV;
+ }
+ } else {
+ vscsi->state = WAIT_ENABLED;
+ }
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_destroy_command_q - Destroy Command Queue
+ * @vscsi: Pointer to our adapter structure
+ *
+ * Releases memory for command queue and unmaps mapped remote memory.
+ *
+ * EXECUTION ENVIRONMENT:
+ * Process level only
+ */
+static void ibmvscsis_destroy_command_q(struct scsi_info *vscsi)
+{
+ dma_unmap_single(&vscsi->dma_dev->dev, vscsi->cmd_q.crq_token,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ free_page((unsigned long)vscsi->cmd_q.base_addr);
+ vscsi->cmd_q.base_addr = NULL;
+ vscsi->state = NO_QUEUE;
+}
+
+static u8 ibmvscsis_fast_fail(struct scsi_info *vscsi,
+ struct ibmvscsis_cmd *cmd)
+{
+ struct iu_entry *iue = cmd->iue;
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+ struct srp_cmd *srp = (struct srp_cmd *)iue->sbuf->buf;
+ struct scsi_sense_hdr sshdr;
+ u8 rc = se_cmd->scsi_status;
+
+ if (vscsi->fast_fail && (READ_CMD(srp->cdb) || WRITE_CMD(srp->cdb)))
+ if (scsi_normalize_sense(se_cmd->sense_buffer,
+ se_cmd->scsi_sense_length, &sshdr))
+ if (sshdr.sense_key == HARDWARE_ERROR &&
+ (se_cmd->residual_count == 0 ||
+ se_cmd->residual_count == se_cmd->data_length)) {
+ rc = NO_SENSE;
+ cmd->flags |= CMD_FAST_FAIL;
+ }
+
+ return rc;
+}
+
+/**
+ * srp_build_response() - Build an SRP response buffer
+ * @vscsi: Pointer to our adapter structure
+ * @cmd: Pointer to command for which to send the response
+ * @len_p: Where to return the length of the IU response sent. This
+ * is needed to construct the CRQ response.
+ *
+ * Build the SRP response buffer and copy it to the client's memory space.
+ */
+static long srp_build_response(struct scsi_info *vscsi,
+ struct ibmvscsis_cmd *cmd, uint *len_p)
+{
+ struct iu_entry *iue = cmd->iue;
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+ struct srp_rsp *rsp;
+ uint len;
+ u32 rsp_code;
+ char *data;
+ u32 *tsk_status;
+ long rc = ADAPT_SUCCESS;
+
+ spin_lock_bh(&vscsi->intr_lock);
+
+ rsp = &vio_iu(iue)->srp.rsp;
+ len = sizeof(*rsp);
+ memset(rsp, 0, len);
+ data = rsp->data;
+
+ rsp->opcode = SRP_RSP;
+
+ if (vscsi->credit > 0 && vscsi->state == SRP_PROCESSING)
+ rsp->req_lim_delta = cpu_to_be32(vscsi->credit);
+ else
+ rsp->req_lim_delta = cpu_to_be32(1 + vscsi->credit);
+ rsp->tag = cmd->rsp.tag;
+ rsp->flags = 0;
+
+ if (cmd->type == SCSI_CDB) {
+ rsp->status = ibmvscsis_fast_fail(vscsi, cmd);
+ if (rsp->status) {
+ pr_debug("build_resp: cmd %p, scsi status %d\n", cmd,
+ (int)rsp->status);
+ ibmvscsis_determine_resid(se_cmd, rsp);
+ if (se_cmd->scsi_sense_length && se_cmd->sense_buffer) {
+ rsp->sense_data_len =
+ cpu_to_be32(se_cmd->scsi_sense_length);
+ rsp->flags |= SRP_RSP_FLAG_SNSVALID;
+ len += se_cmd->scsi_sense_length;
+ memcpy(data, se_cmd->sense_buffer,
+ se_cmd->scsi_sense_length);
+ }
+ rsp->sol_not = (cmd->rsp.sol_not & UCSOLNT) >>
+ UCSOLNT_RESP_SHIFT;
+ } else if (cmd->flags & CMD_FAST_FAIL) {
+ pr_debug("build_resp: cmd %p, fast fail\n", cmd);
+ rsp->sol_not = (cmd->rsp.sol_not & UCSOLNT) >>
+ UCSOLNT_RESP_SHIFT;
+ } else {
+ rsp->sol_not = (cmd->rsp.sol_not & SCSOLNT) >>
+ SCSOLNT_RESP_SHIFT;
+ }
+ } else {
+ /* this is task management */
+ rsp->status = 0;
+ rsp->resp_data_len = cpu_to_be32(4);
+ rsp->flags |= SRP_RSP_FLAG_RSPVALID;
+
+ switch (se_cmd->se_tmr_req->response) {
+ case TMR_FUNCTION_COMPLETE:
+ case TMR_TASK_DOES_NOT_EXIST:
+ rsp_code = SRP_TASK_MANAGEMENT_FUNCTION_COMPLETE;
+ rsp->sol_not = (cmd->rsp.sol_not & SCSOLNT) >>
+ SCSOLNT_RESP_SHIFT;
+ break;
+ case TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED:
+ case TMR_LUN_DOES_NOT_EXIST:
+ rsp_code = SRP_TASK_MANAGEMENT_FUNCTION_NOT_SUPPORTED;
+ rsp->sol_not = (cmd->rsp.sol_not & UCSOLNT) >>
+ UCSOLNT_RESP_SHIFT;
+ break;
+ case TMR_FUNCTION_FAILED:
+ case TMR_FUNCTION_REJECTED:
+ default:
+ rsp_code = SRP_TASK_MANAGEMENT_FUNCTION_FAILED;
+ rsp->sol_not = (cmd->rsp.sol_not & UCSOLNT) >>
+ UCSOLNT_RESP_SHIFT;
+ break;
+ }
+
+ tsk_status = (u32 *)data;
+ *tsk_status = cpu_to_be32(rsp_code);
+ data = (char *)(tsk_status + 1);
+ len += 4;
+ }
+
+ dma_wmb();
+ rc = h_copy_rdma(len, vscsi->dds.window[LOCAL].liobn, iue->sbuf->dma,
+ vscsi->dds.window[REMOTE].liobn,
+ be64_to_cpu(iue->remote_token));
+
+ switch (rc) {
+ case H_SUCCESS:
+ vscsi->credit = 0;
+ *len_p = len;
+ break;
+ case H_PERMISSION:
+ if (connection_broken(vscsi))
+ vscsi->flags |= RESPONSE_Q_DOWN | CLIENT_FAILED;
+
+ dev_err(&vscsi->dev, "build_response: error copying to client, rc %ld, flags 0x%x, state 0x%hx\n",
+ rc, vscsi->flags, vscsi->state);
+ break;
+ case H_SOURCE_PARM:
+ case H_DEST_PARM:
+ default:
+ dev_err(&vscsi->dev, "build_response: error copying to client, rc %ld\n",
+ rc);
+ break;
+ }
+
+ spin_unlock_bh(&vscsi->intr_lock);
+
+ return rc;
+}
+
+static int ibmvscsis_rdma(struct ibmvscsis_cmd *cmd, struct scatterlist *sg,
+ int nsg, struct srp_direct_buf *md, int nmd,
+ enum dma_data_direction dir, unsigned int bytes)
+{
+ struct iu_entry *iue = cmd->iue;
+ struct srp_target *target = iue->target;
+ struct scsi_info *vscsi = target->ldata;
+ struct scatterlist *sgp;
+ dma_addr_t client_ioba, server_ioba;
+ ulong buf_len;
+ ulong client_len, server_len;
+ int md_idx;
+ long tx_len;
+ long rc = 0;
+
+ pr_debug("rdma: dir %d, bytes 0x%x\n", dir, bytes);
+
+ if (bytes == 0)
+ return 0;
+
+ sgp = sg;
+ client_len = 0;
+ server_len = 0;
+ md_idx = 0;
+ tx_len = bytes;
+
+ do {
+ if (client_len == 0) {
+ if (md_idx >= nmd) {
+ dev_err(&vscsi->dev, "rdma: ran out of client memory descriptors\n");
+ rc = -EIO;
+ break;
+ }
+ client_ioba = be64_to_cpu(md[md_idx].va);
+ client_len = be32_to_cpu(md[md_idx].len);
+ }
+ if (server_len == 0) {
+ if (!sgp) {
+ dev_err(&vscsi->dev, "rdma: ran out of scatter/gather list\n");
+ rc = -EIO;
+ break;
+ }
+ server_ioba = sg_dma_address(sgp);
+ server_len = sg_dma_len(sgp);
+ }
+
+ buf_len = tx_len;
+
+ if (buf_len > client_len)
+ buf_len = client_len;
+
+ if (buf_len > server_len)
+ buf_len = server_len;
+
+ if (buf_len > max_vdma_size)
+ buf_len = max_vdma_size;
+
+ if (dir == DMA_TO_DEVICE) {
+ /* read from client */
+ rc = h_copy_rdma(buf_len,
+ vscsi->dds.window[REMOTE].liobn,
+ client_ioba,
+ vscsi->dds.window[LOCAL].liobn,
+ server_ioba);
+ } else {
+ /* write to client */
+ struct srp_cmd *srp = (struct srp_cmd *)iue->sbuf->buf;
+
+ if (!READ_CMD(srp->cdb))
+ print_hex_dump_bytes(" data:", DUMP_PREFIX_NONE,
+ sg_virt(sgp), buf_len);
+ /* The h_copy_rdma will cause phyp, running in another
+ * partition, to read memory, so we need to make sure
+ * the data has been written out, hence these syncs.
+ */
+ /* ensure that everything is in memory */
+ isync();
+ /* ensure that memory has been made visible */
+ dma_wmb();
+ rc = h_copy_rdma(buf_len,
+ vscsi->dds.window[LOCAL].liobn,
+ server_ioba,
+ vscsi->dds.window[REMOTE].liobn,
+ client_ioba);
+ }
+ switch (rc) {
+ case H_SUCCESS:
+ break;
+ case H_PERMISSION:
+ case H_SOURCE_PARM:
+ case H_DEST_PARM:
+ if (connection_broken(vscsi)) {
+ spin_lock_bh(&vscsi->intr_lock);
+ vscsi->flags |=
+ (RESPONSE_Q_DOWN | CLIENT_FAILED);
+ spin_unlock_bh(&vscsi->intr_lock);
+ }
+ dev_err(&vscsi->dev, "rdma: h_copy_rdma failed, rc %ld\n",
+ rc);
+ break;
+
+ default:
+ dev_err(&vscsi->dev, "rdma: unknown error %ld from h_copy_rdma\n",
+ rc);
+ break;
+ }
+
+ if (!rc) {
+ tx_len -= buf_len;
+ if (tx_len) {
+ client_len -= buf_len;
+ if (client_len == 0)
+ md_idx++;
+ else
+ client_ioba += buf_len;
+
+ server_len -= buf_len;
+ if (server_len == 0)
+ sgp = sg_next(sgp);
+ else
+ server_ioba += buf_len;
+ } else {
+ break;
+ }
+ }
+ } while (!rc);
+
+ return rc;
+}
+
+/**
+ * ibmvscsis_handle_crq() - Handle CRQ
+ * @data: Pointer to our adapter structure
+ *
+ * Read the command elements from the command queue and copy the payloads
+ * associated with the command elements to local memory and execute the
+ * SRP requests.
+ *
+ * Note: this is an edge triggered interrupt. It can not be shared.
+ */
+static void ibmvscsis_handle_crq(unsigned long data)
+{
+ struct scsi_info *vscsi = (struct scsi_info *)data;
+ struct viosrp_crq *crq;
+ long rc;
+ bool ack = true;
+ volatile u8 valid;
+
+ spin_lock_bh(&vscsi->intr_lock);
+
+ pr_debug("got interrupt\n");
+
+ /*
+ * if we are in a path where we are waiting for all pending commands
+ * to complete because we received a transport event and anything in
+ * the command queue is for a new connection, do nothing
+ */
+ if (TARGET_STOP(vscsi)) {
+ vio_enable_interrupts(vscsi->dma_dev);
+
+ pr_debug("handle_crq, don't process: flags 0x%x, state 0x%hx\n",
+ vscsi->flags, vscsi->state);
+ spin_unlock_bh(&vscsi->intr_lock);
+ return;
+ }
+
+ rc = vscsi->flags & SCHEDULE_DISCONNECT;
+ crq = vscsi->cmd_q.base_addr + vscsi->cmd_q.index;
+ valid = crq->valid;
+ dma_rmb();
+
+ while (valid) {
+ /*
+ * These are edege triggered interrupts. After dropping out of
+ * the while loop, the code must check for work since an
+ * interrupt could be lost, and an elment be left on the queue,
+ * hence the label.
+ */
+cmd_work:
+ vscsi->cmd_q.index =
+ (vscsi->cmd_q.index + 1) & vscsi->cmd_q.mask;
+
+ if (!rc) {
+ rc = ibmvscsis_parse_command(vscsi, crq);
+ } else {
+ if ((uint)crq->valid == VALID_TRANS_EVENT) {
+ /*
+ * must service the transport layer events even
+ * in an error state, dont break out until all
+ * the consecutive transport events have been
+ * processed
+ */
+ rc = ibmvscsis_trans_event(vscsi, crq);
+ } else if (vscsi->flags & TRANS_EVENT) {
+ /*
+ * if a tranport event has occurred leave
+ * everything but transport events on the queue
+ */
+ pr_debug("handle_crq, ignoring\n");
+
+ /*
+ * need to decrement the queue index so we can
+ * look at the elment again
+ */
+ if (vscsi->cmd_q.index)
+ vscsi->cmd_q.index -= 1;
+ else
+ /*
+ * index is at 0 it just wrapped.
+ * have it index last element in q
+ */
+ vscsi->cmd_q.index = vscsi->cmd_q.mask;
+ break;
+ }
+ }
+
+ crq->valid = INVALIDATE_CMD_RESP_EL;
+
+ crq = vscsi->cmd_q.base_addr + vscsi->cmd_q.index;
+ valid = crq->valid;
+ dma_rmb();
+ }
+
+ if (!rc) {
+ if (ack) {
+ vio_enable_interrupts(vscsi->dma_dev);
+ ack = false;
+ pr_debug("handle_crq, reenabling interrupts\n");
+ }
+ valid = crq->valid;
+ dma_rmb();
+ if (valid)
+ goto cmd_work;
+ } else {
+ pr_debug("handle_crq, error: flags 0x%x, state 0x%hx, crq index 0x%x\n",
+ vscsi->flags, vscsi->state, vscsi->cmd_q.index);
+ }
+
+ pr_debug("Leaving handle_crq: schedule_q empty %d, flags 0x%x, state 0x%hx\n",
+ (int)list_empty(&vscsi->schedule_q), vscsi->flags,
+ vscsi->state);
+
+ spin_unlock_bh(&vscsi->intr_lock);
+}
+
+static int ibmvscsis_probe(struct vio_dev *vdev,
+ const struct vio_device_id *id)
+{
+ struct scsi_info *vscsi;
+ int rc = 0;
+ long hrc = 0;
+ char wq_name[24];
+
+ vscsi = kzalloc(sizeof(*vscsi), GFP_KERNEL);
+ if (!vscsi) {
+ rc = -ENOMEM;
+ pr_err("probe: allocation of adapter failed\n");
+ return rc;
+ }
+
+ vscsi->dma_dev = vdev;
+ vscsi->dev = vdev->dev;
+ INIT_LIST_HEAD(&vscsi->schedule_q);
+ INIT_LIST_HEAD(&vscsi->waiting_rsp);
+ INIT_LIST_HEAD(&vscsi->active_q);
+
+ snprintf(vscsi->tport.tport_name, 256, "%s", dev_name(&vdev->dev));
+
+ pr_debug("probe tport_name: %s\n", vscsi->tport.tport_name);
+
+ rc = read_dma_window(vscsi);
+ if (rc)
+ goto free_adapter;
+ pr_debug("Probe: liobn 0x%x, riobn 0x%x\n",
+ vscsi->dds.window[LOCAL].liobn,
+ vscsi->dds.window[REMOTE].liobn);
+
+ strcpy(vscsi->eye, "VSCSI ");
+ strncat(vscsi->eye, vdev->name, MAX_EYE);
+
+ vscsi->dds.unit_id = vdev->unit_address;
+
+ spin_lock_bh(&ibmvscsis_dev_lock);
+ list_add_tail(&vscsi->list, &ibmvscsis_dev_list);
+ spin_unlock_bh(&ibmvscsis_dev_lock);
+
+ /*
+ * TBD: How do we determine # of cmds to request? Do we know how
+ * many "children" we have?
+ */
+ vscsi->request_limit = INITIAL_SRP_LIMIT;
+ rc = srp_target_alloc(&vscsi->target, &vdev->dev, vscsi->request_limit,
+ SRP_MAX_IU_LEN);
+ if (rc)
+ goto rem_list;
+
+ vscsi->target.ldata = vscsi;
+
+ rc = ibmvscsis_alloc_cmds(vscsi, vscsi->request_limit);
+ if (rc) {
+ dev_err(&vscsi->dev, "alloc_cmds failed, rc %d, num %d\n",
+ rc, vscsi->request_limit);
+ goto free_target;
+ }
+
+ /*
+ * Note: the lock is used in freeing timers, so must initialize
+ * first so that ordering in case of error is correct.
+ */
+ spin_lock_init(&vscsi->intr_lock);
+
+ rc = ibmvscsis_alloctimer(vscsi);
+ if (rc) {
+ dev_err(&vscsi->dev, "probe: alloctimer failed, rc %d\n", rc);
+ goto free_cmds;
+ }
+
+ rc = ibmvscsis_create_command_q(vscsi, 256);
+ if (rc) {
+ dev_err(&vscsi->dev, "probe: create_command_q failed, rc %d\n",
+ rc);
+ goto free_timer;
+ }
+
+ vscsi->map_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!vscsi->map_buf) {
+ rc = -ENOMEM;
+ dev_err(&vscsi->dev, "probe: allocating cmd buffer failed\n");
+ goto destroy_queue;
+ }
+
+ vscsi->map_ioba = dma_map_single(&vdev->dev, vscsi->map_buf, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&vdev->dev, vscsi->map_ioba)) {
+ dev_err(&vscsi->dev, "probe: error mapping command buffer\n");
+ goto free_buf;
+ }
+
+ hrc = h_vioctl(vscsi->dds.unit_id, H_GET_PARTNER_INFO,
+ (u64)vscsi->map_ioba | ((u64)PAGE_SIZE << 32), 0, 0, 0,
+ 0);
+ if (hrc == H_SUCCESS)
+ vscsi->client_data.partition_number =
+ be64_to_cpu(*(u64 *)vscsi->map_buf);
+ /*
+ * We expect the VIOCTL to fail if we're configured as "any
+ * client can connect" and the client isn't activated yet.
+ * We'll make the call again when he sends an init msg.
+ */
+ pr_debug("probe hrc %ld, client partition num %d\n",
+ hrc, vscsi->client_data.partition_number);
+
+ tasklet_init(&vscsi->work_task, ibmvscsis_handle_crq,
+ (unsigned long)vscsi);
+
+ init_completion(&vscsi->wait_idle);
+
+ snprintf(wq_name, 24, "ibmvscsis%s", dev_name(&vdev->dev));
+ vscsi->work_q = create_workqueue(wq_name);
+ if (!vscsi->work_q) {
+ rc = -ENOMEM;
+ dev_err(&vscsi->dev, "create_workqueue failed\n");
+ goto unmap_buf;
+ }
+
+ rc = request_irq(vdev->irq, ibmvscsis_interrupt, 0, "ibmvscsis", vscsi);
+ if (rc) {
+ rc = -EPERM;
+ dev_err(&vscsi->dev, "probe: request_irq failed, rc %d\n", rc);
+ goto destroy_WQ;
+ }
+
+ spin_lock_bh(&vscsi->intr_lock);
+ vio_enable_interrupts(vdev);
+ if (rc) {
+ dev_err(&vscsi->dev, "enabling interrupts failed, rc %d\n", rc);
+ rc = -ENODEV;
+ spin_unlock_bh(&vscsi->intr_lock);
+ goto free_irq;
+ }
+
+ if (ibmvscsis_check_q(vscsi)) {
+ rc = ERROR;
+ dev_err(&vscsi->dev, "probe: check_q failed, rc %d\n", rc);
+ spin_unlock_bh(&vscsi->intr_lock);
+ goto disable_interrupt;
+ }
+ spin_unlock_bh(&vscsi->intr_lock);
+
+ dev_set_drvdata(&vdev->dev, vscsi);
+
+ return 0;
+
+disable_interrupt:
+ vio_disable_interrupts(vdev);
+free_irq:
+ free_irq(vdev->irq, vscsi);
+destroy_WQ:
+ destroy_workqueue(vscsi->work_q);
+unmap_buf:
+ dma_unmap_single(&vdev->dev, vscsi->map_ioba, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+free_buf:
+ kfree(vscsi->map_buf);
+destroy_queue:
+ tasklet_kill(&vscsi->work_task);
+ ibmvscsis_unregister_command_q(vscsi);
+ ibmvscsis_destroy_command_q(vscsi);
+free_timer:
+ ibmvscsis_freetimer(vscsi);
+free_cmds:
+ ibmvscsis_free_cmds(vscsi);
+free_target:
+ srp_target_free(&vscsi->target);
+rem_list:
+ spin_lock_bh(&ibmvscsis_dev_lock);
+ list_del(&vscsi->list);
+ spin_unlock_bh(&ibmvscsis_dev_lock);
+free_adapter:
+ kfree(vscsi);
+
+ return rc;
+}
+
+static int ibmvscsis_remove(struct vio_dev *vdev)
+{
+ struct scsi_info *vscsi = dev_get_drvdata(&vdev->dev);
+
+ pr_debug("remove (%s)\n", dev_name(&vscsi->dma_dev->dev));
+
+ /*
+ * TBD: Need to handle if there are commands on the waiting_rsp q
+ * Actually, can there still be cmds outstanding to tcm?
+ */
+
+ vio_disable_interrupts(vdev);
+ free_irq(vdev->irq, vscsi);
+ destroy_workqueue(vscsi->work_q);
+ dma_unmap_single(&vdev->dev, vscsi->map_ioba, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ kfree(vscsi->map_buf);
+ tasklet_kill(&vscsi->work_task);
+ ibmvscsis_unregister_command_q(vscsi);
+ ibmvscsis_destroy_command_q(vscsi);
+ ibmvscsis_freetimer(vscsi);
+ ibmvscsis_free_cmds(vscsi);
+ srp_target_free(&vscsi->target);
+ spin_lock_bh(&ibmvscsis_dev_lock);
+ list_del(&vscsi->list);
+ spin_unlock_bh(&ibmvscsis_dev_lock);
+ kfree(vscsi);
+
+ return 0;
+}
+
+static ssize_t system_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%s\n", system_id);
+}
+
+static ssize_t partition_number_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%x\n", partition_number);
+}
+
+static ssize_t unit_address_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct scsi_info *vscsi = container_of(dev, struct scsi_info, dev);
+
+ return snprintf(buf, PAGE_SIZE, "%x\n", vscsi->dma_dev->unit_address);
+}
+
+static int ibmvscsis_get_system_info(void)
+{
+ struct device_node *rootdn, *vdevdn;
+ const char *id, *model, *name;
+ const uint *num;
+
+ rootdn = of_find_node_by_path("/");
+ if (!rootdn)
+ return -ENOENT;
+
+ model = of_get_property(rootdn, "model", NULL);
+ id = of_get_property(rootdn, "system-id", NULL);
+ if (model && id)
+ snprintf(system_id, sizeof(system_id), "%s-%s", model, id);
+
+ name = of_get_property(rootdn, "ibm,partition-name", NULL);
+ if (name)
+ strncpy(partition_name, name, sizeof(partition_name));
+
+ num = of_get_property(rootdn, "ibm,partition-no", NULL);
+ if (num)
+ partition_number = *num;
+
+ of_node_put(rootdn);
+
+ vdevdn = of_find_node_by_path("/vdevice");
+ if (vdevdn) {
+ const uint *mvds;
+
+ mvds = of_get_property(vdevdn, "ibm,max-virtual-dma-size",
+ NULL);
+ if (mvds)
+ max_vdma_size = *mvds;
+ of_node_put(vdevdn);
+ }
+
+ return 0;
+}
+
+static char *ibmvscsis_get_fabric_name(void)
+{
+ return "ibmvscsis";
+}
+
+static char *ibmvscsis_get_fabric_wwn(struct se_portal_group *se_tpg)
+{
+ struct ibmvscsis_tport *tport =
+ container_of(se_tpg, struct ibmvscsis_tport, se_tpg);
+
+ return tport->tport_name;
+}
+
+static u16 ibmvscsis_get_tag(struct se_portal_group *se_tpg)
+{
+ struct ibmvscsis_tport *tport =
+ container_of(se_tpg, struct ibmvscsis_tport, se_tpg);
+
+ return tport->tport_tpgt;
+}
+
+static u32 ibmvscsis_get_default_depth(struct se_portal_group *se_tpg)
+{
+ return 1;
+}
+
+static int ibmvscsis_check_true(struct se_portal_group *se_tpg)
+{
+ return 1;
+}
+
+static int ibmvscsis_check_false(struct se_portal_group *se_tpg)
+{
+ return 0;
+}
+
+static u32 ibmvscsis_tpg_get_inst_index(struct se_portal_group *se_tpg)
+{
+ return 1;
+}
+
+static int ibmvscsis_check_stop_free(struct se_cmd *se_cmd)
+{
+ return target_put_sess_cmd(se_cmd);
+}
+
+static void ibmvscsis_release_cmd(struct se_cmd *se_cmd)
+{
+ struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd,
+ se_cmd);
+ struct scsi_info *vscsi = cmd->adapter;
+
+ pr_debug("release_cmd %p, flags %d\n", se_cmd, cmd->flags);
+
+ spin_lock_bh(&vscsi->intr_lock);
+ /* Remove from active_q */
+ list_del(&cmd->list);
+ list_add_tail(&cmd->list, &vscsi->waiting_rsp);
+ ibmvscsis_send_messages(vscsi);
+ spin_unlock_bh(&vscsi->intr_lock);
+}
+
+static u32 ibmvscsis_sess_get_index(struct se_session *se_sess)
+{
+ return 0;
+}
+
+static int ibmvscsis_write_pending(struct se_cmd *se_cmd)
+{
+ struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd,
+ se_cmd);
+ struct iu_entry *iue = cmd->iue;
+ int rc;
+
+ pr_debug("write_pending, se_cmd %p, length 0x%x\n",
+ se_cmd, se_cmd->data_length);
+
+ rc = srp_transfer_data(cmd, &vio_iu(iue)->srp.cmd, ibmvscsis_rdma,
+ 1, 1);
+ if (rc) {
+ pr_err("srp_transfer_data() failed: %d\n", rc);
+ return -EAGAIN;
+ }
+ /*
+ * We now tell TCM to add this WRITE CDB directly into the TCM storage
+ * object execution queue.
+ */
+ target_execute_cmd(se_cmd);
+ return 0;
+}
+
+static int ibmvscsis_write_pending_status(struct se_cmd *se_cmd)
+{
+ return 0;
+}
+
+static void ibmvscsis_set_default_node_attrs(struct se_node_acl *nacl)
+{
+}
+
+static int ibmvscsis_get_cmd_state(struct se_cmd *se_cmd)
+{
+ return 0;
+}
+
+static int ibmvscsis_queue_data_in(struct se_cmd *se_cmd)
+{
+ struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd,
+ se_cmd);
+ struct iu_entry *iue = cmd->iue;
+ struct scsi_info *vscsi = cmd->adapter;
+ char *sd;
+ uint len = 0;
+ int rc;
+
+ pr_debug("queue_data_in, se_cmd %p, length 0x%x\n",
+ se_cmd, se_cmd->data_length);
+
+ rc = srp_transfer_data(cmd, &vio_iu(iue)->srp.cmd, ibmvscsis_rdma, 1,
+ 1);
+ if (rc) {
+ pr_err("srp_transfer_data failed: %d\n", rc);
+ sd = se_cmd->sense_buffer;
+ se_cmd->scsi_sense_length = 18;
+ memset(se_cmd->sense_buffer, 0, se_cmd->scsi_sense_length);
+ /* Logical Unit Communication Time-out asc/ascq = 0x0801 */
+ scsi_build_sense_buffer(0, se_cmd->sense_buffer, MEDIUM_ERROR,
+ 0x08, 0x01);
+ }
+
+ srp_build_response(vscsi, cmd, &len);
+ cmd->rsp.format = SRP_FORMAT;
+ cmd->rsp.len = len;
+
+ return 0;
+}
+
+static int ibmvscsis_queue_status(struct se_cmd *se_cmd)
+{
+ struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd,
+ se_cmd);
+ struct scsi_info *vscsi = cmd->adapter;
+ uint len;
+
+ pr_debug("queue_status %p\n", se_cmd);
+
+ srp_build_response(vscsi, cmd, &len);
+ cmd->rsp.format = SRP_FORMAT;
+ cmd->rsp.len = len;
+
+ return 0;
+}
+
+static void ibmvscsis_queue_tm_rsp(struct se_cmd *se_cmd)
+{
+ struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd,
+ se_cmd);
+ struct scsi_info *vscsi = cmd->adapter;
+ uint len;
+
+ pr_debug("queue_tm_rsp %p, status %d\n",
+ se_cmd, (int)se_cmd->se_tmr_req->response);
+
+ srp_build_response(vscsi, cmd, &len);
+ cmd->rsp.format = SRP_FORMAT;
+ cmd->rsp.len = len;
+}
+
+static void ibmvscsis_aborted_task(struct se_cmd *se_cmd)
+{
+ /* TBD: What (if anything) should we do here? */
+ pr_debug("ibmvscsis_aborted_task %p\n", se_cmd);
+}
+
+static struct se_wwn *ibmvscsis_make_tport(struct target_fabric_configfs *tf,
+ struct config_group *group,
+ const char *name)
+{
+ struct ibmvscsis_tport *tport;
+
+ tport = ibmvscsis_lookup_port(name);
+ if (tport) {
+ tport->tport_proto_id = SCSI_PROTOCOL_SRP;
+ pr_debug("make_tport(%s), pointer:%p, tport_id:%x\n",
+ name, tport, tport->tport_proto_id);
+ return &tport->tport_wwn;
+ }
+
+ return ERR_PTR(-EINVAL);
+}
+
+static void ibmvscsis_drop_tport(struct se_wwn *wwn)
+{
+ struct ibmvscsis_tport *tport = container_of(wwn,
+ struct ibmvscsis_tport,
+ tport_wwn);
+
+ pr_debug("drop_tport(%s)\n",
+ config_item_name(&tport->tport_wwn.wwn_group.cg_item));
+}
+
+static struct se_portal_group *ibmvscsis_make_tpg(struct se_wwn *wwn,
+ struct config_group *group,
+ const char *name)
+{
+ struct ibmvscsis_tport *tport =
+ container_of(wwn, struct ibmvscsis_tport, tport_wwn);
+ int rc;
+
+ tport->releasing = false;
+
+ rc = core_tpg_register(&tport->tport_wwn, &tport->se_tpg,
+ tport->tport_proto_id);
+ if (rc)
+ return ERR_PTR(rc);
+
+ return &tport->se_tpg;
+}
+
+static void ibmvscsis_drop_tpg(struct se_portal_group *se_tpg)
+{
+ struct ibmvscsis_tport *tport = container_of(se_tpg,
+ struct ibmvscsis_tport,
+ se_tpg);
+
+ tport->releasing = true;
+ tport->enabled = false;
+
+ /*
+ * Release the virtual I_T Nexus for this ibmvscsis TPG
+ */
+ ibmvscsis_drop_nexus(tport);
+ /*
+ * Deregister the se_tpg from TCM..
+ */
+ core_tpg_deregister(se_tpg);
+}
+
+static ssize_t ibmvscsis_wwn_version_show(struct config_item *item,
+ char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%s\n", IBMVSCSIS_VERSION);
+}
+CONFIGFS_ATTR_RO(ibmvscsis_wwn_, version);
+
+static struct configfs_attribute *ibmvscsis_wwn_attrs[] = {
+ &ibmvscsis_wwn_attr_version,
+ NULL,
+};
+
+static ssize_t ibmvscsis_tpg_enable_show(struct config_item *item,
+ char *page)
+{
+ struct se_portal_group *se_tpg = to_tpg(item);
+ struct ibmvscsis_tport *tport = container_of(se_tpg,
+ struct ibmvscsis_tport,
+ se_tpg);
+
+ return snprintf(page, PAGE_SIZE, "%d\n", (tport->enabled) ? 1 : 0);
+}
+
+static ssize_t ibmvscsis_tpg_enable_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct se_portal_group *se_tpg = to_tpg(item);
+ struct ibmvscsis_tport *tport = container_of(se_tpg,
+ struct ibmvscsis_tport,
+ se_tpg);
+ struct scsi_info *vscsi = container_of(tport, struct scsi_info, tport);
+ unsigned long tmp;
+ int rc;
+ long lrc;
+
+ rc = kstrtoul(page, 0, &tmp);
+ if (rc < 0) {
+ pr_err("Unable to extract srpt_tpg_store_enable\n");
+ return -EINVAL;
+ }
+
+ if ((tmp != 0) && (tmp != 1)) {
+ pr_err("Illegal value for srpt_tpg_store_enable\n");
+ return -EINVAL;
+ }
+
+ if (tmp) {
+ tport->enabled = true;
+ spin_lock_bh(&vscsi->intr_lock);
+ lrc = ibmvscsis_enable_change_state(vscsi);
+ if (lrc)
+ pr_err("enable_change_state failed, rc %ld state %d\n",
+ lrc, vscsi->state);
+ spin_unlock_bh(&vscsi->intr_lock);
+ } else {
+ tport->enabled = false;
+ }
+
+ pr_debug("tpg_enable_store, state %d\n", vscsi->state);
+
+ return count;
+}
+CONFIGFS_ATTR(ibmvscsis_tpg_, enable);
+
+static struct configfs_attribute *ibmvscsis_tpg_attrs[] = {
+ &ibmvscsis_tpg_attr_enable,
+ NULL,
+};
+
+static const struct target_core_fabric_ops ibmvscsis_ops = {
+ .module = THIS_MODULE,
+ .name = "ibmvscsis",
+ .get_fabric_name = ibmvscsis_get_fabric_name,
+ .tpg_get_wwn = ibmvscsis_get_fabric_wwn,
+ .tpg_get_tag = ibmvscsis_get_tag,
+ .tpg_get_default_depth = ibmvscsis_get_default_depth,
+ .tpg_check_demo_mode = ibmvscsis_check_true,
+ .tpg_check_demo_mode_cache = ibmvscsis_check_true,
+ .tpg_check_demo_mode_write_protect = ibmvscsis_check_false,
+ .tpg_check_prod_mode_write_protect = ibmvscsis_check_false,
+ .tpg_get_inst_index = ibmvscsis_tpg_get_inst_index,
+ .check_stop_free = ibmvscsis_check_stop_free,
+ .release_cmd = ibmvscsis_release_cmd,
+ .sess_get_index = ibmvscsis_sess_get_index,
+ .write_pending = ibmvscsis_write_pending,
+ .write_pending_status = ibmvscsis_write_pending_status,
+ .set_default_node_attributes = ibmvscsis_set_default_node_attrs,
+ .get_cmd_state = ibmvscsis_get_cmd_state,
+ .queue_data_in = ibmvscsis_queue_data_in,
+ .queue_status = ibmvscsis_queue_status,
+ .queue_tm_rsp = ibmvscsis_queue_tm_rsp,
+ .aborted_task = ibmvscsis_aborted_task,
+ /*
+ * Setup function pointers for logic in target_core_fabric_configfs.c
+ */
+ .fabric_make_wwn = ibmvscsis_make_tport,
+ .fabric_drop_wwn = ibmvscsis_drop_tport,
+ .fabric_make_tpg = ibmvscsis_make_tpg,
+ .fabric_drop_tpg = ibmvscsis_drop_tpg,
+
+ .tfc_wwn_attrs = ibmvscsis_wwn_attrs,
+ .tfc_tpg_base_attrs = ibmvscsis_tpg_attrs,
+};
+
+static void ibmvscsis_dev_release(struct device *dev) {};
+
+static struct class_attribute ibmvscsis_class_attrs[] = {
+ __ATTR_NULL,
+};
+
+static struct device_attribute dev_attr_system_id =
+ __ATTR(system_id, S_IRUGO, system_id_show, NULL);
+
+static struct device_attribute dev_attr_partition_number =
+ __ATTR(partition_number, S_IRUGO, partition_number_show, NULL);
+
+static struct device_attribute dev_attr_unit_address =
+ __ATTR(unit_address, S_IRUGO, unit_address_show, NULL);
+
+static struct attribute *ibmvscsis_dev_attrs[] = {
+ &dev_attr_system_id.attr,
+ &dev_attr_partition_number.attr,
+ &dev_attr_unit_address.attr,
+};
+ATTRIBUTE_GROUPS(ibmvscsis_dev);
+
+static struct class ibmvscsis_class = {
+ .name = "ibmvscsis",
+ .dev_release = ibmvscsis_dev_release,
+ .class_attrs = ibmvscsis_class_attrs,
+ .dev_groups = ibmvscsis_dev_groups,
+};
+
+static struct vio_device_id ibmvscsis_device_table[] = {
+ { "v-scsi-host", "IBM,v-scsi-host" },
+ { "", "" }
+};
+MODULE_DEVICE_TABLE(vio, ibmvscsis_device_table);
+
+static struct vio_driver ibmvscsis_driver = {
+ .name = "ibmvscsis",
+ .id_table = ibmvscsis_device_table,
+ .probe = ibmvscsis_probe,
+ .remove = ibmvscsis_remove,
+};
+
+/*
+ * ibmvscsis_init() - Kernel Module initialization
+ *
+ * Note: vio_register_driver() registers callback functions, and at least one
+ * of those callback functions calls TCM - Linux IO Target Subsystem, thus
+ * the SCSI Target template must be registered before vio_register_driver()
+ * is called.
+ */
+static int __init ibmvscsis_init(void)
+{
+ int rc = 0;
+
+ rc = ibmvscsis_get_system_info();
+ if (rc) {
+ pr_err("rc %d from get_system_info\n", rc);
+ goto out;
+ }
+
+ rc = class_register(&ibmvscsis_class);
+ if (rc) {
+ pr_err("failed class register\n");
+ goto out;
+ }
+
+ rc = target_register_template(&ibmvscsis_ops);
+ if (rc) {
+ pr_err("rc %d from target_register_template\n", rc);
+ goto unregister_class;
+ }
+
+ rc = vio_register_driver(&ibmvscsis_driver);
+ if (rc) {
+ pr_err("rc %d from vio_register_driver\n", rc);
+ goto unregister_target;
+ }
+
+ return 0;
+
+unregister_target:
+ target_unregister_template(&ibmvscsis_ops);
+unregister_class:
+ class_unregister(&ibmvscsis_class);
+out:
+ return rc;
+}
+
+static void __exit ibmvscsis_exit(void)
+{
+ pr_info("Unregister IBM virtual SCSI host driver\n");
+ vio_unregister_driver(&ibmvscsis_driver);
+ target_unregister_template(&ibmvscsis_ops);
+ class_unregister(&ibmvscsis_class);
+}
+
+MODULE_DESCRIPTION("IBMVSCSIS fabric driver");
+MODULE_AUTHOR("Bryant G. Ly and Michael Cyr");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(IBMVSCSIS_VERSION);
+module_init(ibmvscsis_init);
+module_exit(ibmvscsis_exit);
--- /dev/null
+/*******************************************************************************
+ * IBM Virtual SCSI Target Driver
+ * Copyright (C) 2003-2005 Dave Boutcher (boutcher@us.ibm.com) IBM Corp.
+ * Santiago Leon (santil@us.ibm.com) IBM Corp.
+ * Linda Xie (lxie@us.ibm.com) IBM Corp.
+ *
+ * Copyright (C) 2005-2011 FUJITA Tomonori <tomof@acm.org>
+ * Copyright (C) 2010 Nicholas A. Bellinger <nab@kernel.org>
+ * Copyright (C) 2016 Bryant G. Ly <bryantly@linux.vnet.ibm.com> IBM Corp.
+ *
+ * Authors: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
+ * Authors: Michael Cyr <mikecyr@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ ****************************************************************************/
+
+#ifndef __H_IBMVSCSI_TGT
+#define __H_IBMVSCSI_TGT
+
+#include "libsrp.h"
+
+#define SYS_ID_NAME_LEN 64
+#define PARTITION_NAMELEN 96
+#define IBMVSCSIS_NAMELEN 32
+
+#define MSG_HI 0
+#define MSG_LOW 1
+
+#define MAX_CMD_Q_PAGES 4
+#define CRQ_PER_PAGE (PAGE_SIZE / sizeof(struct viosrp_crq))
+/* in terms of number of elements */
+#define DEFAULT_CMD_Q_SIZE CRQ_PER_PAGE
+#define MAX_CMD_Q_SIZE (DEFAULT_CMD_Q_SIZE * MAX_CMD_Q_PAGES)
+
+#define SRP_VIOLATION 0x102 /* general error code */
+
+/*
+ * SRP buffer formats defined as of 16.a supported by this driver.
+ */
+#define SUPPORTED_FORMATS ((SRP_DATA_DESC_DIRECT << 1) | \
+ (SRP_DATA_DESC_INDIRECT << 1))
+
+#define SCSI_LUN_ADDR_METHOD_FLAT 1
+
+struct dma_window {
+ u32 liobn; /* Unique per vdevice */
+ u64 tce_base; /* Physical location of the TCE table */
+ u64 tce_size; /* Size of the TCE table in bytes */
+};
+
+struct target_dds {
+ u64 unit_id; /* 64 bit will force alignment */
+#define NUM_DMA_WINDOWS 2
+#define LOCAL 0
+#define REMOTE 1
+ struct dma_window window[NUM_DMA_WINDOWS];
+
+ /* root node property "ibm,partition-no" */
+ uint partition_num;
+ char partition_name[PARTITION_NAMELEN];
+};
+
+#define MAX_NUM_PORTS 1
+#define MAX_H_COPY_RDMA (128 * 1024)
+
+#define MAX_EYE 64
+
+/* Return codes */
+#define ADAPT_SUCCESS 0L
+/* choose error codes that do not conflict with PHYP */
+#define ERROR -40L
+
+struct format_code {
+ u8 reserved;
+ u8 buffers;
+};
+
+struct client_info {
+#define SRP_VERSION "16.a"
+ char srp_version[8];
+ /* root node property ibm,partition-name */
+ char partition_name[PARTITION_NAMELEN];
+ /* root node property ibm,partition-no */
+ u32 partition_number;
+ /* initially 1 */
+ u32 mad_version;
+ u32 os_type;
+};
+
+/*
+ * Changing this constant changes the number of seconds to wait before
+ * considering the client will never service its queue again.
+ */
+#define SECONDS_TO_CONSIDER_FAILED 30
+/*
+ * These constants set the polling period used to determine if the client
+ * has freed at least one element in the response queue.
+ */
+#define WAIT_SECONDS 1
+#define WAIT_NANO_SECONDS 5000
+#define MAX_TIMER_POPS ((1000000 / WAIT_NANO_SECONDS) * \
+ SECONDS_TO_CONSIDER_FAILED)
+/*
+ * general purpose timer control block
+ * which can be used for multiple functions
+ */
+struct timer_cb {
+ struct hrtimer timer;
+ /*
+ * how long has it been since the client
+ * serviced the queue. The variable is incrmented
+ * in the service_wait_q routine and cleared
+ * in send messages
+ */
+ int timer_pops;
+ /* the timer is started */
+ bool started;
+};
+
+struct cmd_queue {
+ /* kva */
+ struct viosrp_crq *base_addr;
+ dma_addr_t crq_token;
+ /* used to maintain index */
+ uint mask;
+ /* current element */
+ uint index;
+ int size;
+};
+
+#define SCSOLNT_RESP_SHIFT 1
+#define UCSOLNT_RESP_SHIFT 2
+
+#define SCSOLNT BIT(SCSOLNT_RESP_SHIFT)
+#define UCSOLNT BIT(UCSOLNT_RESP_SHIFT)
+
+enum cmd_type {
+ SCSI_CDB = 0x01,
+ TASK_MANAGEMENT = 0x02,
+ /* MAD or addressed to port 0 */
+ ADAPTER_MAD = 0x04,
+ UNSET_TYPE = 0x08,
+};
+
+struct iu_rsp {
+ u8 format;
+ u8 sol_not;
+ u16 len;
+ /* tag is just to help client identify cmd, so don't translate be/le */
+ u64 tag;
+};
+
+struct ibmvscsis_cmd {
+ struct list_head list;
+ /* Used for TCM Core operations */
+ struct se_cmd se_cmd;
+ struct iu_entry *iue;
+ struct iu_rsp rsp;
+ struct work_struct work;
+ struct scsi_info *adapter;
+ /* Sense buffer that will be mapped into outgoing status */
+ unsigned char sense_buf[TRANSPORT_SENSE_BUFFER];
+ u64 init_time;
+#define CMD_FAST_FAIL BIT(0)
+ u32 flags;
+ char type;
+};
+
+struct ibmvscsis_nexus {
+ struct se_session *se_sess;
+};
+
+struct ibmvscsis_tport {
+ /* SCSI protocol the tport is providing */
+ u8 tport_proto_id;
+ /* ASCII formatted WWPN for SRP Target port */
+ char tport_name[IBMVSCSIS_NAMELEN];
+ /* Returned by ibmvscsis_make_tport() */
+ struct se_wwn tport_wwn;
+ /* Returned by ibmvscsis_make_tpg() */
+ struct se_portal_group se_tpg;
+ /* ibmvscsis port target portal group tag for TCM */
+ u16 tport_tpgt;
+ /* Pointer to TCM session for I_T Nexus */
+ struct ibmvscsis_nexus *ibmv_nexus;
+ bool enabled;
+ bool releasing;
+};
+
+struct scsi_info {
+ struct list_head list;
+ char eye[MAX_EYE];
+
+ /* commands waiting for space on repsonse queue */
+ struct list_head waiting_rsp;
+#define NO_QUEUE 0x00
+#define WAIT_ENABLED 0X01
+ /* driver has received an initialize command */
+#define PART_UP_WAIT_ENAB 0x02
+#define WAIT_CONNECTION 0x04
+ /* have established a connection */
+#define CONNECTED 0x08
+ /* at least one port is processing SRP IU */
+#define SRP_PROCESSING 0x10
+ /* remove request received */
+#define UNCONFIGURING 0x20
+ /* disconnect by letting adapter go idle, no error */
+#define WAIT_IDLE 0x40
+ /* disconnecting to clear an error */
+#define ERR_DISCONNECT 0x80
+ /* disconnect to clear error state, then come back up */
+#define ERR_DISCONNECT_RECONNECT 0x100
+ /* disconnected after clearing an error */
+#define ERR_DISCONNECTED 0x200
+ /* A series of errors caused unexpected errors */
+#define UNDEFINED 0x400
+ u16 state;
+ int fast_fail;
+ struct target_dds dds;
+ char *cmd_pool;
+ /* list of free commands */
+ struct list_head free_cmd;
+ /* command elements ready for scheduler */
+ struct list_head schedule_q;
+ /* commands sent to TCM */
+ struct list_head active_q;
+ caddr_t *map_buf;
+ /* ioba of map buffer */
+ dma_addr_t map_ioba;
+ /* allowable number of outstanding SRP requests */
+ int request_limit;
+ /* extra credit */
+ int credit;
+ /* outstanding transactions against credit limit */
+ int debit;
+
+ /* allow only one outstanding mad request */
+#define PROCESSING_MAD 0x00002
+ /* Waiting to go idle */
+#define WAIT_FOR_IDLE 0x00004
+ /* H_REG_CRQ called */
+#define CRQ_CLOSED 0x00010
+ /* detected that client has failed */
+#define CLIENT_FAILED 0x00040
+ /* detected that transport event occurred */
+#define TRANS_EVENT 0x00080
+ /* don't attempt to send anything to the client */
+#define RESPONSE_Q_DOWN 0x00100
+ /* request made to schedule disconnect handler */
+#define SCHEDULE_DISCONNECT 0x00400
+ /* disconnect handler is scheduled */
+#define DISCONNECT_SCHEDULED 0x00800
+ u32 flags;
+ /* adapter lock */
+ spinlock_t intr_lock;
+ /* information needed to manage command queue */
+ struct cmd_queue cmd_q;
+ /* used in hcall to copy response back into srp buffer */
+ u64 empty_iu_id;
+ /* used in crq, to tag what iu the response is for */
+ u64 empty_iu_tag;
+ uint new_state;
+ /* control block for the response queue timer */
+ struct timer_cb rsp_q_timer;
+ /* keep last client to enable proper accounting */
+ struct client_info client_data;
+ /* what can this client do */
+ u32 client_cap;
+ /*
+ * The following two fields capture state and flag changes that
+ * can occur when the lock is given up. In the orginal design,
+ * the lock was held during calls into phyp;
+ * however, phyp did not meet PAPR architecture. This is
+ * a work around.
+ */
+ u16 phyp_acr_state;
+ u32 phyp_acr_flags;
+
+ struct workqueue_struct *work_q;
+ struct completion wait_idle;
+ struct device dev;
+ struct vio_dev *dma_dev;
+ struct srp_target target;
+ struct ibmvscsis_tport tport;
+ struct tasklet_struct work_task;
+ struct work_struct proc_work;
+};
+
+/*
+ * Provide a constant that allows software to detect the adapter is
+ * disconnecting from the client from one of several states.
+ */
+#define IS_DISCONNECTING (UNCONFIGURING | ERR_DISCONNECT_RECONNECT | \
+ ERR_DISCONNECT)
+
+/*
+ * Provide a constant that can be used with interrupt handling that
+ * essentially lets the interrupt handler know that all requests should
+ * be thrown out,
+ */
+#define DONT_PROCESS_STATE (IS_DISCONNECTING | UNDEFINED | \
+ ERR_DISCONNECTED | WAIT_IDLE)
+
+/*
+ * If any of these flag bits are set then do not allow the interrupt
+ * handler to schedule the off level handler.
+ */
+#define BLOCK (DISCONNECT_SCHEDULED)
+
+/* State and transition events that stop the interrupt handler */
+#define TARGET_STOP(VSCSI) (long)(((VSCSI)->state & DONT_PROCESS_STATE) | \
+ ((VSCSI)->flags & BLOCK))
+
+/* flag bit that are not reset during disconnect */
+#define PRESERVE_FLAG_FIELDS 0
+
+#define vio_iu(IUE) ((union viosrp_iu *)((IUE)->sbuf->buf))
+
+#define READ_CMD(cdb) (((cdb)[0] & 0x1F) == 8)
+#define WRITE_CMD(cdb) (((cdb)[0] & 0x1F) == 0xA)
+
+#ifndef H_GET_PARTNER_INFO
+#define H_GET_PARTNER_INFO 0x0000000000000008LL
+#endif
+
+#define h_copy_rdma(l, sa, sb, da, db) \
+ plpar_hcall_norets(H_COPY_RDMA, l, sa, sb, da, db)
+#define h_vioctl(u, o, a, u1, u2, u3, u4) \
+ plpar_hcall_norets(H_VIOCTL, u, o, a, u1, u2)
+#define h_reg_crq(ua, tok, sz) \
+ plpar_hcall_norets(H_REG_CRQ, ua, tok, sz)
+#define h_free_crq(ua) \
+ plpar_hcall_norets(H_FREE_CRQ, ua)
+#define h_send_crq(ua, d1, d2) \
+ plpar_hcall_norets(H_SEND_CRQ, ua, d1, d2)
+
+#endif
--- /dev/null
+/*******************************************************************************
+ * SCSI RDMA Protocol lib functions
+ *
+ * Copyright (C) 2006 FUJITA Tomonori <tomof@acm.org>
+ * Copyright (C) 2016 Bryant G. Ly <bryantly@linux.vnet.ibm.com> IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ ***********************************************************************/
+
+#define pr_fmt(fmt) "libsrp: " fmt
+
+#include <linux/printk.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/kfifo.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <scsi/srp.h>
+#include <target/target_core_base.h>
+#include "libsrp.h"
+#include "ibmvscsi_tgt.h"
+
+static int srp_iu_pool_alloc(struct srp_queue *q, size_t max,
+ struct srp_buf **ring)
+{
+ struct iu_entry *iue;
+ int i;
+
+ q->pool = kcalloc(max, sizeof(struct iu_entry *), GFP_KERNEL);
+ if (!q->pool)
+ return -ENOMEM;
+ q->items = kcalloc(max, sizeof(struct iu_entry), GFP_KERNEL);
+ if (!q->items)
+ goto free_pool;
+
+ spin_lock_init(&q->lock);
+ kfifo_init(&q->queue, (void *)q->pool, max * sizeof(void *));
+
+ for (i = 0, iue = q->items; i < max; i++) {
+ kfifo_in(&q->queue, (void *)&iue, sizeof(void *));
+ iue->sbuf = ring[i];
+ iue++;
+ }
+ return 0;
+
+free_pool:
+ kfree(q->pool);
+ return -ENOMEM;
+}
+
+static void srp_iu_pool_free(struct srp_queue *q)
+{
+ kfree(q->items);
+ kfree(q->pool);
+}
+
+static struct srp_buf **srp_ring_alloc(struct device *dev,
+ size_t max, size_t size)
+{
+ struct srp_buf **ring;
+ int i;
+
+ ring = kcalloc(max, sizeof(struct srp_buf *), GFP_KERNEL);
+ if (!ring)
+ return NULL;
+
+ for (i = 0; i < max; i++) {
+ ring[i] = kzalloc(sizeof(*ring[i]), GFP_KERNEL);
+ if (!ring[i])
+ goto out;
+ ring[i]->buf = dma_alloc_coherent(dev, size, &ring[i]->dma,
+ GFP_KERNEL);
+ if (!ring[i]->buf)
+ goto out;
+ }
+ return ring;
+
+out:
+ for (i = 0; i < max && ring[i]; i++) {
+ if (ring[i]->buf) {
+ dma_free_coherent(dev, size, ring[i]->buf,
+ ring[i]->dma);
+ }
+ kfree(ring[i]);
+ }
+ kfree(ring);
+
+ return NULL;
+}
+
+static void srp_ring_free(struct device *dev, struct srp_buf **ring,
+ size_t max, size_t size)
+{
+ int i;
+
+ for (i = 0; i < max; i++) {
+ dma_free_coherent(dev, size, ring[i]->buf, ring[i]->dma);
+ kfree(ring[i]);
+ }
+ kfree(ring);
+}
+
+int srp_target_alloc(struct srp_target *target, struct device *dev,
+ size_t nr, size_t iu_size)
+{
+ int err;
+
+ spin_lock_init(&target->lock);
+
+ target->dev = dev;
+
+ target->srp_iu_size = iu_size;
+ target->rx_ring_size = nr;
+ target->rx_ring = srp_ring_alloc(target->dev, nr, iu_size);
+ if (!target->rx_ring)
+ return -ENOMEM;
+ err = srp_iu_pool_alloc(&target->iu_queue, nr, target->rx_ring);
+ if (err)
+ goto free_ring;
+
+ dev_set_drvdata(target->dev, target);
+ return 0;
+
+free_ring:
+ srp_ring_free(target->dev, target->rx_ring, nr, iu_size);
+ return -ENOMEM;
+}
+
+void srp_target_free(struct srp_target *target)
+{
+ dev_set_drvdata(target->dev, NULL);
+ srp_ring_free(target->dev, target->rx_ring, target->rx_ring_size,
+ target->srp_iu_size);
+ srp_iu_pool_free(&target->iu_queue);
+}
+
+struct iu_entry *srp_iu_get(struct srp_target *target)
+{
+ struct iu_entry *iue = NULL;
+
+ if (kfifo_out_locked(&target->iu_queue.queue, (void *)&iue,
+ sizeof(void *),
+ &target->iu_queue.lock) != sizeof(void *)) {
+ WARN_ONCE(1, "unexpected fifo state");
+ return NULL;
+ }
+ if (!iue)
+ return iue;
+ iue->target = target;
+ iue->flags = 0;
+ return iue;
+}
+
+void srp_iu_put(struct iu_entry *iue)
+{
+ kfifo_in_locked(&iue->target->iu_queue.queue, (void *)&iue,
+ sizeof(void *), &iue->target->iu_queue.lock);
+}
+
+static int srp_direct_data(struct ibmvscsis_cmd *cmd, struct srp_direct_buf *md,
+ enum dma_data_direction dir, srp_rdma_t rdma_io,
+ int dma_map, int ext_desc)
+{
+ struct iu_entry *iue = NULL;
+ struct scatterlist *sg = NULL;
+ int err, nsg = 0, len;
+
+ if (dma_map) {
+ iue = cmd->iue;
+ sg = cmd->se_cmd.t_data_sg;
+ nsg = dma_map_sg(iue->target->dev, sg, cmd->se_cmd.t_data_nents,
+ DMA_BIDIRECTIONAL);
+ if (!nsg) {
+ pr_err("fail to map %p %d\n", iue,
+ cmd->se_cmd.t_data_nents);
+ return 0;
+ }
+ len = min(cmd->se_cmd.data_length, be32_to_cpu(md->len));
+ } else {
+ len = be32_to_cpu(md->len);
+ }
+
+ err = rdma_io(cmd, sg, nsg, md, 1, dir, len);
+
+ if (dma_map)
+ dma_unmap_sg(iue->target->dev, sg, nsg, DMA_BIDIRECTIONAL);
+
+ return err;
+}
+
+static int srp_indirect_data(struct ibmvscsis_cmd *cmd, struct srp_cmd *srp_cmd,
+ struct srp_indirect_buf *id,
+ enum dma_data_direction dir, srp_rdma_t rdma_io,
+ int dma_map, int ext_desc)
+{
+ struct iu_entry *iue = NULL;
+ struct srp_direct_buf *md = NULL;
+ struct scatterlist dummy, *sg = NULL;
+ dma_addr_t token = 0;
+ int err = 0;
+ int nmd, nsg = 0, len;
+
+ if (dma_map || ext_desc) {
+ iue = cmd->iue;
+ sg = cmd->se_cmd.t_data_sg;
+ }
+
+ nmd = be32_to_cpu(id->table_desc.len) / sizeof(struct srp_direct_buf);
+
+ if ((dir == DMA_FROM_DEVICE && nmd == srp_cmd->data_in_desc_cnt) ||
+ (dir == DMA_TO_DEVICE && nmd == srp_cmd->data_out_desc_cnt)) {
+ md = &id->desc_list[0];
+ goto rdma;
+ }
+
+ if (ext_desc && dma_map) {
+ md = dma_alloc_coherent(iue->target->dev,
+ be32_to_cpu(id->table_desc.len),
+ &token, GFP_KERNEL);
+ if (!md) {
+ pr_err("Can't get dma memory %u\n",
+ be32_to_cpu(id->table_desc.len));
+ return -ENOMEM;
+ }
+
+ sg_init_one(&dummy, md, be32_to_cpu(id->table_desc.len));
+ sg_dma_address(&dummy) = token;
+ sg_dma_len(&dummy) = be32_to_cpu(id->table_desc.len);
+ err = rdma_io(cmd, &dummy, 1, &id->table_desc, 1, DMA_TO_DEVICE,
+ be32_to_cpu(id->table_desc.len));
+ if (err) {
+ pr_err("Error copying indirect table %d\n", err);
+ goto free_mem;
+ }
+ } else {
+ pr_err("This command uses external indirect buffer\n");
+ return -EINVAL;
+ }
+
+rdma:
+ if (dma_map) {
+ nsg = dma_map_sg(iue->target->dev, sg, cmd->se_cmd.t_data_nents,
+ DMA_BIDIRECTIONAL);
+ if (!nsg) {
+ pr_err("fail to map %p %d\n", iue,
+ cmd->se_cmd.t_data_nents);
+ err = -EIO;
+ goto free_mem;
+ }
+ len = min(cmd->se_cmd.data_length, be32_to_cpu(id->len));
+ } else {
+ len = be32_to_cpu(id->len);
+ }
+
+ err = rdma_io(cmd, sg, nsg, md, nmd, dir, len);
+
+ if (dma_map)
+ dma_unmap_sg(iue->target->dev, sg, nsg, DMA_BIDIRECTIONAL);
+
+free_mem:
+ if (token && dma_map) {
+ dma_free_coherent(iue->target->dev,
+ be32_to_cpu(id->table_desc.len), md, token);
+ }
+ return err;
+}
+
+static int data_out_desc_size(struct srp_cmd *cmd)
+{
+ int size = 0;
+ u8 fmt = cmd->buf_fmt >> 4;
+
+ switch (fmt) {
+ case SRP_NO_DATA_DESC:
+ break;
+ case SRP_DATA_DESC_DIRECT:
+ size = sizeof(struct srp_direct_buf);
+ break;
+ case SRP_DATA_DESC_INDIRECT:
+ size = sizeof(struct srp_indirect_buf) +
+ sizeof(struct srp_direct_buf) * cmd->data_out_desc_cnt;
+ break;
+ default:
+ pr_err("client error. Invalid data_out_format %x\n", fmt);
+ break;
+ }
+ return size;
+}
+
+/*
+ * TODO: this can be called multiple times for a single command if it
+ * has very long data.
+ */
+int srp_transfer_data(struct ibmvscsis_cmd *cmd, struct srp_cmd *srp_cmd,
+ srp_rdma_t rdma_io, int dma_map, int ext_desc)
+{
+ struct srp_direct_buf *md;
+ struct srp_indirect_buf *id;
+ enum dma_data_direction dir;
+ int offset, err = 0;
+ u8 format;
+
+ if (!cmd->se_cmd.t_data_nents)
+ return 0;
+
+ offset = srp_cmd->add_cdb_len & ~3;
+
+ dir = srp_cmd_direction(srp_cmd);
+ if (dir == DMA_FROM_DEVICE)
+ offset += data_out_desc_size(srp_cmd);
+
+ if (dir == DMA_TO_DEVICE)
+ format = srp_cmd->buf_fmt >> 4;
+ else
+ format = srp_cmd->buf_fmt & ((1U << 4) - 1);
+
+ switch (format) {
+ case SRP_NO_DATA_DESC:
+ break;
+ case SRP_DATA_DESC_DIRECT:
+ md = (struct srp_direct_buf *)(srp_cmd->add_data + offset);
+ err = srp_direct_data(cmd, md, dir, rdma_io, dma_map, ext_desc);
+ break;
+ case SRP_DATA_DESC_INDIRECT:
+ id = (struct srp_indirect_buf *)(srp_cmd->add_data + offset);
+ err = srp_indirect_data(cmd, srp_cmd, id, dir, rdma_io, dma_map,
+ ext_desc);
+ break;
+ default:
+ pr_err("Unknown format %d %x\n", dir, format);
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+u64 srp_data_length(struct srp_cmd *cmd, enum dma_data_direction dir)
+{
+ struct srp_direct_buf *md;
+ struct srp_indirect_buf *id;
+ u64 len = 0;
+ uint offset = cmd->add_cdb_len & ~3;
+ u8 fmt;
+
+ if (dir == DMA_TO_DEVICE) {
+ fmt = cmd->buf_fmt >> 4;
+ } else {
+ fmt = cmd->buf_fmt & ((1U << 4) - 1);
+ offset += data_out_desc_size(cmd);
+ }
+
+ switch (fmt) {
+ case SRP_NO_DATA_DESC:
+ break;
+ case SRP_DATA_DESC_DIRECT:
+ md = (struct srp_direct_buf *)(cmd->add_data + offset);
+ len = be32_to_cpu(md->len);
+ break;
+ case SRP_DATA_DESC_INDIRECT:
+ id = (struct srp_indirect_buf *)(cmd->add_data + offset);
+ len = be32_to_cpu(id->len);
+ break;
+ default:
+ pr_err("invalid data format %x\n", fmt);
+ break;
+ }
+ return len;
+}
+
+int srp_get_desc_table(struct srp_cmd *srp_cmd, enum dma_data_direction *dir,
+ u64 *data_len)
+{
+ struct srp_indirect_buf *idb;
+ struct srp_direct_buf *db;
+ uint add_cdb_offset;
+ int rc;
+
+ /*
+ * The pointer computations below will only be compiled correctly
+ * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check
+ * whether srp_cmd::add_data has been declared as a byte pointer.
+ */
+ BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0)
+ && !__same_type(srp_cmd->add_data[0], (u8)0));
+
+ BUG_ON(!dir);
+ BUG_ON(!data_len);
+
+ rc = 0;
+ *data_len = 0;
+
+ *dir = DMA_NONE;
+
+ if (srp_cmd->buf_fmt & 0xf)
+ *dir = DMA_FROM_DEVICE;
+ else if (srp_cmd->buf_fmt >> 4)
+ *dir = DMA_TO_DEVICE;
+
+ add_cdb_offset = srp_cmd->add_cdb_len & ~3;
+ if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
+ ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
+ db = (struct srp_direct_buf *)(srp_cmd->add_data
+ + add_cdb_offset);
+ *data_len = be32_to_cpu(db->len);
+ } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) ||
+ ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) {
+ idb = (struct srp_indirect_buf *)(srp_cmd->add_data
+ + add_cdb_offset);
+
+ *data_len = be32_to_cpu(idb->len);
+ }
+ return rc;
+}
+
+MODULE_DESCRIPTION("SCSI RDMA Protocol lib functions");
+MODULE_AUTHOR("FUJITA Tomonori");
+MODULE_LICENSE("GPL");
--- /dev/null
+#ifndef __LIBSRP_H__
+#define __LIBSRP_H__
+
+#include <linux/list.h>
+#include <linux/kfifo.h>
+#include <scsi/srp.h>
+
+enum srp_valid {
+ INVALIDATE_CMD_RESP_EL = 0,
+ VALID_CMD_RESP_EL = 0x80,
+ VALID_INIT_MSG = 0xC0,
+ VALID_TRANS_EVENT = 0xFF
+};
+
+enum srp_format {
+ SRP_FORMAT = 1,
+ MAD_FORMAT = 2,
+ OS400_FORMAT = 3,
+ AIX_FORMAT = 4,
+ LINUX_FORMAT = 5,
+ MESSAGE_IN_CRQ = 6
+};
+
+enum srp_init_msg {
+ INIT_MSG = 1,
+ INIT_COMPLETE_MSG = 2
+};
+
+enum srp_trans_event {
+ UNUSED_FORMAT = 0,
+ PARTNER_FAILED = 1,
+ PARTNER_DEREGISTER = 2,
+ MIGRATED = 6
+};
+
+enum srp_status {
+ HEADER_DESCRIPTOR = 0xF1,
+ PING = 0xF5,
+ PING_RESPONSE = 0xF6
+};
+
+enum srp_mad_version {
+ MAD_VERSION_1 = 1
+};
+
+enum srp_os_type {
+ OS400 = 1,
+ LINUX = 2,
+ AIX = 3,
+ OFW = 4
+};
+
+enum srp_task_attributes {
+ SRP_SIMPLE_TASK = 0,
+ SRP_HEAD_TASK = 1,
+ SRP_ORDERED_TASK = 2,
+ SRP_ACA_TASK = 4
+};
+
+enum {
+ SRP_TASK_MANAGEMENT_FUNCTION_COMPLETE = 0,
+ SRP_REQUEST_FIELDS_INVALID = 2,
+ SRP_TASK_MANAGEMENT_FUNCTION_NOT_SUPPORTED = 4,
+ SRP_TASK_MANAGEMENT_FUNCTION_FAILED = 5
+};
+
+struct srp_buf {
+ dma_addr_t dma;
+ void *buf;
+};
+
+struct srp_queue {
+ void *pool;
+ void *items;
+ struct kfifo queue;
+ spinlock_t lock;
+};
+
+struct srp_target {
+ struct device *dev;
+
+ spinlock_t lock;
+ struct list_head cmd_queue;
+
+ size_t srp_iu_size;
+ struct srp_queue iu_queue;
+ size_t rx_ring_size;
+ struct srp_buf **rx_ring;
+
+ void *ldata;
+};
+
+struct iu_entry {
+ struct srp_target *target;
+
+ struct list_head ilist;
+ dma_addr_t remote_token;
+ unsigned long flags;
+
+ struct srp_buf *sbuf;
+ u16 iu_len;
+};
+
+struct ibmvscsis_cmd;
+
+typedef int (srp_rdma_t)(struct ibmvscsis_cmd *, struct scatterlist *, int,
+ struct srp_direct_buf *, int,
+ enum dma_data_direction, unsigned int);
+int srp_target_alloc(struct srp_target *, struct device *, size_t, size_t);
+void srp_target_free(struct srp_target *);
+struct iu_entry *srp_iu_get(struct srp_target *);
+void srp_iu_put(struct iu_entry *);
+int srp_transfer_data(struct ibmvscsis_cmd *, struct srp_cmd *,
+ srp_rdma_t, int, int);
+u64 srp_data_length(struct srp_cmd *cmd, enum dma_data_direction dir);
+int srp_get_desc_table(struct srp_cmd *srp_cmd, enum dma_data_direction *dir,
+ u64 *data_len);
+static inline int srp_cmd_direction(struct srp_cmd *cmd)
+{
+ return (cmd->buf_fmt >> 4) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+}
+
+#endif
config USB_EMXX
- bool "EMXX USB Function Device Controller"
+ tristate "EMXX USB Function Device Controller"
depends on USB_GADGET && (ARCH_SHMOBILE || (ARM && COMPILE_TEST))
help
The Emma Mobile series of SoCs from Renesas Electronics and
*/
#include <linux/kernel.h>
-#include <linux/init.h>
+#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/delay.h>
#include <linux/ioport.h>
#include "emxx_udc.h"
+#define DRIVER_DESC "EMXX UDC driver"
#define DMA_ADDR_INVALID (~(dma_addr_t)0)
static const char driver_name[] = "emxx_udc";
+static const char driver_desc[] = DRIVER_DESC;
/*===========================================================================*/
/* Prototype */
_nbu2ss_disable_controller(udc);
}
+/*-------------------------------------------------------------------------*/
+static int nbu2ss_drv_remove(struct platform_device *pdev)
+{
+ struct nbu2ss_udc *udc;
+ struct nbu2ss_ep *ep;
+ int i;
+
+ udc = &udc_controller;
+
+ for (i = 0; i < NUM_ENDPOINTS; i++) {
+ ep = &udc->ep[i];
+ if (ep->virt_buf)
+ dma_free_coherent(NULL, PAGE_SIZE,
+ (void *)ep->virt_buf, ep->phys_buf);
+ }
+
+ /* Interrupt Handler - Release */
+ free_irq(INT_VBUS, udc);
+
+ return 0;
+}
+
/*-------------------------------------------------------------------------*/
static int nbu2ss_drv_suspend(struct platform_device *pdev, pm_message_t state)
{
static struct platform_driver udc_driver = {
.probe = nbu2ss_drv_probe,
.shutdown = nbu2ss_drv_shutdown,
+ .remove = nbu2ss_drv_remove,
.suspend = nbu2ss_drv_suspend,
.resume = nbu2ss_drv_resume,
.driver = {
- .name = driver_name,
- .suppress_bind_attrs = true,
+ .name = driver_name,
},
};
-builtin_platform_driver(udc_driver);
+module_platform_driver(udc_driver);
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_AUTHOR("Renesas Electronics Corporation");
+MODULE_LICENSE("GPL");
bool scsi_cmd = (cmd->iscsi_opcode == ISCSI_OP_SCSI_CMD);
spin_lock_bh(&conn->cmd_lock);
- if (!list_empty(&cmd->i_conn_node))
+ if (!list_empty(&cmd->i_conn_node) &&
+ !(cmd->se_cmd.transport_state & CMD_T_FABRIC_STOP))
list_del_init(&cmd->i_conn_node);
spin_unlock_bh(&conn->cmd_lock);
static void iscsit_release_commands_from_conn(struct iscsi_conn *conn)
{
+ LIST_HEAD(tmp_list);
struct iscsi_cmd *cmd = NULL, *cmd_tmp = NULL;
struct iscsi_session *sess = conn->sess;
/*
* has been reset -> returned sleeping pre-handler state.
*/
spin_lock_bh(&conn->cmd_lock);
- list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_conn_node) {
+ list_splice_init(&conn->conn_cmd_list, &tmp_list);
+ list_for_each_entry(cmd, &tmp_list, i_conn_node) {
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+
+ if (se_cmd->se_tfo != NULL) {
+ spin_lock(&se_cmd->t_state_lock);
+ se_cmd->transport_state |= CMD_T_FABRIC_STOP;
+ spin_unlock(&se_cmd->t_state_lock);
+ }
+ }
+ spin_unlock_bh(&conn->cmd_lock);
+
+ list_for_each_entry_safe(cmd, cmd_tmp, &tmp_list, i_conn_node) {
list_del_init(&cmd->i_conn_node);
- spin_unlock_bh(&conn->cmd_lock);
iscsit_increment_maxcmdsn(cmd, sess);
-
iscsit_free_cmd(cmd, true);
- spin_lock_bh(&conn->cmd_lock);
}
- spin_unlock_bh(&conn->cmd_lock);
}
static void iscsit_stop_timers_for_cmds(
}
login->zero_tsih = zero_tsih;
- conn->sess->se_sess->sup_prot_ops =
- conn->conn_transport->iscsit_get_sup_prot_ops(conn);
+ if (conn->sess)
+ conn->sess->se_sess->sup_prot_ops =
+ conn->conn_transport->iscsit_get_sup_prot_ops(conn);
tpg = conn->tpg;
if (!tpg) {
* in ATA and we need to set TPE=1
*/
bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
- struct request_queue *q, int block_size)
+ struct request_queue *q)
{
+ int block_size = queue_logical_block_size(q);
+
if (!blk_queue_discard(q))
return false;
- attrib->max_unmap_lba_count = (q->limits.max_discard_sectors << 9) /
- block_size;
+ attrib->max_unmap_lba_count =
+ q->limits.max_discard_sectors >> (ilog2(block_size) - 9);
/*
* Currently hardcoded to 1 in Linux/SCSI code..
*/
dev_size, div_u64(dev_size, fd_dev->fd_block_size),
fd_dev->fd_block_size);
- if (target_configure_unmap_from_queue(&dev->dev_attrib, q,
- fd_dev->fd_block_size))
+ if (target_configure_unmap_from_queue(&dev->dev_attrib, q))
pr_debug("IFILE: BLOCK Discard support available,"
" disabled by default\n");
/*
*/
if (cmd->data_length > FD_MAX_BYTES) {
pr_err("FILEIO: Not able to process I/O of %u bytes due to"
- "FD_MAX_BYTES: %u iovec count limitiation\n",
+ "FD_MAX_BYTES: %u iovec count limitation\n",
cmd->data_length, FD_MAX_BYTES);
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
}
dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q);
dev->dev_attrib.hw_queue_depth = q->nr_requests;
- if (target_configure_unmap_from_queue(&dev->dev_attrib, q,
- dev->dev_attrib.hw_block_size))
+ if (target_configure_unmap_from_queue(&dev->dev_attrib, q))
pr_debug("IBLOCK: BLOCK Discard support available,"
" disabled by default\n");
void target_qf_do_work(struct work_struct *work);
bool target_check_wce(struct se_device *dev);
bool target_check_fua(struct se_device *dev);
+void __target_execute_cmd(struct se_cmd *, bool);
/* target_core_stat.c */
void target_stat_setup_dev_default_groups(struct se_device *);
cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT;
spin_unlock_irq(&cmd->t_state_lock);
- __target_execute_cmd(cmd);
+ __target_execute_cmd(cmd, false);
kfree(buf);
return ret;
void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length)
{
- if (scsi_status == SAM_STAT_GOOD && length < cmd->data_length) {
+ if (scsi_status != SAM_STAT_GOOD) {
+ return;
+ }
+
+ /*
+ * Calculate new residual count based upon length of SCSI data
+ * transferred.
+ */
+ if (length < cmd->data_length) {
if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) {
cmd->residual_count += cmd->data_length - length;
} else {
}
cmd->data_length = length;
+ } else if (length > cmd->data_length) {
+ cmd->se_cmd_flags |= SCF_OVERFLOW_BIT;
+ cmd->residual_count = length - cmd->data_length;
+ } else {
+ cmd->se_cmd_flags &= ~(SCF_OVERFLOW_BIT | SCF_UNDERFLOW_BIT);
+ cmd->residual_count = 0;
}
target_complete_cmd(cmd, scsi_status);
trace_target_sequencer_start(cmd);
- /*
- * Check for an existing UNIT ATTENTION condition
- */
- ret = target_scsi3_ua_check(cmd);
- if (ret)
- return ret;
-
- ret = target_alua_state_check(cmd);
- if (ret)
- return ret;
-
- ret = target_check_reservation(cmd);
- if (ret) {
- cmd->scsi_status = SAM_STAT_RESERVATION_CONFLICT;
- return ret;
- }
-
ret = dev->transport->parse_cdb(cmd);
if (ret == TCM_UNSUPPORTED_SCSI_OPCODE)
pr_warn_ratelimited("%s/%s: Unsupported SCSI Opcode 0x%02x, sending CHECK_CONDITION.\n",
}
EXPORT_SYMBOL(transport_generic_request_failure);
-void __target_execute_cmd(struct se_cmd *cmd)
+void __target_execute_cmd(struct se_cmd *cmd, bool do_checks)
{
sense_reason_t ret;
- if (cmd->execute_cmd) {
- ret = cmd->execute_cmd(cmd);
- if (ret) {
- spin_lock_irq(&cmd->t_state_lock);
- cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT);
- spin_unlock_irq(&cmd->t_state_lock);
+ if (!cmd->execute_cmd) {
+ ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+ goto err;
+ }
+ if (do_checks) {
+ /*
+ * Check for an existing UNIT ATTENTION condition after
+ * target_handle_task_attr() has done SAM task attr
+ * checking, and possibly have already defered execution
+ * out to target_restart_delayed_cmds() context.
+ */
+ ret = target_scsi3_ua_check(cmd);
+ if (ret)
+ goto err;
- transport_generic_request_failure(cmd, ret);
+ ret = target_alua_state_check(cmd);
+ if (ret)
+ goto err;
+
+ ret = target_check_reservation(cmd);
+ if (ret) {
+ cmd->scsi_status = SAM_STAT_RESERVATION_CONFLICT;
+ goto err;
}
}
+
+ ret = cmd->execute_cmd(cmd);
+ if (!ret)
+ return;
+err:
+ spin_lock_irq(&cmd->t_state_lock);
+ cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT);
+ spin_unlock_irq(&cmd->t_state_lock);
+
+ transport_generic_request_failure(cmd, ret);
}
static int target_write_prot_action(struct se_cmd *cmd)
if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
return false;
+ cmd->se_cmd_flags |= SCF_TASK_ATTR_SET;
+
/*
* Check for the existence of HEAD_OF_QUEUE, and if true return 1
* to allow the passed struct se_cmd list of tasks to the front of the list.
return;
}
- __target_execute_cmd(cmd);
+ __target_execute_cmd(cmd, true);
}
EXPORT_SYMBOL(target_execute_cmd);
list_del(&cmd->se_delayed_node);
spin_unlock(&dev->delayed_cmd_lock);
- __target_execute_cmd(cmd);
+ __target_execute_cmd(cmd, true);
if (cmd->sam_task_attr == TCM_ORDERED_TAG)
break;
if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
return;
+ if (!(cmd->se_cmd_flags & SCF_TASK_ATTR_SET))
+ goto restart;
+
if (cmd->sam_task_attr == TCM_SIMPLE_TAG) {
atomic_dec_mb(&dev->simple_cmds);
dev->dev_cur_ordered_id++;
pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED\n",
dev->dev_cur_ordered_id);
}
-
+restart:
target_restart_delayed_cmds(dev);
}
bool fabric_stop;
spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
- if (list_empty(&se_cmd->se_cmd_list)) {
- spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
- target_free_cmd_mem(se_cmd);
- se_cmd->se_tfo->release_cmd(se_cmd);
- return;
- }
spin_lock(&se_cmd->t_state_lock);
- fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP);
+ fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP) &&
+ (se_cmd->transport_state & CMD_T_ABORTED);
spin_unlock(&se_cmd->t_state_lock);
if (se_cmd->cmd_wait_set || fabric_stop) {
ft_sess_delete_all(tport);
lport = tport->lport;
+ lport->service_params &= ~FCP_SPPF_TARG_FCN;
BUG_ON(tport != lport->prov[FC_TYPE_FCP]);
RCU_INIT_POINTER(lport->prov[FC_TYPE_FCP], NULL);
{
mutex_lock(&ft_lport_lock);
ft_tport_get(lport);
+ lport->service_params |= FCP_SPPF_TARG_FCN;
mutex_unlock(&ft_lport_lock);
}
void *dmabuf;
int result;
- dmabuf = kmalloc(bufsize, GFP_KERNEL);
+ dmabuf = kmemdup(buf, bufsize, GFP_KERNEL);
if (!dmabuf)
return -ENOMEM;
- memcpy(dmabuf, buf, bufsize);
-
result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0),
req, REQTYPE_HOST_TO_INTERFACE, 0,
port_priv->bInterfaceNumber, dmabuf, bufsize,
struct usb_serial_port *port = urb->context;
unsigned char *data = urb->transfer_buffer;
unsigned long flags;
+ int status = urb->status;
int i;
for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) {
dev_dbg(&port->dev, "%s - urb %d, len %d\n", __func__, i,
urb->actual_length);
- switch (urb->status) {
+ switch (status) {
case 0:
break;
case -ENOENT:
case -ECONNRESET:
case -ESHUTDOWN:
dev_dbg(&port->dev, "%s - urb stopped: %d\n",
- __func__, urb->status);
+ __func__, status);
return;
case -EPIPE:
dev_err(&port->dev, "%s - urb stopped: %d\n",
- __func__, urb->status);
+ __func__, status);
return;
default:
dev_dbg(&port->dev, "%s - nonzero urb status: %d\n",
- __func__, urb->status);
+ __func__, status);
goto resubmit;
}
{
unsigned long flags;
struct usb_serial_port *port = urb->context;
+ int status = urb->status;
int i;
for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) {
set_bit(i, &port->write_urbs_free);
spin_unlock_irqrestore(&port->lock, flags);
- switch (urb->status) {
+ switch (status) {
case 0:
break;
case -ENOENT:
case -ECONNRESET:
case -ESHUTDOWN:
dev_dbg(&port->dev, "%s - urb stopped: %d\n",
- __func__, urb->status);
+ __func__, status);
return;
case -EPIPE:
dev_err_console(port, "%s - urb stopped: %d\n",
- __func__, urb->status);
+ __func__, status);
return;
default:
dev_err_console(port, "%s - nonzero urb status: %d\n",
- __func__, urb->status);
+ __func__, status);
goto resubmit;
}
#define TELIT_PRODUCT_LE922_USBCFG5 0x1045
#define TELIT_PRODUCT_LE920 0x1200
#define TELIT_PRODUCT_LE910 0x1201
+#define TELIT_PRODUCT_LE910_USBCFG4 0x1206
/* ZTE PRODUCTS */
#define ZTE_VENDOR_ID 0x19d2
.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
.driver_info = (kernel_ulong_t)&telit_le910_blacklist },
+ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4),
+ .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
.driver_info = (kernel_ulong_t)&telit_le920_blacklist },
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */
-/* vi: ts=8 sw=8
- *
+/*
* TI 3410/5052 USB Serial Driver
*
* Copyright (C) 2004 Texas Instruments
#include <linux/usb.h>
#include <linux/usb/serial.h>
-#include "ti_usb_3410_5052.h"
-
-/* Defines */
+/* Configuration ids */
+#define TI_BOOT_CONFIG 1
+#define TI_ACTIVE_CONFIG 2
+
+/* Vendor and product ids */
+#define TI_VENDOR_ID 0x0451
+#define IBM_VENDOR_ID 0x04b3
+#define TI_3410_PRODUCT_ID 0x3410
+#define IBM_4543_PRODUCT_ID 0x4543
+#define IBM_454B_PRODUCT_ID 0x454b
+#define IBM_454C_PRODUCT_ID 0x454c
+#define TI_3410_EZ430_ID 0xF430 /* TI ez430 development tool */
+#define TI_5052_BOOT_PRODUCT_ID 0x5052 /* no EEPROM, no firmware */
+#define TI_5152_BOOT_PRODUCT_ID 0x5152 /* no EEPROM, no firmware */
+#define TI_5052_EEPROM_PRODUCT_ID 0x505A /* EEPROM, no firmware */
+#define TI_5052_FIRMWARE_PRODUCT_ID 0x505F /* firmware is running */
+#define FRI2_PRODUCT_ID 0x5053 /* Fish River Island II */
+
+/* Multi-Tech vendor and product ids */
+#define MTS_VENDOR_ID 0x06E0
+#define MTS_GSM_NO_FW_PRODUCT_ID 0xF108
+#define MTS_CDMA_NO_FW_PRODUCT_ID 0xF109
+#define MTS_CDMA_PRODUCT_ID 0xF110
+#define MTS_GSM_PRODUCT_ID 0xF111
+#define MTS_EDGE_PRODUCT_ID 0xF112
+#define MTS_MT9234MU_PRODUCT_ID 0xF114
+#define MTS_MT9234ZBA_PRODUCT_ID 0xF115
+#define MTS_MT9234ZBAOLD_PRODUCT_ID 0x0319
+
+/* Abbott Diabetics vendor and product ids */
+#define ABBOTT_VENDOR_ID 0x1a61
+#define ABBOTT_STEREO_PLUG_ID 0x3410
+#define ABBOTT_PRODUCT_ID ABBOTT_STEREO_PLUG_ID
+#define ABBOTT_STRIP_PORT_ID 0x3420
+
+/* Honeywell vendor and product IDs */
+#define HONEYWELL_VENDOR_ID 0x10ac
+#define HONEYWELL_HGI80_PRODUCT_ID 0x0102 /* Honeywell HGI80 */
+
+/* Moxa UPORT 11x0 vendor and product IDs */
+#define MXU1_VENDOR_ID 0x110a
+#define MXU1_1110_PRODUCT_ID 0x1110
+#define MXU1_1130_PRODUCT_ID 0x1130
+#define MXU1_1150_PRODUCT_ID 0x1150
+#define MXU1_1151_PRODUCT_ID 0x1151
+#define MXU1_1131_PRODUCT_ID 0x1131
+
+/* Commands */
+#define TI_GET_VERSION 0x01
+#define TI_GET_PORT_STATUS 0x02
+#define TI_GET_PORT_DEV_INFO 0x03
+#define TI_GET_CONFIG 0x04
+#define TI_SET_CONFIG 0x05
+#define TI_OPEN_PORT 0x06
+#define TI_CLOSE_PORT 0x07
+#define TI_START_PORT 0x08
+#define TI_STOP_PORT 0x09
+#define TI_TEST_PORT 0x0A
+#define TI_PURGE_PORT 0x0B
+#define TI_RESET_EXT_DEVICE 0x0C
+#define TI_WRITE_DATA 0x80
+#define TI_READ_DATA 0x81
+#define TI_REQ_TYPE_CLASS 0x82
+
+/* Module identifiers */
+#define TI_I2C_PORT 0x01
+#define TI_IEEE1284_PORT 0x02
+#define TI_UART1_PORT 0x03
+#define TI_UART2_PORT 0x04
+#define TI_RAM_PORT 0x05
+
+/* Modem status */
+#define TI_MSR_DELTA_CTS 0x01
+#define TI_MSR_DELTA_DSR 0x02
+#define TI_MSR_DELTA_RI 0x04
+#define TI_MSR_DELTA_CD 0x08
+#define TI_MSR_CTS 0x10
+#define TI_MSR_DSR 0x20
+#define TI_MSR_RI 0x40
+#define TI_MSR_CD 0x80
+#define TI_MSR_DELTA_MASK 0x0F
+#define TI_MSR_MASK 0xF0
+
+/* Line status */
+#define TI_LSR_OVERRUN_ERROR 0x01
+#define TI_LSR_PARITY_ERROR 0x02
+#define TI_LSR_FRAMING_ERROR 0x04
+#define TI_LSR_BREAK 0x08
+#define TI_LSR_ERROR 0x0F
+#define TI_LSR_RX_FULL 0x10
+#define TI_LSR_TX_EMPTY 0x20
+
+/* Line control */
+#define TI_LCR_BREAK 0x40
+
+/* Modem control */
+#define TI_MCR_LOOP 0x04
+#define TI_MCR_DTR 0x10
+#define TI_MCR_RTS 0x20
+
+/* Mask settings */
+#define TI_UART_ENABLE_RTS_IN 0x0001
+#define TI_UART_DISABLE_RTS 0x0002
+#define TI_UART_ENABLE_PARITY_CHECKING 0x0008
+#define TI_UART_ENABLE_DSR_OUT 0x0010
+#define TI_UART_ENABLE_CTS_OUT 0x0020
+#define TI_UART_ENABLE_X_OUT 0x0040
+#define TI_UART_ENABLE_XA_OUT 0x0080
+#define TI_UART_ENABLE_X_IN 0x0100
+#define TI_UART_ENABLE_DTR_IN 0x0800
+#define TI_UART_DISABLE_DTR 0x1000
+#define TI_UART_ENABLE_MS_INTS 0x2000
+#define TI_UART_ENABLE_AUTO_START_DMA 0x4000
+
+/* Parity */
+#define TI_UART_NO_PARITY 0x00
+#define TI_UART_ODD_PARITY 0x01
+#define TI_UART_EVEN_PARITY 0x02
+#define TI_UART_MARK_PARITY 0x03
+#define TI_UART_SPACE_PARITY 0x04
+
+/* Stop bits */
+#define TI_UART_1_STOP_BITS 0x00
+#define TI_UART_1_5_STOP_BITS 0x01
+#define TI_UART_2_STOP_BITS 0x02
+
+/* Bits per character */
+#define TI_UART_5_DATA_BITS 0x00
+#define TI_UART_6_DATA_BITS 0x01
+#define TI_UART_7_DATA_BITS 0x02
+#define TI_UART_8_DATA_BITS 0x03
+
+/* 232/485 modes */
+#define TI_UART_232 0x00
+#define TI_UART_485_RECEIVER_DISABLED 0x01
+#define TI_UART_485_RECEIVER_ENABLED 0x02
+
+/* Pipe transfer mode and timeout */
+#define TI_PIPE_MODE_CONTINUOUS 0x01
+#define TI_PIPE_MODE_MASK 0x03
+#define TI_PIPE_TIMEOUT_MASK 0x7C
+#define TI_PIPE_TIMEOUT_ENABLE 0x80
+
+/* Config struct */
+struct ti_uart_config {
+ __u16 wBaudRate;
+ __u16 wFlags;
+ __u8 bDataBits;
+ __u8 bParity;
+ __u8 bStopBits;
+ char cXon;
+ char cXoff;
+ __u8 bUartMode;
+} __packed;
+
+/* Get port status */
+struct ti_port_status {
+ __u8 bCmdCode;
+ __u8 bModuleId;
+ __u8 bErrorCode;
+ __u8 bMSR;
+ __u8 bLSR;
+} __packed;
+
+/* Purge modes */
+#define TI_PURGE_OUTPUT 0x00
+#define TI_PURGE_INPUT 0x80
+
+/* Read/Write data */
+#define TI_RW_DATA_ADDR_SFR 0x10
+#define TI_RW_DATA_ADDR_IDATA 0x20
+#define TI_RW_DATA_ADDR_XDATA 0x30
+#define TI_RW_DATA_ADDR_CODE 0x40
+#define TI_RW_DATA_ADDR_GPIO 0x50
+#define TI_RW_DATA_ADDR_I2C 0x60
+#define TI_RW_DATA_ADDR_FLASH 0x70
+#define TI_RW_DATA_ADDR_DSP 0x80
+
+#define TI_RW_DATA_UNSPECIFIED 0x00
+#define TI_RW_DATA_BYTE 0x01
+#define TI_RW_DATA_WORD 0x02
+#define TI_RW_DATA_DOUBLE_WORD 0x04
+
+struct ti_write_data_bytes {
+ __u8 bAddrType;
+ __u8 bDataType;
+ __u8 bDataCounter;
+ __be16 wBaseAddrHi;
+ __be16 wBaseAddrLo;
+ __u8 bData[0];
+} __packed;
+
+struct ti_read_data_request {
+ __u8 bAddrType;
+ __u8 bDataType;
+ __u8 bDataCounter;
+ __be16 wBaseAddrHi;
+ __be16 wBaseAddrLo;
+} __packed;
+
+struct ti_read_data_bytes {
+ __u8 bCmdCode;
+ __u8 bModuleId;
+ __u8 bErrorCode;
+ __u8 bData[0];
+} __packed;
+
+/* Interrupt struct */
+struct ti_interrupt {
+ __u8 bICode;
+ __u8 bIInfo;
+} __packed;
+
+/* Interrupt codes */
+#define TI_CODE_HARDWARE_ERROR 0xFF
+#define TI_CODE_DATA_ERROR 0x03
+#define TI_CODE_MODEM_STATUS 0x04
+
+/* Download firmware max packet size */
+#define TI_DOWNLOAD_MAX_PACKET_SIZE 64
+
+/* Firmware image header */
+struct ti_firmware_header {
+ __le16 wLength;
+ __u8 bCheckSum;
+} __packed;
+
+/* UART addresses */
+#define TI_UART1_BASE_ADDR 0xFFA0 /* UART 1 base address */
+#define TI_UART2_BASE_ADDR 0xFFB0 /* UART 2 base address */
+#define TI_UART_OFFSET_LCR 0x0002 /* UART MCR register offset */
+#define TI_UART_OFFSET_MCR 0x0004 /* UART MCR register offset */
#define TI_DRIVER_AUTHOR "Al Borchers <alborchers@steinerpoint.com>"
#define TI_DRIVER_DESC "TI USB 3410/5052 Serial Driver"
#define TI_EXTRA_VID_PID_COUNT 5
-
-/* Structures */
-
struct ti_port {
int tp_is_open;
__u8 tp_msr;
int td_urb_error;
};
-
-/* Function Declarations */
-
static int ti_startup(struct usb_serial *serial);
static void ti_release(struct usb_serial *serial);
static int ti_port_probe(struct usb_serial_port *port);
static int ti_download_firmware(struct ti_device *tdev);
-
-/* Data */
-
-/* module parameters */
static int closing_wait = TI_DEFAULT_CLOSING_WAIT;
-/* supported devices */
static const struct usb_device_id ti_id_table_3410[] = {
{ USB_DEVICE(TI_VENDOR_ID, TI_3410_PRODUCT_ID) },
{ USB_DEVICE(TI_VENDOR_ID, TI_3410_EZ430_ID) },
{ USB_DEVICE(TI_VENDOR_ID, TI_5152_BOOT_PRODUCT_ID) },
{ USB_DEVICE(TI_VENDOR_ID, TI_5052_EEPROM_PRODUCT_ID) },
{ USB_DEVICE(TI_VENDOR_ID, TI_5052_FIRMWARE_PRODUCT_ID) },
- { } /* terminator */
+ { }
};
static const struct usb_device_id ti_id_table_combined[] = {
&ti_1port_device, &ti_2port_device, NULL
};
-/* Module */
-
MODULE_AUTHOR(TI_DRIVER_AUTHOR);
MODULE_DESCRIPTION(TI_DRIVER_DESC);
MODULE_LICENSE("GPL");
module_usb_serial_driver(serial_drivers, ti_id_table_combined);
-/* Functions */
-
static int ti_startup(struct usb_serial *serial)
{
struct ti_device *tdev;
dev->descriptor.bNumConfigurations,
dev->actconfig->desc.bConfigurationValue);
- /* create device structure */
tdev = kzalloc(sizeof(struct ti_device), GFP_KERNEL);
if (!tdev)
return -ENOMEM;
struct urb *urb;
int port_number;
int status;
- __u16 open_settings = (__u8)(TI_PIPE_MODE_CONTINOUS |
+ __u16 open_settings = (__u8)(TI_PIPE_MODE_CONTINUOUS |
TI_PIPE_TIMEOUT_ENABLE |
(TI_TRANSFER_TIMEOUT << 2));
dev_dbg(&port->dev, "%s - error setting break, %d\n", __func__, status);
}
+static int ti_get_port_from_code(unsigned char code)
+{
+ return (code >> 4) - 3;
+}
+
+static int ti_get_func_from_code(unsigned char code)
+{
+ return code & 0x0f;
+}
static void ti_interrupt_callback(struct urb *urb)
{
goto exit;
}
- port_number = TI_GET_PORT_FROM_CODE(data[0]);
- function = TI_GET_FUNC_FROM_CODE(data[0]);
+ port_number = ti_get_port_from_code(data[0]);
+ function = ti_get_func_from_code(data[0]);
dev_dbg(dev, "%s - port_number %d, function %d, data 0x%02X\n",
__func__, port_number, function, data[1]);
+++ /dev/null
-/* vi: ts=8 sw=8
- *
- * TI 3410/5052 USB Serial Driver Header
- *
- * Copyright (C) 2004 Texas Instruments
- *
- * This driver is based on the Linux io_ti driver, which is
- * Copyright (C) 2000-2002 Inside Out Networks
- * Copyright (C) 2001-2002 Greg Kroah-Hartman
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * For questions or problems with this driver, contact Texas Instruments
- * technical support, or Al Borchers <alborchers@steinerpoint.com>, or
- * Peter Berger <pberger@brimson.com>.
- */
-
-#ifndef _TI_3410_5052_H_
-#define _TI_3410_5052_H_
-
-/* Configuration ids */
-#define TI_BOOT_CONFIG 1
-#define TI_ACTIVE_CONFIG 2
-
-/* Vendor and product ids */
-#define TI_VENDOR_ID 0x0451
-#define IBM_VENDOR_ID 0x04b3
-#define TI_3410_PRODUCT_ID 0x3410
-#define IBM_4543_PRODUCT_ID 0x4543
-#define IBM_454B_PRODUCT_ID 0x454b
-#define IBM_454C_PRODUCT_ID 0x454c
-#define TI_3410_EZ430_ID 0xF430 /* TI ez430 development tool */
-#define TI_5052_BOOT_PRODUCT_ID 0x5052 /* no EEPROM, no firmware */
-#define TI_5152_BOOT_PRODUCT_ID 0x5152 /* no EEPROM, no firmware */
-#define TI_5052_EEPROM_PRODUCT_ID 0x505A /* EEPROM, no firmware */
-#define TI_5052_FIRMWARE_PRODUCT_ID 0x505F /* firmware is running */
-#define FRI2_PRODUCT_ID 0x5053 /* Fish River Island II */
-
-/* Multi-Tech vendor and product ids */
-#define MTS_VENDOR_ID 0x06E0
-#define MTS_GSM_NO_FW_PRODUCT_ID 0xF108
-#define MTS_CDMA_NO_FW_PRODUCT_ID 0xF109
-#define MTS_CDMA_PRODUCT_ID 0xF110
-#define MTS_GSM_PRODUCT_ID 0xF111
-#define MTS_EDGE_PRODUCT_ID 0xF112
-#define MTS_MT9234MU_PRODUCT_ID 0xF114
-#define MTS_MT9234ZBA_PRODUCT_ID 0xF115
-#define MTS_MT9234ZBAOLD_PRODUCT_ID 0x0319
-
-/* Abbott Diabetics vendor and product ids */
-#define ABBOTT_VENDOR_ID 0x1a61
-#define ABBOTT_STEREO_PLUG_ID 0x3410
-#define ABBOTT_PRODUCT_ID ABBOTT_STEREO_PLUG_ID
-#define ABBOTT_STRIP_PORT_ID 0x3420
-
-/* Honeywell vendor and product IDs */
-#define HONEYWELL_VENDOR_ID 0x10ac
-#define HONEYWELL_HGI80_PRODUCT_ID 0x0102 /* Honeywell HGI80 */
-
-/* Moxa UPORT 11x0 vendor and product IDs */
-#define MXU1_VENDOR_ID 0x110a
-#define MXU1_1110_PRODUCT_ID 0x1110
-#define MXU1_1130_PRODUCT_ID 0x1130
-#define MXU1_1131_PRODUCT_ID 0x1131
-#define MXU1_1150_PRODUCT_ID 0x1150
-#define MXU1_1151_PRODUCT_ID 0x1151
-
-/* Commands */
-#define TI_GET_VERSION 0x01
-#define TI_GET_PORT_STATUS 0x02
-#define TI_GET_PORT_DEV_INFO 0x03
-#define TI_GET_CONFIG 0x04
-#define TI_SET_CONFIG 0x05
-#define TI_OPEN_PORT 0x06
-#define TI_CLOSE_PORT 0x07
-#define TI_START_PORT 0x08
-#define TI_STOP_PORT 0x09
-#define TI_TEST_PORT 0x0A
-#define TI_PURGE_PORT 0x0B
-#define TI_RESET_EXT_DEVICE 0x0C
-#define TI_WRITE_DATA 0x80
-#define TI_READ_DATA 0x81
-#define TI_REQ_TYPE_CLASS 0x82
-
-/* Module identifiers */
-#define TI_I2C_PORT 0x01
-#define TI_IEEE1284_PORT 0x02
-#define TI_UART1_PORT 0x03
-#define TI_UART2_PORT 0x04
-#define TI_RAM_PORT 0x05
-
-/* Modem status */
-#define TI_MSR_DELTA_CTS 0x01
-#define TI_MSR_DELTA_DSR 0x02
-#define TI_MSR_DELTA_RI 0x04
-#define TI_MSR_DELTA_CD 0x08
-#define TI_MSR_CTS 0x10
-#define TI_MSR_DSR 0x20
-#define TI_MSR_RI 0x40
-#define TI_MSR_CD 0x80
-#define TI_MSR_DELTA_MASK 0x0F
-#define TI_MSR_MASK 0xF0
-
-/* Line status */
-#define TI_LSR_OVERRUN_ERROR 0x01
-#define TI_LSR_PARITY_ERROR 0x02
-#define TI_LSR_FRAMING_ERROR 0x04
-#define TI_LSR_BREAK 0x08
-#define TI_LSR_ERROR 0x0F
-#define TI_LSR_RX_FULL 0x10
-#define TI_LSR_TX_EMPTY 0x20
-
-/* Line control */
-#define TI_LCR_BREAK 0x40
-
-/* Modem control */
-#define TI_MCR_LOOP 0x04
-#define TI_MCR_DTR 0x10
-#define TI_MCR_RTS 0x20
-
-/* Mask settings */
-#define TI_UART_ENABLE_RTS_IN 0x0001
-#define TI_UART_DISABLE_RTS 0x0002
-#define TI_UART_ENABLE_PARITY_CHECKING 0x0008
-#define TI_UART_ENABLE_DSR_OUT 0x0010
-#define TI_UART_ENABLE_CTS_OUT 0x0020
-#define TI_UART_ENABLE_X_OUT 0x0040
-#define TI_UART_ENABLE_XA_OUT 0x0080
-#define TI_UART_ENABLE_X_IN 0x0100
-#define TI_UART_ENABLE_DTR_IN 0x0800
-#define TI_UART_DISABLE_DTR 0x1000
-#define TI_UART_ENABLE_MS_INTS 0x2000
-#define TI_UART_ENABLE_AUTO_START_DMA 0x4000
-
-/* Parity */
-#define TI_UART_NO_PARITY 0x00
-#define TI_UART_ODD_PARITY 0x01
-#define TI_UART_EVEN_PARITY 0x02
-#define TI_UART_MARK_PARITY 0x03
-#define TI_UART_SPACE_PARITY 0x04
-
-/* Stop bits */
-#define TI_UART_1_STOP_BITS 0x00
-#define TI_UART_1_5_STOP_BITS 0x01
-#define TI_UART_2_STOP_BITS 0x02
-
-/* Bits per character */
-#define TI_UART_5_DATA_BITS 0x00
-#define TI_UART_6_DATA_BITS 0x01
-#define TI_UART_7_DATA_BITS 0x02
-#define TI_UART_8_DATA_BITS 0x03
-
-/* 232/485 modes */
-#define TI_UART_232 0x00
-#define TI_UART_485_RECEIVER_DISABLED 0x01
-#define TI_UART_485_RECEIVER_ENABLED 0x02
-
-/* Pipe transfer mode and timeout */
-#define TI_PIPE_MODE_CONTINOUS 0x01
-#define TI_PIPE_MODE_MASK 0x03
-#define TI_PIPE_TIMEOUT_MASK 0x7C
-#define TI_PIPE_TIMEOUT_ENABLE 0x80
-
-/* Config struct */
-struct ti_uart_config {
- __u16 wBaudRate;
- __u16 wFlags;
- __u8 bDataBits;
- __u8 bParity;
- __u8 bStopBits;
- char cXon;
- char cXoff;
- __u8 bUartMode;
-} __attribute__((packed));
-
-/* Get port status */
-struct ti_port_status {
- __u8 bCmdCode;
- __u8 bModuleId;
- __u8 bErrorCode;
- __u8 bMSR;
- __u8 bLSR;
-} __attribute__((packed));
-
-/* Purge modes */
-#define TI_PURGE_OUTPUT 0x00
-#define TI_PURGE_INPUT 0x80
-
-/* Read/Write data */
-#define TI_RW_DATA_ADDR_SFR 0x10
-#define TI_RW_DATA_ADDR_IDATA 0x20
-#define TI_RW_DATA_ADDR_XDATA 0x30
-#define TI_RW_DATA_ADDR_CODE 0x40
-#define TI_RW_DATA_ADDR_GPIO 0x50
-#define TI_RW_DATA_ADDR_I2C 0x60
-#define TI_RW_DATA_ADDR_FLASH 0x70
-#define TI_RW_DATA_ADDR_DSP 0x80
-
-#define TI_RW_DATA_UNSPECIFIED 0x00
-#define TI_RW_DATA_BYTE 0x01
-#define TI_RW_DATA_WORD 0x02
-#define TI_RW_DATA_DOUBLE_WORD 0x04
-
-struct ti_write_data_bytes {
- __u8 bAddrType;
- __u8 bDataType;
- __u8 bDataCounter;
- __be16 wBaseAddrHi;
- __be16 wBaseAddrLo;
- __u8 bData[0];
-} __attribute__((packed));
-
-struct ti_read_data_request {
- __u8 bAddrType;
- __u8 bDataType;
- __u8 bDataCounter;
- __be16 wBaseAddrHi;
- __be16 wBaseAddrLo;
-} __attribute__((packed));
-
-struct ti_read_data_bytes {
- __u8 bCmdCode;
- __u8 bModuleId;
- __u8 bErrorCode;
- __u8 bData[0];
-} __attribute__((packed));
-
-/* Interrupt struct */
-struct ti_interrupt {
- __u8 bICode;
- __u8 bIInfo;
-} __attribute__((packed));
-
-/* Interrupt codes */
-#define TI_GET_PORT_FROM_CODE(c) (((c) >> 4) - 3)
-#define TI_GET_FUNC_FROM_CODE(c) ((c) & 0x0f)
-#define TI_CODE_HARDWARE_ERROR 0xFF
-#define TI_CODE_DATA_ERROR 0x03
-#define TI_CODE_MODEM_STATUS 0x04
-
-/* Download firmware max packet size */
-#define TI_DOWNLOAD_MAX_PACKET_SIZE 64
-
-/* Firmware image header */
-struct ti_firmware_header {
- __le16 wLength;
- __u8 bCheckSum;
-} __attribute__((packed));
-
-/* UART addresses */
-#define TI_UART1_BASE_ADDR 0xFFA0 /* UART 1 base address */
-#define TI_UART2_BASE_ADDR 0xFFB0 /* UART 2 base address */
-#define TI_UART_OFFSET_LCR 0x0002 /* UART MCR register offset */
-#define TI_UART_OFFSET_MCR 0x0004 /* UART MCR register offset */
-
-#endif /* _TI_3410_5052_H_ */
config EXPORTFS
tristate
+config EXPORTFS_BLOCK_OPS
+ bool "Enable filesystem export operations for block IO"
+ help
+ This option enables the export operations for a filesystem to support
+ external block IO.
+
config FILE_LOCKING
bool "Enable POSIX file locking API" if EXPERT
default y
config BINFMT_FLAT
bool "Kernel support for flat binaries"
- depends on !MMU && (!FRV || BROKEN)
+ depends on !MMU || M68K
+ depends on !FRV || BROKEN
help
Support uClinux FLAT format binaries.
struct elf_fdpic_params *);
#ifndef CONFIG_MMU
-static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *,
- unsigned long *);
static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *,
struct file *,
struct mm_struct *);
sp = mm->start_stack;
/* stack the program arguments and environment */
- if (elf_fdpic_transfer_args_to_stack(bprm, &sp) < 0)
+ if (transfer_args_to_stack(bprm, &sp) < 0)
return -EFAULT;
+ sp &= ~15;
#endif
/*
return 0;
}
-/*****************************************************************************/
-/*
- * transfer the program arguments and environment from the holding pages onto
- * the stack
- */
-#ifndef CONFIG_MMU
-static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm,
- unsigned long *_sp)
-{
- unsigned long index, stop, sp;
- char *src;
- int ret = 0;
-
- stop = bprm->p >> PAGE_SHIFT;
- sp = *_sp;
-
- for (index = MAX_ARG_PAGES - 1; index >= stop; index--) {
- src = kmap(bprm->page[index]);
- sp -= PAGE_SIZE;
- if (copy_to_user((void *) sp, src, PAGE_SIZE) != 0)
- ret = -EFAULT;
- kunmap(bprm->page[index]);
- if (ret < 0)
- goto out;
- }
-
- *_sp = (*_sp - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p)) & ~15;
-
-out:
- return ret;
-}
-#endif
-
/*****************************************************************************/
/*
* load the appropriate binary image (executable or interpreter) into memory
* JAN/99 -- coded full program relocation (gerg@snapgear.com)
*/
-#include <linux/export.h>
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/init.h>
#include <linux/flat.h>
-#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
#include <asm/byteorder.h>
-#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <asm/cacheflush.h>
#include <asm/page.h>
/****************************************************************************/
-#if 0
-#define DEBUG 1
-#endif
-
-#ifdef DEBUG
-#define DBG_FLT(a...) printk(a)
-#else
-#define DBG_FLT(a...)
-#endif
-
/*
* User data (data section and bss) needs to be aligned.
* We pick 0x20 here because it is the max value elf2flt has always
unsigned long text_len; /* Length of text segment */
unsigned long entry; /* Start address for this module */
unsigned long build_date; /* When this one was compiled */
- short loaded; /* Has this library been loaded? */
+ bool loaded; /* Has this library been loaded? */
} lib_list[MAX_SHARED_LIBS];
};
static int flat_core_dump(struct coredump_params *cprm)
{
- printk("Process %s:%d received signr %d and should have core dumped\n",
- current->comm, current->pid, (int) cprm->siginfo->si_signo);
- return(1);
+ pr_warn("Process %s:%d received signr %d and should have core dumped\n",
+ current->comm, current->pid, cprm->siginfo->si_signo);
+ return 1;
}
/****************************************************************************/
/*
* create_flat_tables() parses the env- and arg-strings in new user
* memory and creates the pointer tables from them, and puts their
- * addresses on the "stack", returning the new stack pointer value.
+ * addresses on the "stack", recording the new stack pointer value.
*/
-static unsigned long create_flat_tables(
- unsigned long pp,
- struct linux_binprm * bprm)
+static int create_flat_tables(struct linux_binprm *bprm, unsigned long arg_start)
{
- unsigned long *argv,*envp;
- unsigned long * sp;
- char * p = (char*)pp;
- int argc = bprm->argc;
- int envc = bprm->envc;
- char uninitialized_var(dummy);
-
- sp = (unsigned long *)p;
- sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
- sp = (unsigned long *) ((unsigned long)sp & -FLAT_STACK_ALIGN);
- argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
- envp = argv + (argc + 1);
+ char __user *p;
+ unsigned long __user *sp;
+ long i, len;
+
+ p = (char __user *)arg_start;
+ sp = (unsigned long __user *)current->mm->start_stack;
+
+ sp -= bprm->envc + 1;
+ sp -= bprm->argc + 1;
+ sp -= flat_argvp_envp_on_stack() ? 2 : 0;
+ sp -= 1; /* &argc */
+ current->mm->start_stack = (unsigned long)sp & -FLAT_STACK_ALIGN;
+ sp = (unsigned long __user *)current->mm->start_stack;
+
+ __put_user(bprm->argc, sp++);
if (flat_argvp_envp_on_stack()) {
- put_user((unsigned long) envp, sp + 2);
- put_user((unsigned long) argv, sp + 1);
- }
-
- put_user(argc, sp);
- current->mm->arg_start = (unsigned long) p;
- while (argc-->0) {
- put_user((unsigned long) p, argv++);
- do {
- get_user(dummy, p); p++;
- } while (dummy);
- }
- put_user((unsigned long) NULL, argv);
- current->mm->arg_end = current->mm->env_start = (unsigned long) p;
- while (envc-->0) {
- put_user((unsigned long)p, envp); envp++;
- do {
- get_user(dummy, p); p++;
- } while (dummy);
- }
- put_user((unsigned long) NULL, envp);
- current->mm->env_end = (unsigned long) p;
- return (unsigned long)sp;
+ unsigned long argv, envp;
+ argv = (unsigned long)(sp + 2);
+ envp = (unsigned long)(sp + 2 + bprm->argc + 1);
+ __put_user(argv, sp++);
+ __put_user(envp, sp++);
+ }
+
+ current->mm->arg_start = (unsigned long)p;
+ for (i = bprm->argc; i > 0; i--) {
+ __put_user((unsigned long)p, sp++);
+ len = strnlen_user(p, MAX_ARG_STRLEN);
+ if (!len || len > MAX_ARG_STRLEN)
+ return -EINVAL;
+ p += len;
+ }
+ __put_user(0, sp++);
+ current->mm->arg_end = (unsigned long)p;
+
+ current->mm->env_start = (unsigned long) p;
+ for (i = bprm->envc; i > 0; i--) {
+ __put_user((unsigned long)p, sp++);
+ len = strnlen_user(p, MAX_ARG_STRLEN);
+ if (!len || len > MAX_ARG_STRLEN)
+ return -EINVAL;
+ p += len;
+ }
+ __put_user(0, sp++);
+ current->mm->env_end = (unsigned long)p;
+
+ return 0;
}
/****************************************************************************/
loff_t fpos;
int ret, retval;
- DBG_FLT("decompress_exec(offset=%x,buf=%x,len=%x)\n",(int)offset, (int)dst, (int)len);
+ pr_debug("decompress_exec(offset=%lx,buf=%p,len=%lx)\n", offset, dst, len);
memset(&strm, 0, sizeof(strm));
strm.workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL);
if (strm.workspace == NULL) {
- DBG_FLT("binfmt_flat: no memory for decompress workspace\n");
+ pr_debug("no memory for decompress workspace\n");
return -ENOMEM;
}
buf = kmalloc(LBUFSIZE, GFP_KERNEL);
if (buf == NULL) {
- DBG_FLT("binfmt_flat: no memory for read buffer\n");
+ pr_debug("no memory for read buffer\n");
retval = -ENOMEM;
goto out_free;
}
/* Check minimum size -- gzip header */
if (ret < 10) {
- DBG_FLT("binfmt_flat: file too small?\n");
+ pr_debug("file too small?\n");
goto out_free_buf;
}
/* Check gzip magic number */
if ((buf[0] != 037) || ((buf[1] != 0213) && (buf[1] != 0236))) {
- DBG_FLT("binfmt_flat: unknown compression magic?\n");
+ pr_debug("unknown compression magic?\n");
goto out_free_buf;
}
/* Check gzip method */
if (buf[2] != 8) {
- DBG_FLT("binfmt_flat: unknown compression method?\n");
+ pr_debug("unknown compression method?\n");
goto out_free_buf;
}
/* Check gzip flags */
if ((buf[3] & ENCRYPTED) || (buf[3] & CONTINUATION) ||
(buf[3] & RESERVED)) {
- DBG_FLT("binfmt_flat: unknown flags?\n");
+ pr_debug("unknown flags?\n");
goto out_free_buf;
}
ret = 10;
if (buf[3] & EXTRA_FIELD) {
ret += 2 + buf[10] + (buf[11] << 8);
- if (unlikely(LBUFSIZE <= ret)) {
- DBG_FLT("binfmt_flat: buffer overflow (EXTRA)?\n");
+ if (unlikely(ret >= LBUFSIZE)) {
+ pr_debug("buffer overflow (EXTRA)?\n");
goto out_free_buf;
}
}
if (buf[3] & ORIG_NAME) {
while (ret < LBUFSIZE && buf[ret++] != 0)
;
- if (unlikely(LBUFSIZE == ret)) {
- DBG_FLT("binfmt_flat: buffer overflow (ORIG_NAME)?\n");
+ if (unlikely(ret == LBUFSIZE)) {
+ pr_debug("buffer overflow (ORIG_NAME)?\n");
goto out_free_buf;
}
}
if (buf[3] & COMMENT) {
while (ret < LBUFSIZE && buf[ret++] != 0)
;
- if (unlikely(LBUFSIZE == ret)) {
- DBG_FLT("binfmt_flat: buffer overflow (COMMENT)?\n");
+ if (unlikely(ret == LBUFSIZE)) {
+ pr_debug("buffer overflow (COMMENT)?\n");
goto out_free_buf;
}
}
strm.total_out = 0;
if (zlib_inflateInit2(&strm, -MAX_WBITS) != Z_OK) {
- DBG_FLT("binfmt_flat: zlib init failed?\n");
+ pr_debug("zlib init failed?\n");
goto out_free_buf;
}
}
if (ret < 0) {
- DBG_FLT("binfmt_flat: decompression failed (%d), %s\n",
+ pr_debug("decompression failed (%d), %s\n",
ret, strm.msg);
goto out_zlib;
}
r &= 0x00ffffff; /* Trim ID off here */
}
if (id >= MAX_SHARED_LIBS) {
- printk("BINFMT_FLAT: reference 0x%x to shared library %d",
- (unsigned) r, id);
+ pr_err("reference 0x%lx to shared library %d", r, id);
goto failed;
}
if (curid != id) {
if (internalp) {
- printk("BINFMT_FLAT: reloc address 0x%x not in same module "
- "(%d != %d)", (unsigned) r, curid, id);
+ pr_err("reloc address 0x%lx not in same module "
+ "(%d != %d)", r, curid, id);
goto failed;
- } else if ( ! p->lib_list[id].loaded &&
- load_flat_shared_library(id, p) < 0) {
- printk("BINFMT_FLAT: failed to load library %d", id);
+ } else if (!p->lib_list[id].loaded &&
+ load_flat_shared_library(id, p) < 0) {
+ pr_err("failed to load library %d", id);
goto failed;
}
/* Check versioning information (i.e. time stamps) */
if (p->lib_list[id].build_date && p->lib_list[curid].build_date &&
p->lib_list[curid].build_date < p->lib_list[id].build_date) {
- printk("BINFMT_FLAT: library %d is younger than %d", id, curid);
+ pr_err("library %d is younger than %d", id, curid);
goto failed;
}
}
text_len = p->lib_list[id].text_len;
if (!flat_reloc_valid(r, start_brk - start_data + text_len)) {
- printk("BINFMT_FLAT: reloc outside program 0x%x (0 - 0x%x/0x%x)",
- (int) r,(int)(start_brk-start_data+text_len),(int)text_len);
+ pr_err("reloc outside program 0x%lx (0 - 0x%lx/0x%lx)",
+ r, start_brk-start_data+text_len, text_len);
goto failed;
}
addr = r - text_len + start_data;
/* Range checked already above so doing the range tests is redundant...*/
- return(addr);
+ return addr;
failed:
- printk(", killing %s!\n", current->comm);
+ pr_cont(", killing %s!\n", current->comm);
send_sig(SIGSEGV, current, 0);
return RELOC_FAILED;
static void old_reloc(unsigned long rl)
{
-#ifdef DEBUG
- char *segment[] = { "TEXT", "DATA", "BSS", "*UNKNOWN*" };
-#endif
+ static const char *segment[] = { "TEXT", "DATA", "BSS", "*UNKNOWN*" };
flat_v2_reloc_t r;
- unsigned long *ptr;
-
+ unsigned long __user *ptr;
+ unsigned long val;
+
r.value = rl;
#if defined(CONFIG_COLDFIRE)
- ptr = (unsigned long *) (current->mm->start_code + r.reloc.offset);
+ ptr = (unsigned long __user *)(current->mm->start_code + r.reloc.offset);
#else
- ptr = (unsigned long *) (current->mm->start_data + r.reloc.offset);
+ ptr = (unsigned long __user *)(current->mm->start_data + r.reloc.offset);
#endif
+ get_user(val, ptr);
+
+ pr_debug("Relocation of variable at DATASEG+%x "
+ "(address %p, currently %lx) into segment %s\n",
+ r.reloc.offset, ptr, val, segment[r.reloc.type]);
-#ifdef DEBUG
- printk("Relocation of variable at DATASEG+%x "
- "(address %p, currently %x) into segment %s\n",
- r.reloc.offset, ptr, (int)*ptr, segment[r.reloc.type]);
-#endif
-
switch (r.reloc.type) {
case OLD_FLAT_RELOC_TYPE_TEXT:
- *ptr += current->mm->start_code;
+ val += current->mm->start_code;
break;
case OLD_FLAT_RELOC_TYPE_DATA:
- *ptr += current->mm->start_data;
+ val += current->mm->start_data;
break;
case OLD_FLAT_RELOC_TYPE_BSS:
- *ptr += current->mm->end_data;
+ val += current->mm->end_data;
break;
default:
- printk("BINFMT_FLAT: Unknown relocation type=%x\n", r.reloc.type);
+ pr_err("Unknown relocation type=%x\n", r.reloc.type);
break;
}
+ put_user(val, ptr);
-#ifdef DEBUG
- printk("Relocation became %x\n", (int)*ptr);
-#endif
-}
+ pr_debug("Relocation became %lx\n", val);
+}
/****************************************************************************/
-static int load_flat_file(struct linux_binprm * bprm,
+static int load_flat_file(struct linux_binprm *bprm,
struct lib_info *libinfo, int id, unsigned long *extra_stack)
{
- struct flat_hdr * hdr;
- unsigned long textpos = 0, datapos = 0, result;
- unsigned long realdatastart = 0;
- unsigned long text_len, data_len, bss_len, stack_len, flags;
- unsigned long full_data;
- unsigned long len, memp = 0;
- unsigned long memp_size, extra, rlim;
- unsigned long *reloc = 0, *rp;
+ struct flat_hdr *hdr;
+ unsigned long textpos, datapos, realdatastart;
+ unsigned long text_len, data_len, bss_len, stack_len, full_data, flags;
+ unsigned long len, memp, memp_size, extra, rlim;
+ unsigned long __user *reloc, *rp;
struct inode *inode;
- int i, rev, relocs = 0;
+ int i, rev, relocs;
loff_t fpos;
unsigned long start_code, end_code;
+ ssize_t result;
int ret;
hdr = ((struct flat_hdr *) bprm->buf); /* exec-header */
}
if (flags & FLAT_FLAG_KTRACE)
- printk("BINFMT_FLAT: Loading file: %s\n", bprm->filename);
+ pr_info("Loading file: %s\n", bprm->filename);
if (rev != FLAT_VERSION && rev != OLD_FLAT_VERSION) {
- printk("BINFMT_FLAT: bad flat file version 0x%x (supported "
- "0x%lx and 0x%lx)\n",
- rev, FLAT_VERSION, OLD_FLAT_VERSION);
+ pr_err("bad flat file version 0x%x (supported 0x%lx and 0x%lx)\n",
+ rev, FLAT_VERSION, OLD_FLAT_VERSION);
ret = -ENOEXEC;
goto err;
}
-
+
/* Don't allow old format executables to use shared libraries */
if (rev == OLD_FLAT_VERSION && id != 0) {
- printk("BINFMT_FLAT: shared libraries are not available before rev 0x%x\n",
- (int) FLAT_VERSION);
+ pr_err("shared libraries are not available before rev 0x%lx\n",
+ FLAT_VERSION);
+ ret = -ENOEXEC;
+ goto err;
+ }
+
+ /*
+ * Make sure the header params are sane.
+ * 28 bits (256 MB) is way more than reasonable in this case.
+ * If some top bits are set we have probable binary corruption.
+ */
+ if ((text_len | data_len | bss_len | stack_len | full_data) >> 28) {
+ pr_err("bad header\n");
ret = -ENOEXEC;
goto err;
}
#ifndef CONFIG_BINFMT_ZFLAT
if (flags & (FLAT_FLAG_GZIP|FLAT_FLAG_GZDATA)) {
- printk("Support for ZFLAT executables is not enabled.\n");
+ pr_err("Support for ZFLAT executables is not enabled.\n");
ret = -ENOEXEC;
goto err;
}
/* Flush all traces of the currently running executable */
if (id == 0) {
- result = flush_old_exec(bprm);
- if (result) {
- ret = result;
+ ret = flush_old_exec(bprm);
+ if (ret)
goto err;
- }
/* OK, This is the point of no return */
set_personality(PER_LINUX_32BIT);
* case, and then the fully copied to RAM case which lumps
* it all together.
*/
- if ((flags & (FLAT_FLAG_RAM|FLAT_FLAG_GZIP)) == 0) {
+ if (!IS_ENABLED(CONFIG_MMU) && !(flags & (FLAT_FLAG_RAM|FLAT_FLAG_GZIP))) {
/*
* this should give us a ROM ptr, but if it doesn't we don't
* really care
*/
- DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
+ pr_debug("ROM mapping of file (we hope)\n");
textpos = vm_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
MAP_PRIVATE|MAP_EXECUTABLE, 0);
if (!textpos || IS_ERR_VALUE(textpos)) {
- if (!textpos)
- textpos = (unsigned long) -ENOMEM;
- printk("Unable to mmap process text, errno %d\n", (int)-textpos);
ret = textpos;
+ if (!textpos)
+ ret = -ENOMEM;
+ pr_err("Unable to mmap process text, errno %d\n", ret);
goto err;
}
len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
len = PAGE_ALIGN(len);
- realdatastart = vm_mmap(0, 0, len,
+ realdatastart = vm_mmap(NULL, 0, len,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) {
+ ret = realdatastart;
if (!realdatastart)
- realdatastart = (unsigned long) -ENOMEM;
- printk("Unable to allocate RAM for process data, errno %d\n",
- (int)-realdatastart);
+ ret = -ENOMEM;
+ pr_err("Unable to allocate RAM for process data, "
+ "errno %d\n", ret);
vm_munmap(textpos, text_len);
- ret = realdatastart;
goto err;
}
datapos = ALIGN(realdatastart +
MAX_SHARED_LIBS * sizeof(unsigned long),
FLAT_DATA_ALIGN);
- DBG_FLT("BINFMT_FLAT: Allocated data+bss+stack (%d bytes): %x\n",
- (int)(data_len + bss_len + stack_len), (int)datapos);
+ pr_debug("Allocated data+bss+stack (%ld bytes): %lx\n",
+ data_len + bss_len + stack_len, datapos);
fpos = ntohl(hdr->data_start);
#ifdef CONFIG_BINFMT_ZFLAT
if (flags & FLAT_FLAG_GZDATA) {
- result = decompress_exec(bprm, fpos, (char *) datapos,
+ result = decompress_exec(bprm, fpos, (char *)datapos,
full_data, 0);
} else
#endif
full_data);
}
if (IS_ERR_VALUE(result)) {
- printk("Unable to read data+bss, errno %d\n", (int)-result);
+ ret = result;
+ pr_err("Unable to read data+bss, errno %d\n", ret);
vm_munmap(textpos, text_len);
vm_munmap(realdatastart, len);
- ret = result;
goto err;
}
- reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len));
+ reloc = (unsigned long __user *)
+ (datapos + (ntohl(hdr->reloc_start) - text_len));
memp = realdatastart;
memp_size = len;
} else {
len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
len = PAGE_ALIGN(len);
- textpos = vm_mmap(0, 0, len,
+ textpos = vm_mmap(NULL, 0, len,
PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
if (!textpos || IS_ERR_VALUE(textpos)) {
- if (!textpos)
- textpos = (unsigned long) -ENOMEM;
- printk("Unable to allocate RAM for process text/data, errno %d\n",
- (int)-textpos);
ret = textpos;
+ if (!textpos)
+ ret = -ENOMEM;
+ pr_err("Unable to allocate RAM for process text/data, "
+ "errno %d\n", ret);
goto err;
}
MAX_SHARED_LIBS * sizeof(unsigned long),
FLAT_DATA_ALIGN);
- reloc = (unsigned long *)
+ reloc = (unsigned long __user *)
(datapos + (ntohl(hdr->reloc_start) - text_len));
memp = textpos;
memp_size = len;
* load it all in and treat it like a RAM load from now on
*/
if (flags & FLAT_FLAG_GZIP) {
- result = decompress_exec(bprm, sizeof (struct flat_hdr),
- (((char *) textpos) + sizeof (struct flat_hdr)),
+#ifndef CONFIG_MMU
+ result = decompress_exec(bprm, sizeof(struct flat_hdr),
+ (((char *)textpos) + sizeof(struct flat_hdr)),
(text_len + full_data
- - sizeof (struct flat_hdr)),
+ - sizeof(struct flat_hdr)),
0);
memmove((void *) datapos, (void *) realdatastart,
full_data);
+#else
+ /*
+ * This is used on MMU systems mainly for testing.
+ * Let's use a kernel buffer to simplify things.
+ */
+ long unz_text_len = text_len - sizeof(struct flat_hdr);
+ long unz_len = unz_text_len + full_data;
+ char *unz_data = vmalloc(unz_len);
+ if (!unz_data) {
+ result = -ENOMEM;
+ } else {
+ result = decompress_exec(bprm, sizeof(struct flat_hdr),
+ unz_data, unz_len, 0);
+ if (result == 0 &&
+ (copy_to_user((void __user *)textpos + sizeof(struct flat_hdr),
+ unz_data, unz_text_len) ||
+ copy_to_user((void __user *)datapos,
+ unz_data + unz_text_len, full_data)))
+ result = -EFAULT;
+ vfree(unz_data);
+ }
+#endif
} else if (flags & FLAT_FLAG_GZDATA) {
result = read_code(bprm->file, textpos, 0, text_len);
- if (!IS_ERR_VALUE(result))
+ if (!IS_ERR_VALUE(result)) {
+#ifndef CONFIG_MMU
result = decompress_exec(bprm, text_len, (char *) datapos,
full_data, 0);
- }
- else
+#else
+ char *unz_data = vmalloc(full_data);
+ if (!unz_data) {
+ result = -ENOMEM;
+ } else {
+ result = decompress_exec(bprm, text_len,
+ unz_data, full_data, 0);
+ if (result == 0 &&
+ copy_to_user((void __user *)datapos,
+ unz_data, full_data))
+ result = -EFAULT;
+ vfree(unz_data);
+ }
#endif
+ }
+ } else
+#endif /* CONFIG_BINFMT_ZFLAT */
{
result = read_code(bprm->file, textpos, 0, text_len);
if (!IS_ERR_VALUE(result))
full_data);
}
if (IS_ERR_VALUE(result)) {
- printk("Unable to read code+data+bss, errno %d\n",(int)-result);
+ ret = result;
+ pr_err("Unable to read code+data+bss, errno %d\n", ret);
vm_munmap(textpos, text_len + data_len + extra +
MAX_SHARED_LIBS * sizeof(unsigned long));
- ret = result;
goto err;
}
}
- if (flags & FLAT_FLAG_KTRACE)
- printk("Mapping is %x, Entry point is %x, data_start is %x\n",
- (int)textpos, 0x00ffffff&ntohl(hdr->entry), ntohl(hdr->data_start));
+ start_code = textpos + sizeof(struct flat_hdr);
+ end_code = textpos + text_len;
+ text_len -= sizeof(struct flat_hdr); /* the real code len */
/* The main program needs a little extra setup in the task structure */
- start_code = textpos + sizeof (struct flat_hdr);
- end_code = textpos + text_len;
if (id == 0) {
current->mm->start_code = start_code;
current->mm->end_code = end_code;
*/
current->mm->start_brk = datapos + data_len + bss_len;
current->mm->brk = (current->mm->start_brk + 3) & ~3;
+#ifndef CONFIG_MMU
current->mm->context.end_brk = memp + memp_size - stack_len;
+#endif
}
- if (flags & FLAT_FLAG_KTRACE)
- printk("%s %s: TEXT=%x-%x DATA=%x-%x BSS=%x-%x\n",
+ if (flags & FLAT_FLAG_KTRACE) {
+ pr_info("Mapping is %lx, Entry point is %x, data_start is %x\n",
+ textpos, 0x00ffffff&ntohl(hdr->entry), ntohl(hdr->data_start));
+ pr_info("%s %s: TEXT=%lx-%lx DATA=%lx-%lx BSS=%lx-%lx\n",
id ? "Lib" : "Load", bprm->filename,
- (int) start_code, (int) end_code,
- (int) datapos,
- (int) (datapos + data_len),
- (int) (datapos + data_len),
- (int) (((datapos + data_len + bss_len) + 3) & ~3));
-
- text_len -= sizeof(struct flat_hdr); /* the real code len */
+ start_code, end_code, datapos, datapos + data_len,
+ datapos + data_len, (datapos + data_len + bss_len + 3) & ~3);
+ }
/* Store the current module values into the global library structure */
libinfo->lib_list[id].start_code = start_code;
libinfo->lib_list[id].loaded = 1;
libinfo->lib_list[id].entry = (0x00ffffff & ntohl(hdr->entry)) + textpos;
libinfo->lib_list[id].build_date = ntohl(hdr->build_date);
-
+
/*
* We just load the allocations into some temporary memory to
* help simplify all this mumbo jumbo
* image.
*/
if (flags & FLAT_FLAG_GOTPIC) {
- for (rp = (unsigned long *)datapos; *rp != 0xffffffff; rp++) {
- unsigned long addr;
- if (*rp) {
- addr = calc_reloc(*rp, libinfo, id, 0);
+ for (rp = (unsigned long __user *)datapos; ; rp++) {
+ unsigned long addr, rp_val;
+ if (get_user(rp_val, rp))
+ return -EFAULT;
+ if (rp_val == 0xffffffff)
+ break;
+ if (rp_val) {
+ addr = calc_reloc(rp_val, libinfo, id, 0);
if (addr == RELOC_FAILED) {
ret = -ENOEXEC;
goto err;
}
- *rp = addr;
+ if (put_user(addr, rp))
+ return -EFAULT;
}
}
}
* __start to address 4 so that is okay).
*/
if (rev > OLD_FLAT_VERSION) {
- unsigned long persistent = 0;
- for (i=0; i < relocs; i++) {
+ unsigned long __maybe_unused persistent = 0;
+ for (i = 0; i < relocs; i++) {
unsigned long addr, relval;
- /* Get the address of the pointer to be
- relocated (of course, the address has to be
- relocated first). */
- relval = ntohl(reloc[i]);
- if (flat_set_persistent (relval, &persistent))
+ /*
+ * Get the address of the pointer to be
+ * relocated (of course, the address has to be
+ * relocated first).
+ */
+ if (get_user(relval, reloc + i))
+ return -EFAULT;
+ relval = ntohl(relval);
+ if (flat_set_persistent(relval, &persistent))
continue;
addr = flat_get_relocate_addr(relval);
- rp = (unsigned long *) calc_reloc(addr, libinfo, id, 1);
- if (rp == (unsigned long *)RELOC_FAILED) {
+ rp = (unsigned long __user *)calc_reloc(addr, libinfo, id, 1);
+ if (rp == (unsigned long __user *)RELOC_FAILED) {
ret = -ENOEXEC;
goto err;
}
}
}
} else {
- for (i=0; i < relocs; i++)
- old_reloc(ntohl(reloc[i]));
+ for (i = 0; i < relocs; i++) {
+ unsigned long relval;
+ if (get_user(relval, reloc + i))
+ return -EFAULT;
+ relval = ntohl(relval);
+ old_reloc(relval);
+ }
}
-
+
flush_icache_range(start_code, end_code);
/* zero the BSS, BRK and stack areas */
- memset((void*)(datapos + data_len), 0, bss_len +
- (memp + memp_size - stack_len - /* end brk */
- libinfo->lib_list[id].start_brk) + /* start brk */
- stack_len);
+ if (clear_user((void __user *)(datapos + data_len), bss_len +
+ (memp + memp_size - stack_len - /* end brk */
+ libinfo->lib_list[id].start_brk) + /* start brk */
+ stack_len))
+ return -EFAULT;
return 0;
err:
allow_write_access(bprm.file);
fput(bprm.file);
- return(res);
+ return res;
}
#endif /* CONFIG_BINFMT_SHARED_FLAT */
* libraries. There is no binary dependent code anywhere else.
*/
-static int load_flat_binary(struct linux_binprm * bprm)
+static int load_flat_binary(struct linux_binprm *bprm)
{
struct lib_info libinfo;
struct pt_regs *regs = current_pt_regs();
- unsigned long p = bprm->p;
- unsigned long stack_len;
+ unsigned long stack_len = 0;
unsigned long start_addr;
- unsigned long *sp;
int res;
int i, j;
memset(&libinfo, 0, sizeof(libinfo));
+
/*
* We have to add the size of our arguments to our stack size
* otherwise it's too easy for users to create stack overflows
* pedantic and include space for the argv/envp array as it may have
* a lot of entries.
*/
-#define TOP_OF_ARGS (PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *))
- stack_len = TOP_OF_ARGS - bprm->p; /* the strings */
- stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */
- stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */
- stack_len += FLAT_STACK_ALIGN - 1; /* reserve for upcoming alignment */
-
+#ifndef CONFIG_MMU
+ stack_len += PAGE_SIZE * MAX_ARG_PAGES - bprm->p; /* the strings */
+#endif
+ stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */
+ stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */
+ stack_len = ALIGN(stack_len, FLAT_STACK_ALIGN);
+
res = load_flat_file(bprm, &libinfo, 0, &stack_len);
if (res < 0)
return res;
-
+
/* Update data segment pointers for all libraries */
- for (i=0; i<MAX_SHARED_LIBS; i++)
- if (libinfo.lib_list[i].loaded)
- for (j=0; j<MAX_SHARED_LIBS; j++)
- (-(j+1))[(unsigned long *)(libinfo.lib_list[i].start_data)] =
- (libinfo.lib_list[j].loaded)?
- libinfo.lib_list[j].start_data:UNLOADED_LIB;
+ for (i = 0; i < MAX_SHARED_LIBS; i++) {
+ if (!libinfo.lib_list[i].loaded)
+ continue;
+ for (j = 0; j < MAX_SHARED_LIBS; j++) {
+ unsigned long val = libinfo.lib_list[j].loaded ?
+ libinfo.lib_list[j].start_data : UNLOADED_LIB;
+ unsigned long __user *p = (unsigned long __user *)
+ libinfo.lib_list[i].start_data;
+ p -= j + 1;
+ if (put_user(val, p))
+ return -EFAULT;
+ }
+ }
install_exec_creds(bprm);
set_binfmt(&flat_format);
- p = ((current->mm->context.end_brk + stack_len + 3) & ~3) - 4;
- DBG_FLT("p=%x\n", (int)p);
+#ifdef CONFIG_MMU
+ res = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
+ if (!res)
+ res = create_flat_tables(bprm, bprm->p);
+#else
+ /* Stash our initial stack pointer into the mm structure */
+ current->mm->start_stack =
+ ((current->mm->context.end_brk + stack_len + 3) & ~3) - 4;
+ pr_debug("sp=%lx\n", current->mm->start_stack);
- /* copy the arg pages onto the stack, this could be more efficient :-) */
- for (i = TOP_OF_ARGS - 1; i >= bprm->p; i--)
- * (char *) --p =
- ((char *) page_address(bprm->page[i/PAGE_SIZE]))[i % PAGE_SIZE];
+ /* copy the arg pages onto the stack */
+ res = transfer_args_to_stack(bprm, ¤t->mm->start_stack);
+ if (!res)
+ res = create_flat_tables(bprm, current->mm->start_stack);
+#endif
+ if (res)
+ return res;
- sp = (unsigned long *) create_flat_tables(p, bprm);
-
/* Fake some return addresses to ensure the call chain will
* initialise library in order for us. We are required to call
* lib 1 first, then 2, ... and finally the main program (id 0).
start_addr = libinfo.lib_list[0].entry;
#ifdef CONFIG_BINFMT_SHARED_FLAT
- for (i = MAX_SHARED_LIBS-1; i>0; i--) {
+ for (i = MAX_SHARED_LIBS-1; i > 0; i--) {
if (libinfo.lib_list[i].loaded) {
/* Push previos first to call address */
- --sp; put_user(start_addr, sp);
+ unsigned long __user *sp;
+ current->mm->start_stack -= sizeof(unsigned long);
+ sp = (unsigned long __user *)current->mm->start_stack;
+ __put_user(start_addr, sp);
start_addr = libinfo.lib_list[i].entry;
}
}
#endif
-
- /* Stash our initial stack pointer into the mm structure */
- current->mm->start_stack = (unsigned long )sp;
#ifdef FLAT_PLAT_INIT
FLAT_PLAT_INIT(regs);
#endif
- DBG_FLT("start_thread(regs=0x%x, entry=0x%x, start_stack=0x%x)\n",
- (int)regs, (int)start_addr, (int)current->mm->start_stack);
-
+
+ pr_debug("start_thread(regs=0x%p, entry=0x%lx, start_stack=0x%lx)\n",
+ regs, start_addr, current->mm->start_stack);
start_thread(regs, start_addr, current->mm->start_stack);
return 0;
register_binfmt(&flat_format);
return 0;
}
-
-/****************************************************************************/
-
core_initcall(init_flat_binfmt);
/****************************************************************************/
}
if (size > 0) {
acl = posix_acl_from_xattr(&init_user_ns, value, size);
- } else if (size == -ENOENT || size == -ENODATA || size == 0) {
- /* FIXME, who returns -ENOENT? I think nobody */
+ } else if (size == -ERANGE || size == -ENODATA || size == 0) {
acl = NULL;
} else {
acl = ERR_PTR(-EIO);
struct __btrfs_workqueue {
struct workqueue_struct *normal_wq;
+
+ /* File system this workqueue services */
+ struct btrfs_fs_info *fs_info;
+
/* List head pointing to ordered work list */
struct list_head ordered_list;
normal_work_helper(work); \
}
+struct btrfs_fs_info *
+btrfs_workqueue_owner(struct __btrfs_workqueue *wq)
+{
+ return wq->fs_info;
+}
+
+struct btrfs_fs_info *
+btrfs_work_owner(struct btrfs_work *work)
+{
+ return work->wq->fs_info;
+}
+
BTRFS_WORK_HELPER(worker_helper);
BTRFS_WORK_HELPER(delalloc_helper);
BTRFS_WORK_HELPER(flush_delalloc_helper);
BTRFS_WORK_HELPER(scrubparity_helper);
static struct __btrfs_workqueue *
-__btrfs_alloc_workqueue(const char *name, unsigned int flags, int limit_active,
- int thresh)
+__btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
+ unsigned int flags, int limit_active, int thresh)
{
struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
+ ret->fs_info = fs_info;
ret->limit_active = limit_active;
atomic_set(&ret->pending, 0);
if (thresh == 0)
static inline void
__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
-struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
+struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
+ const char *name,
unsigned int flags,
int limit_active,
int thresh)
if (!ret)
return NULL;
- ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI,
+ ret->normal = __btrfs_alloc_workqueue(fs_info, name,
+ flags & ~WQ_HIGHPRI,
limit_active, thresh);
if (!ret->normal) {
kfree(ret);
}
if (flags & WQ_HIGHPRI) {
- ret->high = __btrfs_alloc_workqueue(name, flags, limit_active,
- thresh);
+ ret->high = __btrfs_alloc_workqueue(fs_info, name, flags,
+ limit_active, thresh);
if (!ret->high) {
__btrfs_destroy_workqueue(ret->normal);
kfree(ret);
#define __BTRFS_ASYNC_THREAD_
#include <linux/workqueue.h>
+struct btrfs_fs_info;
struct btrfs_workqueue;
/* Internal use only */
struct __btrfs_workqueue;
BTRFS_WORK_HELPER_PROTO(scrubparity_helper);
-struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
+struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
+ const char *name,
unsigned int flags,
int limit_active,
int thresh);
void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
void btrfs_set_work_high_priority(struct btrfs_work *work);
+struct btrfs_fs_info *btrfs_work_owner(struct btrfs_work *work);
+struct btrfs_fs_info *btrfs_workqueue_owner(struct __btrfs_workqueue *wq);
#endif
btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
sizeof(struct __prelim_ref),
0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ SLAB_MEM_SPREAD,
NULL);
if (!btrfs_prelim_ref_cache)
return -ENOMEM;
goto out;
}
- if (btrfs_test_is_dummy_root(root)) {
+ if (btrfs_is_testing(fs_info)) {
srcu_read_unlock(&fs_info->subvol_srcu, index);
ret = -ENOENT;
goto out;
}
ret = btrfs_map_bio(root, bio, 0, 1);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret) {
+ bio->bi_error = ret;
+ bio_endio(bio);
+ }
bio_put(bio);
}
ret = btrfs_map_bio(root, bio, 0, 1);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret) {
+ bio->bi_error = ret;
+ bio_endio(bio);
+ }
bio_put(bio);
return 0;
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
}
if (last_ref) {
ret = tree_mod_log_free_eb(root->fs_info, buf);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
}
struct btrfs_root *root,
struct extent_buffer *buf)
{
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(root->fs_info))
return 0;
/* ensure we can see the force_cow */
unsigned long map_len = 0;
int err;
+ if (low > high) {
+ btrfs_err(eb->fs_info,
+ "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
+ __func__, low, high, eb->start,
+ btrfs_header_owner(eb), btrfs_header_level(eb));
+ return -EINVAL;
+ }
+
while (low < high) {
mid = (low + high) / 2;
offset = p + mid * item_size;
/* given a node and slot number, this reads the blocks it points to. The
* extent buffer is returned with a reference taken (but unlocked).
- * NULL is returned on error.
*/
static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
struct extent_buffer *parent, int slot)
int level = btrfs_header_level(parent);
struct extent_buffer *eb;
- if (slot < 0)
- return NULL;
- if (slot >= btrfs_header_nritems(parent))
- return NULL;
+ if (slot < 0 || slot >= btrfs_header_nritems(parent))
+ return ERR_PTR(-ENOENT);
BUG_ON(level == 0);
eb = read_tree_block(root, btrfs_node_blockptr(parent, slot),
btrfs_node_ptr_generation(parent, slot));
- if (IS_ERR(eb) || !extent_buffer_uptodate(eb)) {
- if (!IS_ERR(eb))
- free_extent_buffer(eb);
- eb = NULL;
+ if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
+ free_extent_buffer(eb);
+ eb = ERR_PTR(-EIO);
}
return eb;
/* promote the child to a root */
child = read_node_slot(root, mid, 0);
- if (!child) {
- ret = -EROFS;
+ if (IS_ERR(child)) {
+ ret = PTR_ERR(child);
btrfs_handle_fs_error(root->fs_info, ret, NULL);
goto enospc;
}
return 0;
left = read_node_slot(root, parent, pslot - 1);
+ if (IS_ERR(left))
+ left = NULL;
+
if (left) {
btrfs_tree_lock(left);
btrfs_set_lock_blocking(left);
goto enospc;
}
}
+
right = read_node_slot(root, parent, pslot + 1);
+ if (IS_ERR(right))
+ right = NULL;
+
if (right) {
btrfs_tree_lock(right);
btrfs_set_lock_blocking(right);
return 1;
left = read_node_slot(root, parent, pslot - 1);
+ if (IS_ERR(left))
+ left = NULL;
/* first, try to make some room in the middle buffer */
if (left) {
free_extent_buffer(left);
}
right = read_node_slot(root, parent, pslot + 1);
+ if (IS_ERR(right))
+ right = NULL;
/*
* then try to empty the right most buffer into the middle
ret = tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
push_items);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
copy_extent_buffer(dst, src,
ret = tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
src_nritems - push_items, push_items);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
copy_extent_buffer(dst, src,
ret = tree_mod_log_eb_copy(root->fs_info, split, c, 0,
mid, c_nritems - mid);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
copy_extent_buffer(split, c,
btrfs_assert_tree_locked(path->nodes[1]);
right = read_node_slot(root, upper, slot + 1);
- if (right == NULL)
+ /*
+ * slot + 1 is not valid or we fail to read the right node,
+ * no big deal, just return.
+ */
+ if (IS_ERR(right))
return 1;
btrfs_tree_lock(right);
btrfs_assert_tree_locked(path->nodes[1]);
left = read_node_slot(root, path->nodes[1], slot - 1);
- if (left == NULL)
+ /*
+ * slot - 1 is not valid or we fail to read the left node,
+ * no big deal, just return.
+ */
+ if (IS_ERR(left))
return 1;
btrfs_tree_lock(left);
}
btrfs_set_path_blocking(path);
cur = read_node_slot(root, cur, slot);
- BUG_ON(!cur); /* -ENOMEM */
+ if (IS_ERR(cur)) {
+ ret = PTR_ERR(cur);
+ goto out;
+ }
btrfs_tree_read_lock(cur);
return ret;
}
-static void tree_move_down(struct btrfs_root *root,
+static int tree_move_down(struct btrfs_root *root,
struct btrfs_path *path,
int *level, int root_level)
{
+ struct extent_buffer *eb;
+
BUG_ON(*level == 0);
- path->nodes[*level - 1] = read_node_slot(root, path->nodes[*level],
- path->slots[*level]);
+ eb = read_node_slot(root, path->nodes[*level], path->slots[*level]);
+ if (IS_ERR(eb))
+ return PTR_ERR(eb);
+
+ path->nodes[*level - 1] = eb;
path->slots[*level - 1] = 0;
(*level)--;
+ return 0;
}
static int tree_move_next_or_upnext(struct btrfs_root *root,
if (*level == 0 || !allow_down) {
ret = tree_move_next_or_upnext(root, path, level, root_level);
} else {
- tree_move_down(root, path, level, root_level);
- ret = 0;
+ ret = tree_move_down(root, path, level, root_level);
}
if (ret >= 0) {
if (*level == 0)
left_root_level,
advance_left != ADVANCE_ONLY_NEXT,
&left_key);
- if (ret < 0)
+ if (ret == -1)
left_end_reached = ADVANCE;
+ else if (ret < 0)
+ goto out;
advance_left = 0;
}
if (advance_right && !right_end_reached) {
right_root_level,
advance_right != ADVANCE_ONLY_NEXT,
&right_key);
- if (ret < 0)
+ if (ret == -1)
right_end_reached = ADVANCE;
+ else if (ret < 0)
+ goto out;
advance_right = 0;
}
#define BTRFS_FS_STATE_REMOUNTING 1
#define BTRFS_FS_STATE_TRANS_ABORTED 2
#define BTRFS_FS_STATE_DEV_REPLACING 3
+#define BTRFS_FS_STATE_DUMMY_FS_INFO 4
#define BTRFS_BACKREF_REV_MAX 256
#define BTRFS_BACKREF_REV_SHIFT 56
u8 level;
} __attribute__ ((__packed__));
-#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \
- sizeof(struct btrfs_header)) / \
- sizeof(struct btrfs_key_ptr))
-#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header))
-#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->nodesize))
-#define BTRFS_FILE_EXTENT_INLINE_DATA_START \
- (offsetof(struct btrfs_file_extent_item, disk_bytenr))
-#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
- sizeof(struct btrfs_item) - \
- BTRFS_FILE_EXTENT_INLINE_DATA_START)
-#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
- sizeof(struct btrfs_item) -\
- sizeof(struct btrfs_dir_item))
-
-
/*
* this is a very generous portion of the super block, giving us
* room to translate 14 chunks with 3 stripes each.
#define BTRFS_ROOT_REF_COWS 1
#define BTRFS_ROOT_TRACK_DIRTY 2
#define BTRFS_ROOT_IN_RADIX 3
-#define BTRFS_ROOT_DUMMY_ROOT 4
-#define BTRFS_ROOT_ORPHAN_ITEM_INSERTED 5
-#define BTRFS_ROOT_DEFRAG_RUNNING 6
-#define BTRFS_ROOT_FORCE_COW 7
-#define BTRFS_ROOT_MULTI_LOG_TASKS 8
-#define BTRFS_ROOT_DIRTY 9
+#define BTRFS_ROOT_ORPHAN_ITEM_INSERTED 4
+#define BTRFS_ROOT_DEFRAG_RUNNING 5
+#define BTRFS_ROOT_FORCE_COW 6
+#define BTRFS_ROOT_MULTI_LOG_TASKS 7
+#define BTRFS_ROOT_DIRTY 8
/*
* in ram representation of the tree. extent_root is used for all allocations
u64 highest_objectid;
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* only used with CONFIG_BTRFS_FS_RUN_SANITY_TESTS is enabled */
u64 alloc_bytenr;
+#endif
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
atomic_t qgroup_meta_rsv;
};
+static inline u32 __BTRFS_LEAF_DATA_SIZE(u32 blocksize)
+{
+ return blocksize - sizeof(struct btrfs_header);
+}
+
+static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_root *root)
+{
+ return __BTRFS_LEAF_DATA_SIZE(root->nodesize);
+}
+
+static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_root *root)
+{
+ return BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
+}
+
+static inline u32 BTRFS_NODEPTRS_PER_BLOCK(const struct btrfs_root *root)
+{
+ return BTRFS_LEAF_DATA_SIZE(root) / sizeof(struct btrfs_key_ptr);
+}
+
+#define BTRFS_FILE_EXTENT_INLINE_DATA_START \
+ (offsetof(struct btrfs_file_extent_item, disk_bytenr))
+static inline u32 BTRFS_MAX_INLINE_DATA_SIZE(const struct btrfs_root *root)
+{
+ return BTRFS_MAX_ITEM_SIZE(root) -
+ BTRFS_FILE_EXTENT_INLINE_DATA_START;
+}
+
+static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_root *root)
+{
+ return BTRFS_MAX_ITEM_SIZE(root) - sizeof(struct btrfs_dir_item);
+}
+
/*
* Flags for mount options.
*
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
#define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt)
-#define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \
+#define btrfs_test_opt(fs_info, opt) ((fs_info)->mount_opt & \
BTRFS_MOUNT_##opt)
-#define btrfs_set_and_info(root, opt, fmt, args...) \
+#define btrfs_set_and_info(fs_info, opt, fmt, args...) \
{ \
- if (!btrfs_test_opt(root, opt)) \
- btrfs_info(root->fs_info, fmt, ##args); \
- btrfs_set_opt(root->fs_info->mount_opt, opt); \
+ if (!btrfs_test_opt(fs_info, opt)) \
+ btrfs_info(fs_info, fmt, ##args); \
+ btrfs_set_opt(fs_info->mount_opt, opt); \
}
-#define btrfs_clear_and_info(root, opt, fmt, args...) \
+#define btrfs_clear_and_info(fs_info, opt, fmt, args...) \
{ \
- if (btrfs_test_opt(root, opt)) \
- btrfs_info(root->fs_info, fmt, ##args); \
- btrfs_clear_opt(root->fs_info->mount_opt, opt); \
+ if (btrfs_test_opt(fs_info, opt)) \
+ btrfs_info(fs_info, fmt, ##args); \
+ btrfs_clear_opt(fs_info->mount_opt, opt); \
}
#ifdef CONFIG_BTRFS_DEBUG
btrfs_should_fragment_free_space(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group)
{
- return (btrfs_test_opt(root, FRAGMENT_METADATA) &&
+ return (btrfs_test_opt(root->fs_info, FRAGMENT_METADATA) &&
block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
- (btrfs_test_opt(root, FRAGMENT_DATA) &&
+ (btrfs_test_opt(root->fs_info, FRAGMENT_DATA) &&
block_group->flags & BTRFS_BLOCK_GROUP_DATA);
}
#endif
int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
/* root-item.c */
-int btrfs_find_root_ref(struct btrfs_root *tree_root,
- struct btrfs_path *path,
- u64 root_id, u64 ref_id);
int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root,
u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, const char *function,
+ const char *function,
unsigned int line, int errno);
/*
* Call btrfs_abort_transaction as early as possible when an error condition is
* detected, that way the exact line number is reported.
*/
-#define btrfs_abort_transaction(trans, root, errno) \
+#define btrfs_abort_transaction(trans, errno) \
do { \
/* Report first abort since mount */ \
if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \
- &((root)->fs_info->fs_state))) { \
+ &((trans)->fs_info->fs_state))) { \
WARN(1, KERN_DEBUG \
"BTRFS: Transaction aborted (error %d)\n", \
(errno)); \
} \
- __btrfs_abort_transaction((trans), (root), __func__, \
+ __btrfs_abort_transaction((trans), __func__, \
__LINE__, (errno)); \
} while (0)
void btrfs_test_destroy_inode(struct inode *inode);
#endif
-static inline int btrfs_test_is_dummy_root(struct btrfs_root *root)
+static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
{
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
- if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+ if (unlikely(test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO,
+ &fs_info->fs_state)))
return 1;
#endif
return 0;
}
-
#endif
--- /dev/null
+/*
+ * Copyright (C) 2016 Fujitsu. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_DEDUPE__
+#define __BTRFS_DEDUPE__
+
+/* later in-band dedupe will expand this struct */
+struct btrfs_dedupe_hash;
+#endif
delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
sizeof(struct btrfs_delayed_node),
0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ SLAB_MEM_SPREAD,
NULL);
if (!delayed_node_cache)
return -ENOMEM;
if (ret) {
btrfs_release_delayed_node(curr_node);
curr_node = NULL;
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
break;
}
qrecord->num_bytes = num_bytes;
qrecord->old_roots = NULL;
- qexisting = btrfs_qgroup_insert_dirty_extent(delayed_refs,
+ qexisting = btrfs_qgroup_insert_dirty_extent(fs_info,
+ delayed_refs,
qrecord);
if (qexisting)
kfree(qrecord);
spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex);
- trace_add_delayed_ref_head(ref, head_ref, action);
+ trace_add_delayed_ref_head(fs_info, ref, head_ref, action);
existing = htree_insert(&delayed_refs->href_root,
&head_ref->href_node);
ref->type = BTRFS_TREE_BLOCK_REF_KEY;
full_ref->level = level;
- trace_add_delayed_tree_ref(ref, full_ref, action);
+ trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);
ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
full_ref->objectid = owner;
full_ref->offset = offset;
- trace_add_delayed_data_ref(ref, full_ref, action);
+ trace_add_delayed_data_ref(fs_info, ref, full_ref, action);
ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
btrfs_delayed_ref_head_cachep = kmem_cache_create(
"btrfs_delayed_ref_head",
sizeof(struct btrfs_delayed_ref_head), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_ref_head_cachep)
goto fail;
btrfs_delayed_tree_ref_cachep = kmem_cache_create(
"btrfs_delayed_tree_ref",
sizeof(struct btrfs_delayed_tree_ref), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_tree_ref_cachep)
goto fail;
btrfs_delayed_data_ref_cachep = kmem_cache_create(
"btrfs_delayed_data_ref",
sizeof(struct btrfs_delayed_data_ref), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_data_ref_cachep)
goto fail;
btrfs_delayed_extent_op_cachep = kmem_cache_create(
"btrfs_delayed_extent_op",
sizeof(struct btrfs_delayed_extent_op), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_extent_op_cachep)
goto fail;
* missing
*/
if (!dev_replace->srcdev &&
- !btrfs_test_opt(dev_root, DEGRADED)) {
+ !btrfs_test_opt(dev_root->fs_info, DEGRADED)) {
ret = -EIO;
btrfs_warn(fs_info,
"cannot mount because device replace operation is ongoing and");
src_devid);
}
if (!dev_replace->tgtdev &&
- !btrfs_test_opt(dev_root, DEGRADED)) {
+ !btrfs_test_opt(dev_root->fs_info, DEGRADED)) {
ret = -EIO;
btrfs_warn(fs_info,
"cannot mount because device replace operation is ongoing and");
btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
sizeof(struct btrfs_end_io_wq),
0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ SLAB_MEM_SPREAD,
NULL);
if (!btrfs_end_io_wq_cache)
return -ENOMEM;
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr)
{
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(root->fs_info))
return alloc_test_extent_buffer(root->fs_info, bytenr,
root->nodesize);
return alloc_extent_buffer(root->fs_info, bytenr);
struct btrfs_root *root, struct btrfs_fs_info *fs_info,
u64 objectid)
{
+ bool dummy = test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
root->node = NULL;
root->commit_root = NULL;
root->sectorsize = sectorsize;
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
- if (fs_info)
+ if (!dummy)
extent_io_tree_init(&root->dirty_log_pages,
fs_info->btree_inode->i_mapping);
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
- if (fs_info)
+ if (!dummy)
root->defrag_trans_start = fs_info->generation;
else
root->defrag_trans_start = 0;
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* Should only be used by the testing infrastructure */
-struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize)
+struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info,
+ u32 sectorsize, u32 nodesize)
{
struct btrfs_root *root;
- root = btrfs_alloc_root(NULL, GFP_KERNEL);
+ if (!fs_info)
+ return ERR_PTR(-EINVAL);
+
+ root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!root)
return ERR_PTR(-ENOMEM);
/* We don't use the stripesize in selftest, set it as sectorsize */
- __setup_root(nodesize, sectorsize, sectorsize, root, NULL,
+ __setup_root(nodesize, sectorsize, sectorsize, root, fs_info,
BTRFS_ROOT_TREE_OBJECTID);
- set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state);
root->alloc_bytenr = 0;
return root;
ret = get_anon_bdev(&root->anon_dev);
if (ret)
- goto free_writers;
+ goto fail;
mutex_lock(&root->objectid_mutex);
ret = btrfs_find_highest_objectid(root,
&root->highest_objectid);
if (ret) {
mutex_unlock(&root->objectid_mutex);
- goto free_root_dev;
+ goto fail;
}
ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
mutex_unlock(&root->objectid_mutex);
return 0;
-
-free_root_dev:
- free_anon_bdev(root->anon_dev);
-free_writers:
- btrfs_free_subvolume_writers(root->subv_writers);
fail:
- kfree(root->free_ino_ctl);
- kfree(root->free_ino_pinned);
+ /* the caller is responsible to call free_fs_root */
return ret;
}
unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
fs_info->workers =
- btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
- max_active, 16);
+ btrfs_alloc_workqueue(fs_info, "worker",
+ flags | WQ_HIGHPRI, max_active, 16);
fs_info->delalloc_workers =
- btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
+ btrfs_alloc_workqueue(fs_info, "delalloc",
+ flags, max_active, 2);
fs_info->flush_workers =
- btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
+ btrfs_alloc_workqueue(fs_info, "flush_delalloc",
+ flags, max_active, 0);
fs_info->caching_workers =
- btrfs_alloc_workqueue("cache", flags, max_active, 0);
+ btrfs_alloc_workqueue(fs_info, "cache", flags, max_active, 0);
/*
* a higher idle thresh on the submit workers makes it much more
* devices
*/
fs_info->submit_workers =
- btrfs_alloc_workqueue("submit", flags,
+ btrfs_alloc_workqueue(fs_info, "submit", flags,
min_t(u64, fs_devices->num_devices,
max_active), 64);
fs_info->fixup_workers =
- btrfs_alloc_workqueue("fixup", flags, 1, 0);
+ btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0);
/*
* endios are largely parallel and should have a very
* low idle thresh
*/
fs_info->endio_workers =
- btrfs_alloc_workqueue("endio", flags, max_active, 4);
+ btrfs_alloc_workqueue(fs_info, "endio", flags, max_active, 4);
fs_info->endio_meta_workers =
- btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
+ btrfs_alloc_workqueue(fs_info, "endio-meta", flags,
+ max_active, 4);
fs_info->endio_meta_write_workers =
- btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
+ btrfs_alloc_workqueue(fs_info, "endio-meta-write", flags,
+ max_active, 2);
fs_info->endio_raid56_workers =
- btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
+ btrfs_alloc_workqueue(fs_info, "endio-raid56", flags,
+ max_active, 4);
fs_info->endio_repair_workers =
- btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
+ btrfs_alloc_workqueue(fs_info, "endio-repair", flags, 1, 0);
fs_info->rmw_workers =
- btrfs_alloc_workqueue("rmw", flags, max_active, 2);
+ btrfs_alloc_workqueue(fs_info, "rmw", flags, max_active, 2);
fs_info->endio_write_workers =
- btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
+ btrfs_alloc_workqueue(fs_info, "endio-write", flags,
+ max_active, 2);
fs_info->endio_freespace_worker =
- btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
+ btrfs_alloc_workqueue(fs_info, "freespace-write", flags,
+ max_active, 0);
fs_info->delayed_workers =
- btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
+ btrfs_alloc_workqueue(fs_info, "delayed-meta", flags,
+ max_active, 0);
fs_info->readahead_workers =
- btrfs_alloc_workqueue("readahead", flags, max_active, 2);
+ btrfs_alloc_workqueue(fs_info, "readahead", flags,
+ max_active, 2);
fs_info->qgroup_rescan_workers =
- btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
+ btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0);
fs_info->extent_workers =
- btrfs_alloc_workqueue("extent-refs", flags,
+ btrfs_alloc_workqueue(fs_info, "extent-refs", flags,
min_t(u64, fs_devices->num_devices,
max_active), 8);
if (IS_ERR(fs_info->transaction_kthread))
goto fail_cleaner;
- if (!btrfs_test_opt(tree_root, SSD) &&
- !btrfs_test_opt(tree_root, NOSSD) &&
+ if (!btrfs_test_opt(tree_root->fs_info, SSD) &&
+ !btrfs_test_opt(tree_root->fs_info, NOSSD) &&
!fs_info->fs_devices->rotating) {
btrfs_info(fs_info, "detected SSD devices, enabling SSD mode");
btrfs_set_opt(fs_info->mount_opt, SSD);
btrfs_apply_pending_changes(fs_info);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) {
+ if (btrfs_test_opt(tree_root->fs_info, CHECK_INTEGRITY)) {
ret = btrfsic_mount(tree_root, fs_devices,
- btrfs_test_opt(tree_root,
+ btrfs_test_opt(tree_root->fs_info,
CHECK_INTEGRITY_INCLUDING_EXTENT_DATA) ?
1 : 0,
fs_info->check_integrity_print_mask);
/* do not make disk changes in broken FS or nologreplay is given */
if (btrfs_super_log_root(disk_super) != 0 &&
- !btrfs_test_opt(tree_root, NOLOGREPLAY)) {
+ !btrfs_test_opt(tree_root->fs_info, NOLOGREPLAY)) {
ret = btrfs_replay_log(fs_info, fs_devices);
if (ret) {
err = ret;
if (sb->s_flags & MS_RDONLY)
return 0;
- if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) &&
+ if (btrfs_test_opt(tree_root->fs_info, FREE_SPACE_TREE) &&
!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
btrfs_info(fs_info, "creating free space tree");
ret = btrfs_create_free_space_tree(fs_info);
btrfs_qgroup_rescan_resume(fs_info);
- if (btrfs_test_opt(tree_root, CLEAR_CACHE) &&
+ if (btrfs_test_opt(tree_root->fs_info, CLEAR_CACHE) &&
btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
btrfs_info(fs_info, "clearing free space tree");
ret = btrfs_clear_free_space_tree(fs_info);
close_ctree(tree_root);
return ret;
}
- } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) ||
+ } else if (btrfs_test_opt(tree_root->fs_info, RESCAN_UUID_TREE) ||
fs_info->generation !=
btrfs_super_uuid_tree_generation(disk_super)) {
btrfs_info(fs_info, "checking UUID tree");
return err;
recovery_tree_root:
- if (!btrfs_test_opt(tree_root, USEBACKUPROOT))
+ if (!btrfs_test_opt(tree_root->fs_info, USEBACKUPROOT))
goto fail_tree_roots;
free_root_pointers(fs_info, 0);
int total_errors = 0;
u64 flags;
- do_barriers = !btrfs_test_opt(root, NOBARRIER);
+ do_barriers = !btrfs_test_opt(root->fs_info, NOBARRIER);
backup_super_roots(root->fs_info);
sb = root->fs_info->super_for_commit;
iput(fs_info->btree_inode);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- if (btrfs_test_opt(root, CHECK_INTEGRITY))
+ if (btrfs_test_opt(root->fs_info, CHECK_INTEGRITY))
btrfsic_unmount(root, fs_info->fs_devices);
#endif
void btrfs_free_fs_root(struct btrfs_root *root);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize);
+struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info,
+ u32 sectorsize, u32 nodesize);
#endif
/*
path, bytenr, parent, root_objectid,
owner, offset, refs_to_add);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
out:
btrfs_free_path(path);
return ret;
ins.type = BTRFS_EXTENT_ITEM_KEY;
ref = btrfs_delayed_node_to_data_ref(node);
- trace_run_delayed_data_ref(node, ref, node->action);
+ trace_run_delayed_data_ref(root->fs_info, node, ref, node->action);
if (node->type == BTRFS_SHARED_DATA_REF_KEY)
parent = ref->parent;
SKINNY_METADATA);
ref = btrfs_delayed_node_to_tree_ref(node);
- trace_run_delayed_tree_ref(node, ref, node->action);
+ trace_run_delayed_tree_ref(root->fs_info, node, ref, node->action);
if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
parent = ref->parent;
*/
BUG_ON(extent_op);
head = btrfs_delayed_node_to_head(node);
- trace_run_delayed_ref_head(node, head, node->action);
+ trace_run_delayed_ref_head(root->fs_info, node, head,
+ node->action);
if (insert_reserved) {
btrfs_pin_extent(root, node->bytenr,
u64 num_csums_per_leaf;
u64 num_csums;
- csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
+ csum_size = BTRFS_MAX_ITEM_SIZE(root);
num_csums_per_leaf = div64_u64(csum_size,
(u64)btrfs_super_csum_size(root->fs_info->super_copy));
num_csums = div64_u64(csum_bytes, root->sectorsize);
trans->can_flush_pending_bgs = false;
ret = __btrfs_run_delayed_refs(trans, root, count);
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
u64, u64, u64, u64, u64, u64);
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(root->fs_info))
return 0;
ref_root = btrfs_header_owner(buf);
* transaction, this only happens in really bad situations
* anyway.
*/
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_put;
}
WARN_ON(ret);
spin_lock(&block_group->lock);
if (block_group->cached != BTRFS_CACHE_FINISHED ||
- !btrfs_test_opt(root, SPACE_CACHE)) {
+ !btrfs_test_opt(root->fs_info, SPACE_CACHE)) {
/*
* don't bother trying to write stuff out _if_
* a) we're not cached,
struct btrfs_path *path;
if (list_empty(&cur_trans->dirty_bgs) ||
- !btrfs_test_opt(root, SPACE_CACHE))
+ !btrfs_test_opt(root->fs_info, SPACE_CACHE))
return 0;
path = btrfs_alloc_path();
}
spin_unlock(&cur_trans->dirty_bgs_lock);
} else if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
}
}
cache);
}
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
}
/* if its not on the io list, we need to put the block group */
thresh = btrfs_calc_trunc_metadata_size(root, num_devs) +
btrfs_calc_trans_metadata_size(root, 1);
- if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
+ if (left < thresh && btrfs_test_opt(root->fs_info, ENOSPC_DEBUG)) {
btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
left, thresh, type);
dump_space_info(info, 0, 0);
*/
if (trans->can_flush_pending_bgs &&
trans->chunk_bytes_reserved >= (u64)SZ_2M) {
- btrfs_create_pending_block_groups(trans, trans->root);
+ btrfs_create_pending_block_groups(trans, extent_root);
btrfs_trans_release_chunk_metadata(trans);
}
return ret;
*/
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
{
- struct btrfs_fs_info *fs_info = trans->root->fs_info;
+ struct btrfs_fs_info *fs_info = trans->fs_info;
if (!trans->chunk_bytes_reserved)
return;
if (dropped > 0)
to_free += btrfs_calc_trans_metadata_size(root, dropped);
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(root->fs_info))
return;
trace_btrfs_space_reservation(root->fs_info, "delalloc",
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
- if (btrfs_test_opt(root, SPACE_CACHE) &&
+ if (btrfs_test_opt(root->fs_info, SPACE_CACHE) &&
cache->disk_cache_state < BTRFS_DC_CLEAR)
cache->disk_cache_state = BTRFS_DC_CLEAR;
u64 *empty_cluster)
{
struct btrfs_free_cluster *ret = NULL;
- bool ssd = btrfs_test_opt(root, SSD);
+ bool ssd = btrfs_test_opt(root->fs_info, SSD);
*empty_cluster = 0;
if (btrfs_mixed_space_info(space_info))
break;
}
- if (btrfs_test_opt(root, DISCARD))
+ if (btrfs_test_opt(root->fs_info, DISCARD))
ret = btrfs_discard_extent(root, start,
end + 1 - start, NULL);
NULL, refs_to_drop,
is_data, &last_ref);
if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_release_path(path);
path->nodes[0]);
}
if (ret < 0) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
extent_slot = path->slots[0];
"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
bytenr, parent, root_objectid, owner_objectid,
owner_offset);
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
} else {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
ret = convert_extent_item_v0(trans, extent_root, path,
owner_objectid, 0);
if (ret < 0) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_print_leaf(extent_root, path->nodes[0]);
}
if (ret < 0) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_err(info, "trying to drop %d refs but we only have %Lu "
"for bytenr %Lu", refs_to_drop, refs, bytenr);
ret = -EINVAL;
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
refs -= refs_to_drop;
iref, refs_to_drop,
is_data, &last_ref);
if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
}
ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
num_to_del);
if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_release_path(path);
if (is_data) {
ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
}
ret = add_to_free_space_tree(trans, root->fs_info, bytenr,
num_bytes);
if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
ret = update_block_group(trans, root, bytenr, num_bytes, 0);
if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
}
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(fs_info))
return 0;
add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid);
* can do more things.
*/
if (ret < 0 && ret != -ENOSPC)
- btrfs_abort_transaction(trans,
- root, ret);
+ btrfs_abort_transaction(trans, ret);
else
ret = 0;
if (!exist)
printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n",
info->flags,
info->total_bytes - info->bytes_used - info->bytes_pinned -
- info->bytes_reserved - info->bytes_readonly,
- (info->full) ? "" : "not ");
+ info->bytes_reserved - info->bytes_readonly -
+ info->bytes_may_use, (info->full) ? "" : "not ");
printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, "
"reserved=%llu, may_use=%llu, readonly=%llu\n",
info->total_bytes, info->bytes_used, info->bytes_pinned,
if (num_bytes == min_alloc_size)
final_tried = true;
goto again;
- } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+ } else if (btrfs_test_opt(root->fs_info, ENOSPC_DEBUG)) {
struct btrfs_space_info *sinfo;
sinfo = __find_space_info(root->fs_info, flags);
if (pin)
pin_down_extent(root, cache, start, len, 1);
else {
- if (btrfs_test_opt(root, DISCARD))
+ if (btrfs_test_opt(root->fs_info, DISCARD))
ret = btrfs_discard_extent(root, start, len, NULL);
btrfs_add_free_space(cache, start, len);
btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
goto again;
}
- if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+ if (btrfs_test_opt(root->fs_info, ENOSPC_DEBUG)) {
static DEFINE_RATELIMIT_STATE(_rs,
DEFAULT_RATELIMIT_INTERVAL * 10,
/*DEFAULT_RATELIMIT_BURST*/ 1);
bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
SKINNY_METADATA);
- if (btrfs_test_is_dummy_root(root)) {
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ if (btrfs_is_testing(root->fs_info)) {
buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
level);
if (!IS_ERR(buf))
root->alloc_bytenr += blocksize;
return buf;
}
+#endif
block_rsv = use_block_rsv(trans, root, blocksize);
if (IS_ERR(block_rsv))
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- if (btrfs_qgroup_insert_dirty_extent(delayed_refs, qrecord))
+ if (btrfs_qgroup_insert_dirty_extent(trans->fs_info,
+ delayed_refs, qrecord))
kfree(qrecord);
spin_unlock(&delayed_refs->lock);
&root->root_key,
root_item);
if (ret) {
- btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_abort_transaction(trans, ret);
err = ret;
goto out_end_trans;
}
ret = btrfs_del_root(trans, tree_root, &root->root_key);
if (ret) {
- btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
ret = btrfs_find_root(tree_root, &root->root_key, path,
NULL, NULL);
if (ret < 0) {
- btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_abort_transaction(trans, ret);
err = ret;
goto out_end_trans;
} else if (ret > 0) {
int full = 0;
int ret = 0;
- debug = btrfs_test_opt(root, ENOSPC_DEBUG);
+ debug = btrfs_test_opt(root->fs_info, ENOSPC_DEBUG);
block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
if (found_key.objectid >= key->objectid &&
found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
- ret = 0;
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+
+ em_tree = &root->fs_info->mapping_tree.map_tree;
+ read_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, found_key.objectid,
+ found_key.offset);
+ read_unlock(&em_tree->lock);
+ if (!em) {
+ btrfs_err(root->fs_info,
+ "logical %llu len %llu found bg but no related chunk",
+ found_key.objectid, found_key.offset);
+ ret = -ENOENT;
+ } else {
+ ret = 0;
+ }
goto out;
}
path->slots[0]++;
path->reada = READA_FORWARD;
cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
- if (btrfs_test_opt(root, SPACE_CACHE) &&
+ if (btrfs_test_opt(root->fs_info, SPACE_CACHE) &&
btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
need_clear = 1;
- if (btrfs_test_opt(root, CLEAR_CACHE))
+ if (btrfs_test_opt(root->fs_info, CLEAR_CACHE))
need_clear = 1;
while (1) {
* b) Setting 'dirty flag' makes sure that we flush
* the new space cache info onto disk.
*/
- if (btrfs_test_opt(root, SPACE_CACHE))
+ if (btrfs_test_opt(root->fs_info, SPACE_CACHE))
cache->disk_cache_state = BTRFS_DC_CLEAR;
}
ret = btrfs_insert_item(trans, extent_root, &key, &item,
sizeof(item));
if (ret)
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
ret = btrfs_finish_chunk_alloc(trans, extent_root,
key.objectid, key.offset);
if (ret)
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
add_block_group_free_space(trans, root->fs_info, block_group);
/* already aborted the transaction if it failed. */
next:
spin_lock(&block_group->space_info->lock);
list_del_init(&block_group->ro_list);
- if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+ if (btrfs_test_opt(root->fs_info, ENOSPC_DEBUG)) {
WARN_ON(block_group->space_info->total_bytes
< block_group->key.offset);
WARN_ON(block_group->space_info->bytes_readonly
spin_unlock(&space_info->lock);
/* DISCARD can flip during remount */
- trimming = btrfs_test_opt(root, DISCARD);
+ trimming = btrfs_test_opt(root->fs_info, DISCARD);
/* Implicit trim during transaction commit. */
if (trimming)
{
extent_state_cache = kmem_cache_create("btrfs_extent_state",
sizeof(struct extent_state), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!extent_state_cache)
return -ENOMEM;
extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
sizeof(struct extent_buffer), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!extent_buffer_cache)
goto free_state_cache;
if (tree->ops && tree->ops->merge_bio_hook)
ret = tree->ops->merge_bio_hook(page, offset, size, bio,
bio_flags);
- BUG_ON(ret < 0);
return ret;
}
* into the tree that are removed when the IO is done (by the end_io
* handlers)
* XXX JDM: This needs looking at to ensure proper page locking
+ * return 0 on success, otherwise return error
*/
static int __do_readpage(struct extent_io_tree *tree,
struct page *page,
sector_t sector;
struct extent_map *em;
struct block_device *bdev;
- int ret;
+ int ret = 0;
int nr = 0;
size_t pg_offset = 0;
size_t iosize;
} else {
SetPageError(page);
unlock_extent(tree, cur, cur + iosize - 1);
+ goto out;
}
cur = cur + iosize;
pg_offset += iosize;
SetPageUptodate(page);
unlock_page(page);
}
- return 0;
+ return ret;
}
static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
atomic_set(&eb->io_pages, num_reads);
for (i = start_i; i < num_pages; i++) {
page = eb->pages[i];
+
if (!PageUptodate(page)) {
+ if (ret) {
+ atomic_dec(&eb->io_pages);
+ unlock_page(page);
+ continue;
+ }
+
ClearPageError(page);
err = __extent_read_full_page(tree, page,
get_extent, &bio,
mirror_num, &bio_flags,
REQ_META);
- if (err)
+ if (err) {
ret = err;
+ /*
+ * We use &bio in above __extent_read_full_page,
+ * so we ensure that if it returns error, the
+ * current page fails to add itself to bio and
+ * it's been unlocked.
+ *
+ * We must dec io_pages by ourselves.
+ */
+ atomic_dec(&eb->io_pages);
+ }
} else {
unlock_page(page);
}
{
extent_map_cache = kmem_cache_create("btrfs_extent_map",
sizeof(struct extent_map), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!extent_map_cache)
return -ENOMEM;
return 0;
offset + root->sectorsize - 1,
EXTENT_NODATASUM);
} else {
- btrfs_info(BTRFS_I(inode)->root->fs_info,
+ btrfs_info_rl(BTRFS_I(inode)->root->fs_info,
"no csum found for inode %llu start %llu",
btrfs_ino(inode), offset);
}
*/
ret = btrfs_split_item(trans, root, path, &key, offset);
if (ret && ret != -EAGAIN) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
static inline int __need_auto_defrag(struct btrfs_root *root)
{
- if (!btrfs_test_opt(root, AUTO_DEFRAG))
+ if (!btrfs_test_opt(root->fs_info, AUTO_DEFRAG))
return 0;
if (btrfs_fs_closing(root->fs_info))
ret = btrfs_del_items(trans, root, path, del_slot,
del_nr);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
break;
}
path->slots[0] = del_slot;
ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
}
leaf = path->nodes[0];
goto again;
}
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
}
{
btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
sizeof(struct inode_defrag), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ SLAB_MEM_SPREAD,
NULL);
if (!btrfs_inode_defrag_cachep)
return -ENOMEM;
if (locked)
mutex_unlock(&trans->transaction->cache_write_mutex);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
* For metadata, allow allocates with smaller extents. For
* data, keep it dense.
*/
- if (btrfs_test_opt(root, SSD_SPREAD)) {
+ if (btrfs_test_opt(root->fs_info, SSD_SPREAD)) {
cont1_bytes = min_bytes = bytes + empty_size;
} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
cont1_bytes = bytes;
int ret = 0;
u64 root_gen = btrfs_root_generation(&root->root_item);
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return 0;
/*
struct btrfs_io_ctl io_ctl;
bool release_metadata = true;
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return 0;
memset(&io_ctl, 0, sizeof(io_ctl));
out:
kvfree(bitmap);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
out:
kvfree(bitmap);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
out:
btrfs_free_path(path);
if (ret)
- btrfs_abort_transaction(trans, fs_info->free_space_root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
out:
btrfs_free_path(path);
if (ret)
- btrfs_abort_transaction(trans, fs_info->free_space_root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
abort:
fs_info->creating_free_space_tree = 0;
- btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans, tree_root);
return ret;
}
return 0;
abort:
- btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans, tree_root);
return ret;
}
btrfs_free_path(path);
mutex_unlock(&block_group->free_space_lock);
if (ret)
- btrfs_abort_transaction(trans, fs_info->free_space_root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
out:
btrfs_free_path(path);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
int slot;
int ret;
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return 0;
path = btrfs_alloc_path();
int ret;
u64 objectid;
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return;
spin_lock(&root->ino_cache_lock);
int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
{
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return btrfs_find_free_objectid(root, objectid);
again:
{
struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return;
again:
if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
struct rb_node *n;
u64 count;
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return;
while (1) {
if (btrfs_root_refs(&root->root_item) == 0)
return 0;
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return 0;
path = btrfs_alloc_path();
BTRFS_I(inode)->generation = 0;
ret = btrfs_update_inode(trans, root, inode);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_put;
}
ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
if (ret) {
if (ret != -ENOSPC)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_put;
}
}
#include "hash.h"
#include "props.h"
#include "qgroup.h"
+#include "dedupe.h"
struct btrfs_iget_args {
struct btrfs_key *location;
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written, int unlock);
+ u64 start, u64 end, u64 delalloc_end,
+ int *page_started, unsigned long *nr_written,
+ int unlock, struct btrfs_dedupe_hash *hash);
static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
u64 len, u64 orig_start,
u64 block_start, u64 block_len,
start, aligned_end, NULL,
1, 1, extent_item_size, &extent_inserted);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
inline_len, compressed_size,
compress_type, compressed_pages);
if (ret && ret != -ENOSPC) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
} else if (ret == -ENOSPC) {
ret = 1;
struct btrfs_root *root = BTRFS_I(inode)->root;
/* force compress */
- if (btrfs_test_opt(root, FORCE_COMPRESS))
+ if (btrfs_test_opt(root->fs_info, FORCE_COMPRESS))
return 1;
/* bad compression ratios */
if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
return 0;
- if (btrfs_test_opt(root, COMPRESS) ||
+ if (btrfs_test_opt(root->fs_info, COMPRESS) ||
BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
BTRFS_I(inode)->force_compress)
return 1;
will_compress = 0;
} else {
num_bytes = total_in;
+ *num_added += 1;
+
+ /*
+ * The async work queues will take care of doing actual
+ * allocation on disk for these compressed pages, and
+ * will submit them to the elevator.
+ */
+ add_async_extent(async_cow, start, num_bytes,
+ total_compressed, pages, nr_pages_ret,
+ compress_type);
+
+ if (start + num_bytes < end) {
+ start += num_bytes;
+ pages = NULL;
+ cond_resched();
+ goto again;
+ }
+ return;
}
}
- if (!will_compress && pages) {
+ if (pages) {
/*
* the compression code ran but failed to make things smaller,
* free any pages it allocated and our page pointer array
nr_pages_ret = 0;
/* flag the file so we don't compress in the future */
- if (!btrfs_test_opt(root, FORCE_COMPRESS) &&
+ if (!btrfs_test_opt(root->fs_info, FORCE_COMPRESS) &&
!(BTRFS_I(inode)->force_compress)) {
BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
}
}
- if (will_compress) {
- *num_added += 1;
-
- /* the async work queues will take care of doing actual
- * allocation on disk for these compressed pages,
- * and will submit them to the elevator.
- */
- add_async_extent(async_cow, start, num_bytes,
- total_compressed, pages, nr_pages_ret,
- compress_type);
-
- if (start + num_bytes < end) {
- start += num_bytes;
- pages = NULL;
- cond_resched();
- goto again;
- }
- } else {
cleanup_and_bail_uncompressed:
- /*
- * No compression, but we still need to write the pages in
- * the file we've been given so far. redirty the locked
- * page if it corresponds to our extent and set things up
- * for the async work queue to run cow_file_range to do
- * the normal delalloc dance
- */
- if (page_offset(locked_page) >= start &&
- page_offset(locked_page) <= end) {
- __set_page_dirty_nobuffers(locked_page);
- /* unlocked later on in the async handlers */
- }
- if (redirty)
- extent_range_redirty_for_io(inode, start, end);
- add_async_extent(async_cow, start, end - start + 1,
- 0, NULL, 0, BTRFS_COMPRESS_NONE);
- *num_added += 1;
- }
+ /*
+ * No compression, but we still need to write the pages in the file
+ * we've been given so far. redirty the locked page if it corresponds
+ * to our extent and set things up for the async work queue to run
+ * cow_file_range to do the normal delalloc dance.
+ */
+ if (page_offset(locked_page) >= start &&
+ page_offset(locked_page) <= end)
+ __set_page_dirty_nobuffers(locked_page);
+ /* unlocked later on in the async handlers */
+
+ if (redirty)
+ extent_range_redirty_for_io(inode, start, end);
+ add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0,
+ BTRFS_COMPRESS_NONE);
+ *num_added += 1;
return;
async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
- &page_started, &nr_written, 0);
+ async_extent->start +
+ async_extent->ram_size - 1,
+ &page_started, &nr_written, 0,
+ NULL);
/* JDM XXX */
*/
static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written,
- int unlock)
+ u64 start, u64 end, u64 delalloc_end,
+ int *page_started, unsigned long *nr_written,
+ int unlock, struct btrfs_dedupe_hash *hash)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 alloc_hint = 0;
async_cow->start = start;
if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
- !btrfs_test_opt(root, FORCE_COMPRESS))
+ !btrfs_test_opt(root->fs_info, FORCE_COMPRESS))
cur_end = end;
else
cur_end = min(end, start + SZ_512K - 1);
if (cow_start != (u64)-1) {
ret = cow_file_range(inode, locked_page,
cow_start, found_key.offset - 1,
- page_started, nr_written, 1);
+ end, page_started, nr_written, 1,
+ NULL);
if (ret) {
if (!nolock && nocow)
btrfs_end_write_no_snapshoting(root);
}
if (cow_start != (u64)-1) {
- ret = cow_file_range(inode, locked_page, cow_start, end,
- page_started, nr_written, 1);
+ ret = cow_file_range(inode, locked_page, cow_start, end, end,
+ page_started, nr_written, 1, NULL);
if (ret)
goto error;
}
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 0, nr_written);
} else if (!inode_need_compress(inode)) {
- ret = cow_file_range(inode, locked_page, start, end,
- page_started, nr_written, 1);
+ ret = cow_file_range(inode, locked_page, start, end, end,
+ page_started, nr_written, 1, NULL);
} else {
set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
&BTRFS_I(inode)->runtime_flags);
}
/* For sanity tests */
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(root->fs_info))
return;
__percpu_counter_add(&root->fs_info->delalloc_bytes, len,
btrfs_delalloc_release_metadata(inode, len);
/* For sanity tests. */
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(root->fs_info))
return;
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
/*
* extent_io.c merge_bio_hook, this must check the chunk tree to make sure
* we don't create bios that span stripes or chunks
+ *
+ * return 1 if page cannot be merged to bio
+ * return 0 if page can be merged to bio
+ * return error otherwise
*/
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio,
map_length = length;
ret = btrfs_map_block(root->fs_info, bio_op(bio), logical,
&map_length, NULL, 0);
- /* Will always return 0 with map_multi == NULL */
- BUG_ON(ret < 0);
+ if (ret < 0)
+ return ret;
if (map_length < length + size)
return 1;
return 0;
ret = btrfs_insert_empty_item(trans, root, path, &key,
sizeof(*extent));
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
backref->root_id, backref->inum,
new->file_pos); /* start - extent_offset */
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) /* -ENOMEM or corruption */
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
ordered_extent->file_offset, ordered_extent->len,
trans->transid);
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_unlock;
}
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) { /* -ENOMEM or corruption */
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_unlock;
}
ret = 0;
ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
root->root_key.objectid);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
else
clear_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
&root->state);
if (ret != -EEXIST) {
clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
}
ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
root->root_key.objectid);
if (ret && ret != -EEXIST) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
}
btrfs_info(root->fs_info,
"failed to delete reference to %.*s, inode %llu parent %llu",
name_len, name, ino, dir_ino);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto err;
}
skip_backref:
ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto err;
}
ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
inode, dir_ino);
if (ret != 0 && ret != -ENOENT) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto err;
}
if (ret == -ENOENT)
ret = 0;
else if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
err:
btrfs_free_path(path);
if (ret)
WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
ret = btrfs_delete_one_dir_name(trans, root, path, di);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_release_path(path);
dir_ino, &index, name, name_len);
if (ret < 0) {
if (ret != -ENOENT) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
di = btrfs_search_dir_index_item(root, path, dir_ino,
ret = -ENOENT;
else
ret = PTR_ERR(di);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
ret = btrfs_update_inode_fallback(trans, root, dir);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
out:
btrfs_free_path(path);
return ret;
pending_del_nr);
if (err) {
btrfs_abort_transaction(trans,
- root,
err);
goto error;
}
item_end,
new_size);
if (err) {
- btrfs_abort_transaction(trans,
- root, err);
+ btrfs_abort_transaction(trans, err);
goto error;
}
} else if (test_bit(BTRFS_ROOT_REF_COWS,
pending_del_slot,
pending_del_nr);
if (ret) {
- btrfs_abort_transaction(trans,
- root, ret);
+ btrfs_abort_transaction(trans, ret);
goto error;
}
pending_del_nr = 0;
ret = btrfs_del_items(trans, root, path, pending_del_slot,
pending_del_nr);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
}
error:
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans, root);
return ret;
}
ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
0, 0, len, 0, len, 0, 0, 0);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
else
btrfs_update_inode(trans, root, inode);
btrfs_end_transaction(trans, root);
i_size_write(inode, BTRFS_I(inode)->disk_i_size);
err = btrfs_orphan_del(trans, inode);
if (err)
- btrfs_abort_transaction(trans, root, err);
+ btrfs_abort_transaction(trans, err);
btrfs_end_transaction(trans, root);
}
}
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *rsv, *global_rsv;
int steal_from_global = 0;
- u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
+ u64 min_size;
int ret;
trace_btrfs_inode_evict(inode);
+ if (!root) {
+ kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+ return;
+ }
+
+ min_size = btrfs_calc_trunc_metadata_size(root, 1);
+
evict_inode_truncate_pages(inode);
if (inode->i_nlink &&
btrfs_inherit_iflags(inode, dir);
if (S_ISREG(mode)) {
- if (btrfs_test_opt(root, NODATASUM))
+ if (btrfs_test_opt(root->fs_info, NODATASUM))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
- if (btrfs_test_opt(root, NODATACOW))
+ if (btrfs_test_opt(root->fs_info, NODATACOW))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
BTRFS_INODE_NODATASUM;
}
if (ret == -EEXIST || ret == -EOVERFLOW)
goto fail_dir_item;
else if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
}
current_fs_time(parent_inode->i_sb);
ret = btrfs_update_inode(trans, root, parent_inode);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
return ret;
fail_dir_item:
btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
sizeof(struct btrfs_trans_handle), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
if (!btrfs_trans_handle_cachep)
goto fail;
btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction",
sizeof(struct btrfs_transaction), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
if (!btrfs_transaction_cachep)
goto fail;
btrfs_path_cachep = kmem_cache_create("btrfs_path",
sizeof(struct btrfs_path), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!btrfs_path_cachep)
goto fail;
btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
sizeof(struct btrfs_free_space), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!btrfs_free_space_cachep)
goto fail;
ret = btrfs_update_inode(trans, root, old_inode);
}
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
ret = btrfs_update_inode(trans, dest, new_inode);
}
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
new_dentry->d_name.name,
new_dentry->d_name.len, 0, old_idx);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
old_dentry->d_name.name,
old_dentry->d_name.len, 0, new_idx);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
ret = btrfs_update_inode(trans, root, old_inode);
}
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
if (!ret && new_inode->i_nlink == 0)
ret = btrfs_orphan_add(trans, d_inode(new_dentry));
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
}
new_dentry->d_name.name,
new_dentry->d_name.len, 0, index);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
old_dentry);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
}
}
if (ret) {
btrfs_free_reserved_extent(root, ins.objectid,
ins.offset, 0);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
if (own_trans)
btrfs_end_transaction(trans, root);
break;
ret = btrfs_update_inode(trans, root, inode);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
if (own_trans)
btrfs_end_transaction(trans, root);
break;
new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
if (ret) {
/* We potentially lose an unused inode item here */
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
*/
ret = btrfs_set_inode_index(dir, &index);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
name, namelen, dir, &key,
BTRFS_FT_DIR, index);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
root_item->uuid, BTRFS_UUID_KEY_SUBVOL,
objectid);
if (ret)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
fail:
kfree(root_item);
return 1;
}
-static noinline int copy_to_sk(struct btrfs_root *root,
- struct btrfs_path *path,
+static noinline int copy_to_sk(struct btrfs_path *path,
struct btrfs_key *key,
struct btrfs_ioctl_search_key *sk,
size_t *buf_size,
ret = 0;
goto err;
}
- ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf,
+ ret = copy_to_sk(path, &key, sk, buf_size, ubuf,
&sk_offset, &num_found);
btrfs_release_path(path);
if (ret)
* rmdir(2).
*/
err = -EPERM;
- if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
+ if (!btrfs_test_opt(root->fs_info, USER_SUBVOL_RM_ALLOWED))
goto out_dput;
/*
dentry->d_name.len);
if (ret) {
err = ret;
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
root->fs_info->tree_root,
dest->root_key.objectid);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
err = ret;
goto out_end_trans;
}
dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
dest->root_key.objectid);
if (ret && ret != -ENOENT) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
err = ret;
goto out_end_trans;
}
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
dest->root_key.objectid);
if (ret && ret != -ENOENT) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
err = ret;
goto out_end_trans;
}
ret = btrfs_update_inode(trans, root, inode);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans, root);
goto out;
}
if (ret) {
if (ret != -EOPNOTSUPP)
btrfs_abort_transaction(trans,
- root, ret);
+ ret);
btrfs_end_transaction(trans, root);
goto out;
}
ret = btrfs_insert_empty_item(trans, root, path,
&new_key, size);
if (ret) {
- btrfs_abort_transaction(trans, root,
- ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans, root);
goto out;
}
new_key.offset - datao);
if (ret) {
btrfs_abort_transaction(trans,
- root,
ret);
btrfs_end_transaction(trans,
root);
if (ret) {
if (ret != -EOPNOTSUPP)
btrfs_abort_transaction(trans,
- root,
ret);
btrfs_end_transaction(trans, root);
goto out;
last_dest_end, destoff + len, 1);
if (ret) {
if (ret != -EOPNOTSUPP)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans, root);
goto out;
}
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
root->root_key.objectid);
if (ret < 0 && ret != -EEXIST) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
}
ret = btrfs_commit_transaction(trans, root);
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
{
btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
sizeof(struct btrfs_ordered_extent), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ SLAB_MEM_SPREAD,
NULL);
if (!btrfs_ordered_extent_cache)
return -ENOMEM;
struct btrfs_root *root,
struct btrfs_root *parent_root)
{
+ struct super_block *sb = root->fs_info->sb;
struct btrfs_key key;
struct inode *parent_inode, *child_inode;
int ret;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- parent_inode = btrfs_iget(parent_root->fs_info->sb, &key,
- parent_root, NULL);
+ parent_inode = btrfs_iget(sb, &key, parent_root, NULL);
if (IS_ERR(parent_inode))
return PTR_ERR(parent_inode);
- child_inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
+ child_inode = btrfs_iget(sb, &key, root, NULL);
if (IS_ERR(child_inode)) {
iput(parent_inode);
return PTR_ERR(child_inode);
struct extent_buffer *leaf;
struct btrfs_key key;
- if (btrfs_test_is_dummy_root(quota_root))
+ if (btrfs_is_testing(quota_root->fs_info))
return 0;
path = btrfs_alloc_path();
int ret;
int slot;
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(root->fs_info))
return 0;
key.objectid = 0;
return ret;
}
-struct btrfs_qgroup_extent_record
-*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs,
- struct btrfs_qgroup_extent_record *record)
+struct btrfs_qgroup_extent_record *
+btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_qgroup_extent_record *record)
{
struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
struct rb_node *parent_node = NULL;
u64 bytenr = record->bytenr;
assert_spin_locked(&delayed_refs->lock);
- trace_btrfs_qgroup_insert_dirty_extent(record);
+ trace_btrfs_qgroup_insert_dirty_extent(fs_info, record);
while (*p) {
parent_node = *p;
cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
- trace_qgroup_update_counters(qg->qgroupid, cur_old_count,
- cur_new_count);
+ trace_qgroup_update_counters(fs_info, qg->qgroupid,
+ cur_old_count, cur_new_count);
/* Rfer update part */
if (cur_old_count == 0 && cur_new_count > 0) {
goto out_free;
BUG_ON(!fs_info->quota_root);
- trace_btrfs_qgroup_account_extent(bytenr, num_bytes, nr_old_roots,
- nr_new_roots);
+ trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes,
+ nr_old_roots, nr_new_roots);
qgroups = ulist_alloc(GFP_NOFS);
if (!qgroups) {
record = rb_entry(node, struct btrfs_qgroup_extent_record,
node);
- trace_btrfs_qgroup_account_extents(record);
+ trace_btrfs_qgroup_account_extents(fs_info, record);
if (!ret) {
/*
{
if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
return;
- btrfs_err(trans->root->fs_info,
+ btrfs_err(trans->fs_info,
"qgroups not uptodate in trans handle %p: list is%s empty, "
"seq is %#x.%x",
trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
struct btrfs_delayed_extent_op;
int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
-struct btrfs_qgroup_extent_record
-*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs,
- struct btrfs_qgroup_extent_record *record);
+struct btrfs_qgroup_extent_record *
+btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_qgroup_extent_record *record);
int
btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 ref_root, u64 num_bytes)
{
btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
- trace_btrfs_qgroup_free_delayed_ref(ref_root, num_bytes);
+ trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
}
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
cache->last_trans = 0;
for (i = 0; i < BTRFS_MAX_LEVEL; i++)
- BUG_ON(!list_empty(&cache->pending[i]));
- BUG_ON(!list_empty(&cache->changed));
- BUG_ON(!list_empty(&cache->detached));
- BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
- BUG_ON(cache->nr_nodes);
- BUG_ON(cache->nr_edges);
+ ASSERT(list_empty(&cache->pending[i]));
+ ASSERT(list_empty(&cache->changed));
+ ASSERT(list_empty(&cache->detached));
+ ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
+ ASSERT(!cache->nr_nodes);
+ ASSERT(!cache->nr_edges);
}
static struct backref_node *alloc_backref_node(struct backref_cache *cache)
lower = list_entry(useless.next,
struct backref_node, list);
list_del_init(&lower->list);
+ if (lower == node)
+ node = NULL;
free_backref_node(cache, lower);
}
+
+ free_backref_node(cache, node);
return ERR_PTR(err);
}
ASSERT(!node || !node->detached);
btrfs_header_owner(leaf),
key.objectid, key.offset);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
break;
}
parent, btrfs_header_owner(leaf),
key.objectid, key.offset);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
break;
}
}
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_search_slot(trans, root, key, path,
-1, 1);
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_del_item(trans, root, path);
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_release_path(path);
ret = btrfs_insert_empty_item(trans, root, path,
key, sizeof(*item));
if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
l = path->nodes[0];
ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
sizeof(*ref) + name_len);
if (ret) {
- btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_free_path(path);
return ret;
}
if (fs_info->scrub_workers_refcnt == 0) {
if (is_dev_replace)
fs_info->scrub_workers =
- btrfs_alloc_workqueue("scrub", flags,
+ btrfs_alloc_workqueue(fs_info, "scrub", flags,
1, 4);
else
fs_info->scrub_workers =
- btrfs_alloc_workqueue("scrub", flags,
+ btrfs_alloc_workqueue(fs_info, "scrub", flags,
max_active, 4);
if (!fs_info->scrub_workers)
goto fail_scrub_workers;
fs_info->scrub_wr_completion_workers =
- btrfs_alloc_workqueue("scrubwrc", flags,
+ btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
max_active, 2);
if (!fs_info->scrub_wr_completion_workers)
goto fail_scrub_wr_completion_workers;
fs_info->scrub_nocow_workers =
- btrfs_alloc_workqueue("scrubnc", flags, 1, 0);
+ btrfs_alloc_workqueue(fs_info, "scrubnc", flags, 1, 0);
if (!fs_info->scrub_nocow_workers)
goto fail_scrub_nocow_workers;
fs_info->scrub_parity_workers =
- btrfs_alloc_workqueue("scrubparity", flags,
+ btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
max_active, 2);
if (!fs_info->scrub_parity_workers)
goto fail_scrub_parity_workers;
if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
/* not supported for data w/o checksums */
- btrfs_err(fs_info,
+ btrfs_err_rl(fs_info,
"scrub: size assumption sectorsize != PAGE_SIZE "
"(%d != %lu) fails",
fs_info->chunk_root->sectorsize, PAGE_SIZE);
"debug",
};
+
+/*
+ * Use one ratelimit state per log level so that a flood of less important
+ * messages doesn't cause more important ones to be dropped.
+ */
+static struct ratelimit_state printk_limits[] = {
+ RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
+ RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
+};
+
void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
struct super_block *sb = fs_info->sb;
va_list args;
const char *type = logtypes[4];
int kern_level;
+ struct ratelimit_state *ratelimit;
va_start(args, fmt);
lvl[size] = '\0';
fmt += size;
type = logtypes[kern_level - '0'];
- } else
+ ratelimit = &printk_limits[kern_level - '0'];
+ } else {
*lvl = '\0';
+ /* Default to debug output */
+ ratelimit = &printk_limits[7];
+ }
vaf.fmt = fmt;
vaf.va = &args;
- printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf);
+ if (__ratelimit(ratelimit))
+ printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf);
va_end(args);
}
*/
__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, const char *function,
+ const char *function,
unsigned int line, int errno)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+
trans->aborted = errno;
/* Nothing used. The other threads that have joined this
* transaction may be able to continue. */
const char *errstr;
errstr = btrfs_decode_error(errno);
- btrfs_warn(root->fs_info,
+ btrfs_warn(fs_info,
"%s:%d: Aborting unused transaction(%s).",
function, line, errstr);
return;
}
ACCESS_ONCE(trans->transaction->aborted) = errno;
/* Wake up anybody who may be waiting on this transaction */
- wake_up(&root->fs_info->transaction_wait);
- wake_up(&root->fs_info->transaction_blocked_wait);
- __btrfs_handle_fs_error(root->fs_info, function, line, errno, NULL);
+ wake_up(&fs_info->transaction_wait);
+ wake_up(&fs_info->transaction_blocked_wait);
+ __btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
}
/*
* __btrfs_panic decodes unexpected, fatal errors from the caller,
*/
break;
case Opt_nodatasum:
- btrfs_set_and_info(root, NODATASUM,
+ btrfs_set_and_info(info, NODATASUM,
"setting nodatasum");
break;
case Opt_datasum:
- if (btrfs_test_opt(root, NODATASUM)) {
- if (btrfs_test_opt(root, NODATACOW))
+ if (btrfs_test_opt(info, NODATASUM)) {
+ if (btrfs_test_opt(info, NODATACOW))
btrfs_info(root->fs_info, "setting datasum, datacow enabled");
else
btrfs_info(root->fs_info, "setting datasum");
btrfs_clear_opt(info->mount_opt, NODATASUM);
break;
case Opt_nodatacow:
- if (!btrfs_test_opt(root, NODATACOW)) {
- if (!btrfs_test_opt(root, COMPRESS) ||
- !btrfs_test_opt(root, FORCE_COMPRESS)) {
+ if (!btrfs_test_opt(info, NODATACOW)) {
+ if (!btrfs_test_opt(info, COMPRESS) ||
+ !btrfs_test_opt(info, FORCE_COMPRESS)) {
btrfs_info(root->fs_info,
"setting nodatacow, compression disabled");
} else {
btrfs_set_opt(info->mount_opt, NODATASUM);
break;
case Opt_datacow:
- btrfs_clear_and_info(root, NODATACOW,
+ btrfs_clear_and_info(info, NODATACOW,
"setting datacow");
break;
case Opt_compress_force:
/* Fallthrough */
case Opt_compress:
case Opt_compress_type:
- saved_compress_type = btrfs_test_opt(root, COMPRESS) ?
+ saved_compress_type = btrfs_test_opt(info,
+ COMPRESS) ?
info->compress_type : BTRFS_COMPRESS_NONE;
saved_compress_force =
- btrfs_test_opt(root, FORCE_COMPRESS);
+ btrfs_test_opt(info, FORCE_COMPRESS);
if (token == Opt_compress ||
token == Opt_compress_force ||
strcmp(args[0].from, "zlib") == 0) {
*/
btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
}
- if ((btrfs_test_opt(root, COMPRESS) &&
+ if ((btrfs_test_opt(info, COMPRESS) &&
(info->compress_type != saved_compress_type ||
compress_force != saved_compress_force)) ||
- (!btrfs_test_opt(root, COMPRESS) &&
+ (!btrfs_test_opt(info, COMPRESS) &&
no_compress == 1)) {
btrfs_info(root->fs_info,
"%s %s compression",
compress_force = false;
break;
case Opt_ssd:
- btrfs_set_and_info(root, SSD,
+ btrfs_set_and_info(info, SSD,
"use ssd allocation scheme");
break;
case Opt_ssd_spread:
- btrfs_set_and_info(root, SSD_SPREAD,
+ btrfs_set_and_info(info, SSD_SPREAD,
"use spread ssd allocation scheme");
btrfs_set_opt(info->mount_opt, SSD);
break;
case Opt_nossd:
- btrfs_set_and_info(root, NOSSD,
+ btrfs_set_and_info(info, NOSSD,
"not using ssd allocation scheme");
btrfs_clear_opt(info->mount_opt, SSD);
break;
case Opt_barrier:
- btrfs_clear_and_info(root, NOBARRIER,
+ btrfs_clear_and_info(info, NOBARRIER,
"turning on barriers");
break;
case Opt_nobarrier:
- btrfs_set_and_info(root, NOBARRIER,
+ btrfs_set_and_info(info, NOBARRIER,
"turning off barriers");
break;
case Opt_thread_pool:
root->fs_info->sb->s_flags &= ~MS_POSIXACL;
break;
case Opt_notreelog:
- btrfs_set_and_info(root, NOTREELOG,
+ btrfs_set_and_info(info, NOTREELOG,
"disabling tree log");
break;
case Opt_treelog:
- btrfs_clear_and_info(root, NOTREELOG,
+ btrfs_clear_and_info(info, NOTREELOG,
"enabling tree log");
break;
case Opt_norecovery:
case Opt_nologreplay:
- btrfs_set_and_info(root, NOLOGREPLAY,
+ btrfs_set_and_info(info, NOLOGREPLAY,
"disabling log replay at mount time");
break;
case Opt_flushoncommit:
- btrfs_set_and_info(root, FLUSHONCOMMIT,
+ btrfs_set_and_info(info, FLUSHONCOMMIT,
"turning on flush-on-commit");
break;
case Opt_noflushoncommit:
- btrfs_clear_and_info(root, FLUSHONCOMMIT,
+ btrfs_clear_and_info(info, FLUSHONCOMMIT,
"turning off flush-on-commit");
break;
case Opt_ratio:
}
break;
case Opt_discard:
- btrfs_set_and_info(root, DISCARD,
+ btrfs_set_and_info(info, DISCARD,
"turning on discard");
break;
case Opt_nodiscard:
- btrfs_clear_and_info(root, DISCARD,
+ btrfs_clear_and_info(info, DISCARD,
"turning off discard");
break;
case Opt_space_cache:
strcmp(args[0].from, "v1") == 0) {
btrfs_clear_opt(root->fs_info->mount_opt,
FREE_SPACE_TREE);
- btrfs_set_and_info(root, SPACE_CACHE,
+ btrfs_set_and_info(info, SPACE_CACHE,
"enabling disk space caching");
} else if (strcmp(args[0].from, "v2") == 0) {
btrfs_clear_opt(root->fs_info->mount_opt,
SPACE_CACHE);
- btrfs_set_and_info(root, FREE_SPACE_TREE,
+ btrfs_set_and_info(info,
+ FREE_SPACE_TREE,
"enabling free space tree");
} else {
ret = -EINVAL;
btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
break;
case Opt_no_space_cache:
- if (btrfs_test_opt(root, SPACE_CACHE)) {
- btrfs_clear_and_info(root, SPACE_CACHE,
+ if (btrfs_test_opt(info, SPACE_CACHE)) {
+ btrfs_clear_and_info(info,
+ SPACE_CACHE,
"disabling disk space caching");
}
- if (btrfs_test_opt(root, FREE_SPACE_TREE)) {
- btrfs_clear_and_info(root, FREE_SPACE_TREE,
+ if (btrfs_test_opt(info, FREE_SPACE_TREE)) {
+ btrfs_clear_and_info(info,
+ FREE_SPACE_TREE,
"disabling free space tree");
}
break;
"disabling inode map caching");
break;
case Opt_clear_cache:
- btrfs_set_and_info(root, CLEAR_CACHE,
+ btrfs_set_and_info(info, CLEAR_CACHE,
"force clearing of disk cache");
break;
case Opt_user_subvol_rm_allowed:
btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
break;
case Opt_defrag:
- btrfs_set_and_info(root, AUTO_DEFRAG,
+ btrfs_set_and_info(info, AUTO_DEFRAG,
"enabling auto defrag");
break;
case Opt_nodefrag:
- btrfs_clear_and_info(root, AUTO_DEFRAG,
+ btrfs_clear_and_info(info, AUTO_DEFRAG,
"disabling auto defrag");
break;
case Opt_recovery:
/*
* Extra check for current option against current flag
*/
- if (btrfs_test_opt(root, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) {
+ if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) {
btrfs_err(root->fs_info,
"nologreplay must be used with ro mount option");
ret = -EINVAL;
}
out:
if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) &&
- !btrfs_test_opt(root, FREE_SPACE_TREE) &&
- !btrfs_test_opt(root, CLEAR_CACHE)) {
+ !btrfs_test_opt(info, FREE_SPACE_TREE) &&
+ !btrfs_test_opt(info, CLEAR_CACHE)) {
btrfs_err(root->fs_info, "cannot disable free space tree");
ret = -EINVAL;
}
- if (!ret && btrfs_test_opt(root, SPACE_CACHE))
+ if (!ret && btrfs_test_opt(info, SPACE_CACHE))
btrfs_info(root->fs_info, "disk space caching is enabled");
- if (!ret && btrfs_test_opt(root, FREE_SPACE_TREE))
+ if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
btrfs_info(root->fs_info, "using free space tree");
kfree(orig);
return ret;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root = fs_info->tree_root;
- trace_btrfs_sync_fs(wait);
+ trace_btrfs_sync_fs(fs_info, wait);
if (!wait) {
filemap_flush(fs_info->btree_inode->i_mapping);
struct btrfs_root *root = info->tree_root;
char *compress_type;
- if (btrfs_test_opt(root, DEGRADED))
+ if (btrfs_test_opt(info, DEGRADED))
seq_puts(seq, ",degraded");
- if (btrfs_test_opt(root, NODATASUM))
+ if (btrfs_test_opt(info, NODATASUM))
seq_puts(seq, ",nodatasum");
- if (btrfs_test_opt(root, NODATACOW))
+ if (btrfs_test_opt(info, NODATACOW))
seq_puts(seq, ",nodatacow");
- if (btrfs_test_opt(root, NOBARRIER))
+ if (btrfs_test_opt(info, NOBARRIER))
seq_puts(seq, ",nobarrier");
if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
seq_printf(seq, ",max_inline=%llu", info->max_inline);
if (info->thread_pool_size != min_t(unsigned long,
num_online_cpus() + 2, 8))
seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
- if (btrfs_test_opt(root, COMPRESS)) {
+ if (btrfs_test_opt(info, COMPRESS)) {
if (info->compress_type == BTRFS_COMPRESS_ZLIB)
compress_type = "zlib";
else
compress_type = "lzo";
- if (btrfs_test_opt(root, FORCE_COMPRESS))
+ if (btrfs_test_opt(info, FORCE_COMPRESS))
seq_printf(seq, ",compress-force=%s", compress_type);
else
seq_printf(seq, ",compress=%s", compress_type);
}
- if (btrfs_test_opt(root, NOSSD))
+ if (btrfs_test_opt(info, NOSSD))
seq_puts(seq, ",nossd");
- if (btrfs_test_opt(root, SSD_SPREAD))
+ if (btrfs_test_opt(info, SSD_SPREAD))
seq_puts(seq, ",ssd_spread");
- else if (btrfs_test_opt(root, SSD))
+ else if (btrfs_test_opt(info, SSD))
seq_puts(seq, ",ssd");
- if (btrfs_test_opt(root, NOTREELOG))
+ if (btrfs_test_opt(info, NOTREELOG))
seq_puts(seq, ",notreelog");
- if (btrfs_test_opt(root, NOLOGREPLAY))
+ if (btrfs_test_opt(info, NOLOGREPLAY))
seq_puts(seq, ",nologreplay");
- if (btrfs_test_opt(root, FLUSHONCOMMIT))
+ if (btrfs_test_opt(info, FLUSHONCOMMIT))
seq_puts(seq, ",flushoncommit");
- if (btrfs_test_opt(root, DISCARD))
+ if (btrfs_test_opt(info, DISCARD))
seq_puts(seq, ",discard");
if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
seq_puts(seq, ",noacl");
- if (btrfs_test_opt(root, SPACE_CACHE))
+ if (btrfs_test_opt(info, SPACE_CACHE))
seq_puts(seq, ",space_cache");
- else if (btrfs_test_opt(root, FREE_SPACE_TREE))
+ else if (btrfs_test_opt(info, FREE_SPACE_TREE))
seq_puts(seq, ",space_cache=v2");
else
seq_puts(seq, ",nospace_cache");
- if (btrfs_test_opt(root, RESCAN_UUID_TREE))
+ if (btrfs_test_opt(info, RESCAN_UUID_TREE))
seq_puts(seq, ",rescan_uuid_tree");
- if (btrfs_test_opt(root, CLEAR_CACHE))
+ if (btrfs_test_opt(info, CLEAR_CACHE))
seq_puts(seq, ",clear_cache");
- if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
+ if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED))
seq_puts(seq, ",user_subvol_rm_allowed");
- if (btrfs_test_opt(root, ENOSPC_DEBUG))
+ if (btrfs_test_opt(info, ENOSPC_DEBUG))
seq_puts(seq, ",enospc_debug");
- if (btrfs_test_opt(root, AUTO_DEFRAG))
+ if (btrfs_test_opt(info, AUTO_DEFRAG))
seq_puts(seq, ",autodefrag");
- if (btrfs_test_opt(root, INODE_MAP_CACHE))
+ if (btrfs_test_opt(info, INODE_MAP_CACHE))
seq_puts(seq, ",inode_cache");
- if (btrfs_test_opt(root, SKIP_BALANCE))
+ if (btrfs_test_opt(info, SKIP_BALANCE))
seq_puts(seq, ",skip_balance");
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
+ if (btrfs_test_opt(info, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
seq_puts(seq, ",check_int_data");
- else if (btrfs_test_opt(root, CHECK_INTEGRITY))
+ else if (btrfs_test_opt(info, CHECK_INTEGRITY))
seq_puts(seq, ",check_int");
if (info->check_integrity_print_mask)
seq_printf(seq, ",check_int_print_mask=%d",
if (info->metadata_ratio)
seq_printf(seq, ",metadata_ratio=%d",
info->metadata_ratio);
- if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR))
+ if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR))
seq_puts(seq, ",fatal_errors=panic");
if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
seq_printf(seq, ",commit=%d", info->commit_interval);
#ifdef CONFIG_BTRFS_DEBUG
- if (btrfs_test_opt(root, FRAGMENT_DATA))
+ if (btrfs_test_opt(info, FRAGMENT_DATA))
seq_puts(seq, ",fragment=data");
- if (btrfs_test_opt(root, FRAGMENT_METADATA))
+ if (btrfs_test_opt(info, FRAGMENT_METADATA))
seq_puts(seq, ",fragment=metadata");
#endif
seq_printf(seq, ",subvolid=%llu",
* chunk).
*
* If metadata is exhausted, f_bavail will be 0.
- *
- * FIXME: not accurate for mixed block groups, total and free/used are ok,
- * available appears slightly larger.
*/
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
btrfs_crc32c_impl());
}
-static int btrfs_run_sanity_tests(void)
-{
- int ret, i;
- u32 sectorsize, nodesize;
- u32 test_sectorsize[] = {
- PAGE_SIZE,
- };
- ret = btrfs_init_test_fs();
- if (ret)
- return ret;
- for (i = 0; i < ARRAY_SIZE(test_sectorsize); i++) {
- sectorsize = test_sectorsize[i];
- for (nodesize = sectorsize;
- nodesize <= BTRFS_MAX_METADATA_BLOCKSIZE;
- nodesize <<= 1) {
- pr_info("BTRFS: selftest: sectorsize: %u nodesize: %u\n",
- sectorsize, nodesize);
- ret = btrfs_test_free_space_cache(sectorsize, nodesize);
- if (ret)
- goto out;
- ret = btrfs_test_extent_buffer_operations(sectorsize,
- nodesize);
- if (ret)
- goto out;
- ret = btrfs_test_extent_io(sectorsize, nodesize);
- if (ret)
- goto out;
- ret = btrfs_test_inodes(sectorsize, nodesize);
- if (ret)
- goto out;
- ret = btrfs_test_qgroups(sectorsize, nodesize);
- if (ret)
- goto out;
- ret = btrfs_test_free_space_tree(sectorsize, nodesize);
- if (ret)
- goto out;
- }
- }
-out:
- btrfs_destroy_test_fs();
- return ret;
-}
-
static int __init init_btrfs_fs(void)
{
int err;
SPACE_INFO_ATTR(bytes_pinned);
SPACE_INFO_ATTR(bytes_reserved);
SPACE_INFO_ATTR(bytes_may_use);
+SPACE_INFO_ATTR(bytes_readonly);
SPACE_INFO_ATTR(disk_used);
SPACE_INFO_ATTR(disk_total);
BTRFS_ATTR(total_bytes_pinned, btrfs_space_info_show_total_bytes_pinned);
BTRFS_ATTR_PTR(bytes_pinned),
BTRFS_ATTR_PTR(bytes_reserved),
BTRFS_ATTR_PTR(bytes_may_use),
+ BTRFS_ATTR_PTR(bytes_readonly),
BTRFS_ATTR_PTR(disk_used),
BTRFS_ATTR_PTR(disk_total),
BTRFS_ATTR_PTR(total_bytes_pinned),
return new_inode(test_mnt->mnt_sb);
}
-int btrfs_init_test_fs(void)
+static int btrfs_init_test_fs(void)
{
int ret;
return 0;
}
-void btrfs_destroy_test_fs(void)
+static void btrfs_destroy_test_fs(void)
{
kern_unmount(test_mnt);
unregister_filesystem(&test_type);
extent_io_tree_init(&fs_info->freed_extents[0], NULL);
extent_io_tree_init(&fs_info->freed_extents[1], NULL);
fs_info->pinned_extents = &fs_info->freed_extents[0];
+ set_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
+
+ test_mnt->mnt_sb->s_fs_info = fs_info;
+
return fs_info;
}
-static void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
+void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
{
struct radix_tree_iter iter;
void **slot;
+ if (!fs_info)
+ return;
+
+ if (WARN_ON(!test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO,
+ &fs_info->fs_state)))
+ return;
+
+ test_mnt->mnt_sb->s_fs_info = NULL;
+
spin_lock(&fs_info->buffer_lock);
radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
struct extent_buffer *eb;
{
if (!root)
return;
+ /* Will be freed by btrfs_free_fs_roots */
+ if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state)))
+ return;
if (root->node)
free_extent_buffer(root->node);
- if (root->fs_info)
- btrfs_free_dummy_fs_info(root->fs_info);
kfree(root);
}
INIT_LIST_HEAD(&trans->qgroup_ref_list);
trans->type = __TRANS_DUMMY;
}
+
+int btrfs_run_sanity_tests(void)
+{
+ int ret, i;
+ u32 sectorsize, nodesize;
+ u32 test_sectorsize[] = {
+ PAGE_SIZE,
+ };
+ ret = btrfs_init_test_fs();
+ if (ret)
+ return ret;
+ for (i = 0; i < ARRAY_SIZE(test_sectorsize); i++) {
+ sectorsize = test_sectorsize[i];
+ for (nodesize = sectorsize;
+ nodesize <= BTRFS_MAX_METADATA_BLOCKSIZE;
+ nodesize <<= 1) {
+ pr_info("BTRFS: selftest: sectorsize: %u nodesize: %u\n",
+ sectorsize, nodesize);
+ ret = btrfs_test_free_space_cache(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_extent_buffer_operations(sectorsize,
+ nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_extent_io(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_inodes(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_qgroups(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_free_space_tree(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ }
+ }
+out:
+ btrfs_destroy_test_fs();
+ return ret;
+}
#define __BTRFS_TESTS
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+int btrfs_run_sanity_tests(void);
#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__)
struct btrfs_root;
struct btrfs_trans_handle;
-int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize);
int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize);
+int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize);
int btrfs_test_extent_io(u32 sectorsize, u32 nodesize);
int btrfs_test_inodes(u32 sectorsize, u32 nodesize);
int btrfs_test_qgroups(u32 sectorsize, u32 nodesize);
int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize);
-int btrfs_init_test_fs(void);
-void btrfs_destroy_test_fs(void);
struct inode *btrfs_new_test_inode(void);
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void);
+void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info);
void btrfs_free_dummy_root(struct btrfs_root *root);
struct btrfs_block_group_cache *
btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize);
void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache);
void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans);
#else
-static inline int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
-{
- return 0;
-}
-static inline int btrfs_test_extent_buffer_operations(u32 sectorsize,
- u32 nodesize)
-{
- return 0;
-}
-static inline int btrfs_init_test_fs(void)
-{
- return 0;
-}
-static inline void btrfs_destroy_test_fs(void)
-{
-}
-static inline int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
-{
- return 0;
-}
-static inline int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
-{
- return 0;
-}
-static inline int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
-{
- return 0;
-}
-static inline int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
+static inline int btrfs_run_sanity_tests(void)
{
return 0;
}
static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
{
- struct btrfs_path *path;
- struct btrfs_root *root;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_path *path = NULL;
+ struct btrfs_root *root = NULL;
struct extent_buffer *eb;
struct btrfs_item *item;
char *value = "mary had a little lamb";
test_msg("Running btrfs_split_item tests\n");
- root = btrfs_alloc_dummy_root(sectorsize, nodesize);
+ fs_info = btrfs_alloc_dummy_fs_info();
+ if (!fs_info) {
+ test_msg("Could not allocate fs_info\n");
+ return -ENOMEM;
+ }
+
+ root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
if (IS_ERR(root)) {
test_msg("Could not allocate root\n");
- return PTR_ERR(root);
+ ret = PTR_ERR(root);
+ goto out;
}
path = btrfs_alloc_path();
if (!path) {
test_msg("Could not allocate path\n");
- kfree(root);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, nodesize,
}
out:
btrfs_free_path(path);
- kfree(root);
+ btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
return ret;
}
int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
{
+ struct btrfs_fs_info *fs_info;
struct btrfs_block_group_cache *cache;
struct btrfs_root *root = NULL;
int ret = -ENOMEM;
return 0;
}
- root = btrfs_alloc_dummy_root(sectorsize, nodesize);
- if (IS_ERR(root)) {
- ret = PTR_ERR(root);
+ fs_info = btrfs_alloc_dummy_fs_info();
+ if (!fs_info) {
+ ret = -ENOMEM;
goto out;
}
- root->fs_info = btrfs_alloc_dummy_fs_info();
- if (!root->fs_info)
+ root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
goto out;
+ }
root->fs_info->extent_root = root;
cache->fs_info = root->fs_info;
out:
btrfs_free_dummy_block_group(cache);
btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
test_msg("Free space cache tests finished\n");
return ret;
}
static int run_test(test_func_t test_func, int bitmaps,
u32 sectorsize, u32 nodesize)
{
+ struct btrfs_fs_info *fs_info;
struct btrfs_root *root = NULL;
struct btrfs_block_group_cache *cache = NULL;
struct btrfs_trans_handle trans;
struct btrfs_path *path = NULL;
int ret;
- root = btrfs_alloc_dummy_root(sectorsize, nodesize);
- if (IS_ERR(root)) {
- test_msg("Couldn't allocate dummy root\n");
- ret = PTR_ERR(root);
+ fs_info = btrfs_alloc_dummy_fs_info();
+ if (!fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
+ ret = -ENOMEM;
goto out;
}
- root->fs_info = btrfs_alloc_dummy_fs_info();
- if (!root->fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
- ret = -ENOMEM;
+ root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate dummy root\n");
+ ret = PTR_ERR(root);
goto out;
}
btrfs_free_path(path);
btrfs_free_dummy_block_group(cache);
btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
return ret;
}
static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
{
+ struct btrfs_fs_info *fs_info = NULL;
struct inode *inode = NULL;
struct btrfs_root *root = NULL;
struct extent_map *em = NULL;
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
- root = btrfs_alloc_dummy_root(sectorsize, nodesize);
- if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
+ fs_info = btrfs_alloc_dummy_fs_info();
+ if (!fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
goto out;
}
- /*
- * We do this since btrfs_get_extent wants to assign em->bdev to
- * root->fs_info->fs_devices->latest_bdev.
- */
- root->fs_info = btrfs_alloc_dummy_fs_info();
- if (!root->fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate root\n");
goto out;
}
free_extent_map(em);
iput(inode);
btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
return ret;
}
static int test_hole_first(u32 sectorsize, u32 nodesize)
{
+ struct btrfs_fs_info *fs_info = NULL;
struct inode *inode = NULL;
struct btrfs_root *root = NULL;
struct extent_map *em = NULL;
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
- root = btrfs_alloc_dummy_root(sectorsize, nodesize);
- if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
+ fs_info = btrfs_alloc_dummy_fs_info();
+ if (!fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
goto out;
}
- root->fs_info = btrfs_alloc_dummy_fs_info();
- if (!root->fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate root\n");
goto out;
}
free_extent_map(em);
iput(inode);
btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
return ret;
}
static int test_extent_accounting(u32 sectorsize, u32 nodesize)
{
+ struct btrfs_fs_info *fs_info = NULL;
struct inode *inode = NULL;
struct btrfs_root *root = NULL;
int ret = -ENOMEM;
return ret;
}
- root = btrfs_alloc_dummy_root(sectorsize, nodesize);
- if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
+ fs_info = btrfs_alloc_dummy_fs_info();
+ if (!fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
goto out;
}
- root->fs_info = btrfs_alloc_dummy_fs_info();
- if (!root->fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate root\n");
goto out;
}
NULL, GFP_KERNEL);
iput(inode);
btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
return ret;
}
int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
{
+ struct btrfs_fs_info *fs_info = NULL;
struct btrfs_root *root;
struct btrfs_root *tmp_root;
int ret = 0;
- root = btrfs_alloc_dummy_root(sectorsize, nodesize);
- if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
- return PTR_ERR(root);
+ fs_info = btrfs_alloc_dummy_fs_info();
+ if (!fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
+ return -ENOMEM;
}
- root->fs_info = btrfs_alloc_dummy_fs_info();
- if (!root->fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
- ret = -ENOMEM;
+ root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate root\n");
+ ret = PTR_ERR(root);
goto out;
}
+
/* We are using this root as our extent root */
root->fs_info->extent_root = root;
btrfs_set_header_nritems(root->node, 0);
root->alloc_bytenr += 2 * nodesize;
- tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize);
+ tmp_root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
if (IS_ERR(tmp_root)) {
test_msg("Couldn't allocate a fs root\n");
ret = PTR_ERR(tmp_root);
goto out;
}
- tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize);
+ tmp_root = btrfs_alloc_dummy_root(fs_info, sectorsize, nodesize);
if (IS_ERR(tmp_root)) {
test_msg("Couldn't allocate a fs root\n");
ret = PTR_ERR(tmp_root);
ret = test_multiple_refs(root, sectorsize, nodesize);
out:
btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
return ret;
}
h->transaction = cur_trans;
h->root = root;
h->use_count = 1;
+ h->fs_info = root->fs_info;
h->type = type;
h->can_flush_pending_bgs = true;
goto dir_item_existed;
} else if (IS_ERR(dir_item)) {
ret = PTR_ERR(dir_item);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
btrfs_release_path(path);
*/
ret = btrfs_run_delayed_items(trans, root);
if (ret) { /* Transaction aborted */
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
if (ret) {
btrfs_tree_unlock(old);
free_extent_buffer(old);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
btrfs_tree_unlock(old);
free_extent_buffer(old);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
/* see comments in should_cow_block() */
btrfs_tree_unlock(tmp);
free_extent_buffer(tmp);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
btrfs_ino(parent_inode), index,
dentry->d_name.name, dentry->d_name.len);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
if (IS_ERR(pending->snap)) {
ret = PTR_ERR(pending->snap);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
ret = btrfs_reloc_post_snapshot(trans, pending);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
/* We have check then name at the beginning, so it is impossible. */
BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
current_fs_time(parent_inode->i_sb);
ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, new_uuid.b,
BTRFS_UUID_KEY_SUBVOL, objectid);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) {
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
objectid);
if (ret && ret != -EEXIST) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
}
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto fail;
}
super->root = root_item->bytenr;
super->generation = root_item->generation;
super->root_level = root_item->level;
- if (btrfs_test_opt(root, SPACE_CACHE))
+ if (btrfs_test_opt(root->fs_info, SPACE_CACHE))
super->cache_generation = root_item->generation;
if (root->fs_info->update_uuid_tree_gen)
super->uuid_tree_generation = root_item->generation;
WARN_ON(trans->use_count > 1);
- btrfs_abort_transaction(trans, root, err);
+ btrfs_abort_transaction(trans, err);
spin_lock(&root->fs_info->trans_lock);
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
{
- if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
+ if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
return btrfs_start_delalloc_roots(fs_info, 1, -1);
return 0;
}
static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
{
- if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
+ if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
}
* Subvolume quota depends on this
*/
struct btrfs_root *root;
+ struct btrfs_fs_info *fs_info;
struct seq_list delayed_ref_elem;
struct list_head qgroup_ref_list;
struct list_head new_bgs;
while (1) {
int batch = atomic_read(&root->log_batch);
/* when we're on an ssd, just kick the log commit out */
- if (!btrfs_test_opt(root, SSD) &&
+ if (!btrfs_test_opt(root->fs_info, SSD) &&
test_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state)) {
mutex_unlock(&root->log_mutex);
schedule_timeout_uninterruptible(1);
ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
if (ret) {
blk_finish_plug(&plug);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_free_logged_extents(log, log_transid);
btrfs_set_log_full_commit(root->fs_info, trans);
mutex_unlock(&root->log_mutex);
btrfs_set_log_full_commit(root->fs_info, trans);
if (ret != -ENOSPC) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
mutex_unlock(&log_root_tree->log_mutex);
goto out;
}
blk_finish_plug(&plug);
if (ret) {
btrfs_set_log_full_commit(root->fs_info, trans);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
goto out_wake_log_root;
ret = write_ctree_super(trans, root->fs_info->tree_root, 1);
if (ret) {
btrfs_set_log_full_commit(root->fs_info, trans);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out_wake_log_root;
}
ret = walk_log_tree(trans, log, &wc);
/* I don't think this can happen but just in case */
if (ret)
- btrfs_abort_transaction(trans, log, ret);
+ btrfs_abort_transaction(trans, ret);
while (1) {
ret = find_first_extent_bit(&log->dirty_log_pages,
btrfs_set_log_full_commit(root->fs_info, trans);
ret = 0;
} else if (ret < 0)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_log_trans(root);
btrfs_set_log_full_commit(root->fs_info, trans);
ret = 0;
} else if (ret < 0 && ret != -ENOENT)
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_log_trans(root);
return ret;
ins_nr = 0;
ret = btrfs_search_forward(root, &min_key,
path, trans->transid);
+ if (ret < 0) {
+ err = ret;
+ goto out_unlock;
+ }
if (ret != 0)
break;
again:
sb = inode->i_sb;
- if (btrfs_test_opt(root, NOTREELOG)) {
+ if (btrfs_test_opt(root->fs_info, NOTREELOG)) {
ret = 1;
goto end_no_trans;
}
static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
-static void btrfs_close_one_device(struct btrfs_device *device);
DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
schedule_work(&device->rcu_work);
}
+static void btrfs_close_one_device(struct btrfs_device *device)
+{
+ struct btrfs_fs_devices *fs_devices = device->fs_devices;
+ struct btrfs_device *new_device;
+ struct rcu_string *name;
+
+ if (device->bdev)
+ fs_devices->open_devices--;
+
+ if (device->writeable &&
+ device->devid != BTRFS_DEV_REPLACE_DEVID) {
+ list_del_init(&device->dev_alloc_list);
+ fs_devices->rw_devices--;
+ }
+
+ if (device->missing)
+ fs_devices->missing_devices--;
+
+ if (device->bdev && device->writeable) {
+ sync_blockdev(device->bdev);
+ invalidate_bdev(device->bdev);
+ }
+
+ new_device = btrfs_alloc_device(NULL, &device->devid,
+ device->uuid);
+ BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
+
+ /* Safe because we are under uuid_mutex */
+ if (device->name) {
+ name = rcu_string_strdup(device->name->str, GFP_NOFS);
+ BUG_ON(!name); /* -ENOMEM */
+ rcu_assign_pointer(new_device->name, name);
+ }
+
+ list_replace_rcu(&device->dev_list, &new_device->dev_list);
+ new_device->fs_devices = device->fs_devices;
+
+ call_rcu(&device->rcu, free_device);
+}
+
static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
struct btrfs_device *device, *tmp;
ret = init_first_rw_device(trans, root, device);
unlock_chunks(root);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto error_trans;
}
}
ret = btrfs_add_device(trans, root, device);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto error_trans;
}
ret = btrfs_finish_sprout(trans, root);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto error_trans;
}
&dev_extent_len);
if (ret) {
mutex_unlock(&fs_devices->device_list_mutex);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_update_device(trans, map->stripes[i].dev);
if (ret) {
mutex_unlock(&fs_devices->device_list_mutex);
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
}
ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
}
ret = btrfs_remove_block_group(trans, extent_root, chunk_offset, em);
if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
* chunk tree entries
*/
ret = btrfs_remove_chunk(trans, root, chunk_offset);
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction(trans, extent_root);
return ret;
}
u64 size_to_free;
u64 chunk_type;
struct btrfs_chunk *chunk;
- struct btrfs_path *path;
+ struct btrfs_path *path = NULL;
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_trans_handle *trans;
ret = btrfs_shrink_device(device, old_size - size_to_free);
if (ret == -ENOSPC)
break;
- BUG_ON(ret);
+ if (ret) {
+ /* btrfs_shrink_device never returns ret > 0 */
+ WARN_ON(ret > 0);
+ goto error;
+ }
trans = btrfs_start_transaction(dev_root, 0);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ btrfs_info_in_rcu(fs_info,
+ "resize: unable to start transaction after shrinking device %s (error %d), old size %llu, new size %llu",
+ rcu_str_deref(device->name), ret,
+ old_size, old_size - size_to_free);
+ goto error;
+ }
ret = btrfs_grow_device(trans, device, old_size);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_end_transaction(trans, dev_root);
+ /* btrfs_grow_device never returns ret > 0 */
+ WARN_ON(ret > 0);
+ btrfs_info_in_rcu(fs_info,
+ "resize: unable to grow device after shrinking device %s (error %d), old size %llu, new size %llu",
+ rcu_str_deref(device->name), ret,
+ old_size, old_size - size_to_free);
+ goto error;
+ }
btrfs_end_transaction(trans, dev_root);
}
}
spin_unlock(&fs_info->balance_lock);
- if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
+ if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
btrfs_info(fs_info, "force skipping balance");
return 0;
}
BTRFS_UUID_TREE_OBJECTID);
if (IS_ERR(uuid_root)) {
ret = PTR_ERR(uuid_root);
- btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans, tree_root);
return ret;
}
btrfs_set_fs_incompat(info, RAID56);
}
-#define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r) \
- - sizeof(struct btrfs_item) \
+#define BTRFS_MAX_DEVS(r) ((BTRFS_MAX_ITEM_SIZE(r) \
- sizeof(struct btrfs_chunk)) \
/ sizeof(struct btrfs_stripe) + 1)
BTRFS_UUID_SIZE);
map->stripes[i].dev = btrfs_find_device(root->fs_info, devid,
uuid, NULL);
- if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
+ if (!map->stripes[i].dev &&
+ !btrfs_test_opt(root->fs_info, DEGRADED)) {
free_extent_map(em);
return -EIO;
}
fs_devices = find_fsid(fsid);
if (!fs_devices) {
- if (!btrfs_test_opt(root, DEGRADED))
+ if (!btrfs_test_opt(root->fs_info, DEGRADED))
return ERR_PTR(-ENOENT);
fs_devices = alloc_fs_devices(fsid);
device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid);
if (!device) {
- if (!btrfs_test_opt(root, DEGRADED))
+ if (!btrfs_test_opt(root->fs_info, DEGRADED))
return -EIO;
device = add_missing_dev(root, fs_devices, devid, dev_uuid);
btrfs_warn(root->fs_info, "devid %llu uuid %pU missing",
devid, dev_uuid);
} else {
- if (!device->bdev && !btrfs_test_opt(root, DEGRADED))
+ if (!device->bdev && !btrfs_test_opt(root->fs_info, DEGRADED))
return -EIO;
if(!device->bdev && !device->missing) {
fs_devices = fs_devices->seed;
}
}
-
-static void btrfs_close_one_device(struct btrfs_device *device)
-{
- struct btrfs_fs_devices *fs_devices = device->fs_devices;
- struct btrfs_device *new_device;
- struct rcu_string *name;
-
- if (device->bdev)
- fs_devices->open_devices--;
-
- if (device->writeable &&
- device->devid != BTRFS_DEV_REPLACE_DEVID) {
- list_del_init(&device->dev_alloc_list);
- fs_devices->rw_devices--;
- }
-
- if (device->missing)
- fs_devices->missing_devices--;
-
- new_device = btrfs_alloc_device(NULL, &device->devid,
- device->uuid);
- BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
-
- /* Safe because we are under uuid_mutex */
- if (device->name) {
- name = rcu_string_strdup(device->name->str, GFP_NOFS);
- BUG_ON(!name); /* -ENOMEM */
- rcu_assign_pointer(new_device->name, name);
- }
-
- list_replace_rcu(&device->dev_list, &new_device->dev_list);
- new_device->fs_devices = device->fs_devices;
-
- call_rcu(&device->rcu, free_device);
-}
}
EXPORT_SYMBOL(setup_arg_pages);
+#else
+
+/*
+ * Transfer the program arguments and environment from the holding pages
+ * onto the stack. The provided stack pointer is adjusted accordingly.
+ */
+int transfer_args_to_stack(struct linux_binprm *bprm,
+ unsigned long *sp_location)
+{
+ unsigned long index, stop, sp;
+ int ret = 0;
+
+ stop = bprm->p >> PAGE_SHIFT;
+ sp = *sp_location;
+
+ for (index = MAX_ARG_PAGES - 1; index >= stop; index--) {
+ unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0;
+ char *src = kmap(bprm->page[index]) + offset;
+ sp -= PAGE_SIZE - offset;
+ if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0)
+ ret = -EFAULT;
+ kunmap(bprm->page[index]);
+ if (ret)
+ goto out;
+ }
+
+ *sp_location = sp;
+
+out:
+ return ret;
+}
+EXPORT_SYMBOL(transfer_args_to_stack);
+
#endif /* CONFIG_MMU */
static struct file *do_open_execat(int fd, struct filename *name, int flags)
if (S_ISLNK(root_inode->i_mode)) {
char *name = follow_link(host_root_path);
- if (IS_ERR(name))
+ if (IS_ERR(name)) {
err = PTR_ERR(name);
- else
- err = read_name(root_inode, name);
+ goto out_put;
+ }
+ err = read_name(root_inode, name);
kfree(name);
if (err)
goto out_put;
bool "NFSv4.1 server support for pNFS block layouts"
depends on NFSD_V4 && BLOCK
select NFSD_PNFS
+ select EXPORTFS_BLOCK_OPS
help
This option enables support for the exporting pNFS block layouts
in the kernel's NFS server. The pNFS block layout enables NFS
bool "NFSv4.1 server support for pNFS SCSI layouts"
depends on NFSD_V4 && BLOCK
select NFSD_PNFS
+ select EXPORTFS_BLOCK_OPS
help
This option enables support for the exporting pNFS SCSI layouts
in the kernel's NFS server. The pNFS SCSI layout enables NFS
If unsure, say N.
+config NFSD_FLEXFILELAYOUT
+ bool "NFSv4.1 server support for pNFS Flex File layouts"
+ depends on NFSD_V4
+ select NFSD_PNFS
+ help
+ This option enables support for the exporting pNFS Flex File
+ layouts in the kernel's NFS server. The pNFS Flex File layout
+ enables NFS clients to directly perform I/O to NFSv3 devices
+ accesible to both the server and the clients. See
+ draft-ietf-nfsv4-flex-files for more details.
+
+ Warning, this server implements the bare minimum functionality
+ to be a flex file server - it is for testing the client,
+ not for use in production.
+
+ If unsure, say N.
+
config NFSD_V4_SECURITY_LABEL
bool "Provide Security Label support for NFSv4 server"
depends on NFSD_V4 && SECURITY
nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o
nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o
+nfsd-$(CONFIG_NFSD_FLEXFILELAYOUT) += flexfilelayout.o flexfilelayoutxdr.o
static __be32
nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
+ struct svc_rqst *rqstp,
struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdp)
{
static __be32
nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
+ struct svc_rqst *rqstp,
struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdp)
{
switch (b->type) {
case PNFS_BLOCK_VOLUME_SIMPLE:
- len = 4 + 4 + 8 + 4 + b->simple.sig_len;
+ len = 4 + 4 + 8 + 4 + (XDR_QUADLEN(b->simple.sig_len) << 2);
p = xdr_reserve_space(xdr, len);
if (!p)
return -ETOOSMALL;
p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len);
break;
case PNFS_BLOCK_VOLUME_SCSI:
- len = 4 + 4 + 4 + 4 + b->scsi.designator_len + 8;
+ len = 4 + 4 + 4 + 4 + (XDR_QUADLEN(b->scsi.designator_len) << 2) + 8;
p = xdr_reserve_space(xdr, len);
if (!p)
return -ETOOSMALL;
new->ex_fslocs.locations = NULL;
new->ex_fslocs.locations_count = 0;
new->ex_fslocs.migrated = 0;
- new->ex_layout_type = 0;
+ new->ex_layout_types = 0;
new->ex_uuid = NULL;
new->cd = item->cd;
}
item->ex_fslocs.locations_count = 0;
new->ex_fslocs.migrated = item->ex_fslocs.migrated;
item->ex_fslocs.migrated = 0;
- new->ex_layout_type = item->ex_layout_type;
+ new->ex_layout_types = item->ex_layout_types;
new->ex_nflavors = item->ex_nflavors;
for (i = 0; i < MAX_SECINFO_LIST; i++) {
new->ex_flavors[i] = item->ex_flavors[i];
rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX)
return 0;
}
+
+ /* If the compound op contains a spo_must_allowed op,
+ * it will be sent with integrity/protection which
+ * will have to be expressly allowed on mounts that
+ * don't support it
+ */
+
+ if (nfsd4_spo_must_allow(rqstp))
+ return 0;
+
return nfserr_wrongsec;
}
struct nfsd4_fs_locations ex_fslocs;
uint32_t ex_nflavors;
struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST];
- enum pnfs_layouttype ex_layout_type;
+ u32 ex_layout_types;
struct nfsd4_deviceid_map *ex_devid_map;
struct cache_detail *cd;
};
--- /dev/null
+/*
+ * Copyright (c) 2016 Tom Haynes <loghyr@primarydata.com>
+ *
+ * The following implements a super-simple flex-file server
+ * where the NFSv4.1 mds is also the ds. And the storage is
+ * the same. I.e., writing to the mds via a NFSv4.1 WRITE
+ * goes to the same location as the NFSv3 WRITE.
+ */
+#include <linux/slab.h>
+
+#include <linux/nfsd/debug.h>
+
+#include <linux/sunrpc/addr.h>
+
+#include "flexfilelayoutxdr.h"
+#include "pnfs.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_PNFS
+
+static __be32
+nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
+ struct nfsd4_layoutget *args)
+{
+ struct nfsd4_layout_seg *seg = &args->lg_seg;
+ u32 device_generation = 0;
+ int error;
+ uid_t u;
+
+ struct pnfs_ff_layout *fl;
+
+ /*
+ * The super simple flex file server has 1 mirror, 1 data server,
+ * and 1 file handle. So instead of 4 allocs, do 1 for now.
+ * Zero it out for the stateid - don't want junk in there!
+ */
+ error = -ENOMEM;
+ fl = kzalloc(sizeof(*fl), GFP_KERNEL);
+ if (!fl)
+ goto out_error;
+ args->lg_content = fl;
+
+ /*
+ * Avoid layout commit, try to force the I/O to the DS,
+ * and for fun, cause all IOMODE_RW layout segments to
+ * effectively be WRITE only.
+ */
+ fl->flags = FF_FLAGS_NO_LAYOUTCOMMIT | FF_FLAGS_NO_IO_THRU_MDS |
+ FF_FLAGS_NO_READ_IO;
+
+ /* Do not allow a IOMODE_READ segment to have write pemissions */
+ if (seg->iomode == IOMODE_READ) {
+ u = from_kuid(&init_user_ns, inode->i_uid) + 1;
+ fl->uid = make_kuid(&init_user_ns, u);
+ } else
+ fl->uid = inode->i_uid;
+ fl->gid = inode->i_gid;
+
+ error = nfsd4_set_deviceid(&fl->deviceid, fhp, device_generation);
+ if (error)
+ goto out_error;
+
+ fl->fh.size = fhp->fh_handle.fh_size;
+ memcpy(fl->fh.data, &fhp->fh_handle.fh_base, fl->fh.size);
+
+ /* Give whole file layout segments */
+ seg->offset = 0;
+ seg->length = NFS4_MAX_UINT64;
+
+ dprintk("GET: 0x%llx:0x%llx %d\n", seg->offset, seg->length,
+ seg->iomode);
+ return 0;
+
+out_error:
+ seg->length = 0;
+ return nfserrno(error);
+}
+
+static __be32
+nfsd4_ff_proc_getdeviceinfo(struct super_block *sb, struct svc_rqst *rqstp,
+ struct nfs4_client *clp, struct nfsd4_getdeviceinfo *gdp)
+{
+ struct pnfs_ff_device_addr *da;
+
+ u16 port;
+ char addr[INET6_ADDRSTRLEN];
+
+ da = kzalloc(sizeof(struct pnfs_ff_device_addr), GFP_KERNEL);
+ if (!da)
+ return nfserrno(-ENOMEM);
+
+ gdp->gd_device = da;
+
+ da->version = 3;
+ da->minor_version = 0;
+
+ da->rsize = svc_max_payload(rqstp);
+ da->wsize = da->rsize;
+
+ rpc_ntop((struct sockaddr *)&rqstp->rq_daddr,
+ addr, INET6_ADDRSTRLEN);
+ if (rqstp->rq_daddr.ss_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&rqstp->rq_daddr;
+ port = ntohs(sin->sin_port);
+ snprintf(da->netaddr.netid, FF_NETID_LEN + 1, "tcp");
+ da->netaddr.netid_len = 3;
+ } else {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&rqstp->rq_daddr;
+ port = ntohs(sin6->sin6_port);
+ snprintf(da->netaddr.netid, FF_NETID_LEN + 1, "tcp6");
+ da->netaddr.netid_len = 4;
+ }
+
+ da->netaddr.addr_len =
+ snprintf(da->netaddr.addr, FF_ADDR_LEN + 1,
+ "%s.%hhu.%hhu", addr, port >> 8, port & 0xff);
+
+ da->tightly_coupled = false;
+
+ return 0;
+}
+
+const struct nfsd4_layout_ops ff_layout_ops = {
+ .notify_types =
+ NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
+ .proc_getdeviceinfo = nfsd4_ff_proc_getdeviceinfo,
+ .encode_getdeviceinfo = nfsd4_ff_encode_getdeviceinfo,
+ .proc_layoutget = nfsd4_ff_proc_layoutget,
+ .encode_layoutget = nfsd4_ff_encode_layoutget,
+};
--- /dev/null
+/*
+ * Copyright (c) 2016 Tom Haynes <loghyr@primarydata.com>
+ */
+#include <linux/sunrpc/svc.h>
+#include <linux/nfs4.h>
+
+#include "nfsd.h"
+#include "flexfilelayoutxdr.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_PNFS
+
+struct ff_idmap {
+ char buf[11];
+ int len;
+};
+
+__be32
+nfsd4_ff_encode_layoutget(struct xdr_stream *xdr,
+ struct nfsd4_layoutget *lgp)
+{
+ struct pnfs_ff_layout *fl = lgp->lg_content;
+ int len, mirror_len, ds_len, fh_len;
+ __be32 *p;
+
+ /*
+ * Unlike nfsd4_encode_user, we know these will
+ * always be stringified.
+ */
+ struct ff_idmap uid;
+ struct ff_idmap gid;
+
+ fh_len = 4 + fl->fh.size;
+
+ uid.len = sprintf(uid.buf, "%u", from_kuid(&init_user_ns, fl->uid));
+ gid.len = sprintf(gid.buf, "%u", from_kgid(&init_user_ns, fl->gid));
+
+ /* 8 + len for recording the length, name, and padding */
+ ds_len = 20 + sizeof(stateid_opaque_t) + 4 + fh_len +
+ 8 + uid.len + 8 + gid.len;
+
+ mirror_len = 4 + ds_len;
+
+ /* The layout segment */
+ len = 20 + mirror_len;
+
+ p = xdr_reserve_space(xdr, sizeof(__be32) + len);
+ if (!p)
+ return nfserr_toosmall;
+
+ *p++ = cpu_to_be32(len);
+ p = xdr_encode_hyper(p, 0); /* stripe unit of 1 */
+
+ *p++ = cpu_to_be32(1); /* single mirror */
+ *p++ = cpu_to_be32(1); /* single data server */
+
+ p = xdr_encode_opaque_fixed(p, &fl->deviceid,
+ sizeof(struct nfsd4_deviceid));
+
+ *p++ = cpu_to_be32(1); /* efficiency */
+
+ *p++ = cpu_to_be32(fl->stateid.si_generation);
+ p = xdr_encode_opaque_fixed(p, &fl->stateid.si_opaque,
+ sizeof(stateid_opaque_t));
+
+ *p++ = cpu_to_be32(1); /* single file handle */
+ p = xdr_encode_opaque(p, fl->fh.data, fl->fh.size);
+
+ p = xdr_encode_opaque(p, uid.buf, uid.len);
+ p = xdr_encode_opaque(p, gid.buf, gid.len);
+
+ *p++ = cpu_to_be32(fl->flags);
+ *p++ = cpu_to_be32(0); /* No stats collect hint */
+
+ return 0;
+}
+
+__be32
+nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr,
+ struct nfsd4_getdeviceinfo *gdp)
+{
+ struct pnfs_ff_device_addr *da = gdp->gd_device;
+ int len;
+ int ver_len;
+ int addr_len;
+ __be32 *p;
+
+ /* len + padding for two strings */
+ addr_len = 16 + da->netaddr.netid_len + da->netaddr.addr_len;
+ ver_len = 20;
+
+ len = 4 + ver_len + 4 + addr_len;
+
+ p = xdr_reserve_space(xdr, len + sizeof(__be32));
+ if (!p)
+ return nfserr_resource;
+
+ /*
+ * Fill in the overall length and number of volumes at the beginning
+ * of the layout.
+ */
+ *p++ = cpu_to_be32(len);
+ *p++ = cpu_to_be32(1); /* 1 netaddr */
+ p = xdr_encode_opaque(p, da->netaddr.netid, da->netaddr.netid_len);
+ p = xdr_encode_opaque(p, da->netaddr.addr, da->netaddr.addr_len);
+
+ *p++ = cpu_to_be32(1); /* 1 versions */
+
+ *p++ = cpu_to_be32(da->version);
+ *p++ = cpu_to_be32(da->minor_version);
+ *p++ = cpu_to_be32(da->rsize);
+ *p++ = cpu_to_be32(da->wsize);
+ *p++ = cpu_to_be32(da->tightly_coupled);
+
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2016 Tom Haynes <loghyr@primarydata.com>
+ */
+#ifndef _NFSD_FLEXFILELAYOUTXDR_H
+#define _NFSD_FLEXFILELAYOUTXDR_H 1
+
+#include <linux/inet.h>
+#include "xdr4.h"
+
+#define FF_FLAGS_NO_LAYOUTCOMMIT 1
+#define FF_FLAGS_NO_IO_THRU_MDS 2
+#define FF_FLAGS_NO_READ_IO 4
+
+struct xdr_stream;
+
+#define FF_NETID_LEN (4)
+#define FF_ADDR_LEN (INET6_ADDRSTRLEN + 8)
+struct pnfs_ff_netaddr {
+ char netid[FF_NETID_LEN + 1];
+ char addr[FF_ADDR_LEN + 1];
+ u32 netid_len;
+ u32 addr_len;
+};
+
+struct pnfs_ff_device_addr {
+ struct pnfs_ff_netaddr netaddr;
+ u32 version;
+ u32 minor_version;
+ u32 rsize;
+ u32 wsize;
+ bool tightly_coupled;
+};
+
+struct pnfs_ff_layout {
+ u32 flags;
+ u32 stats_collect_hint;
+ kuid_t uid;
+ kgid_t gid;
+ struct nfsd4_deviceid deviceid;
+ stateid_t stateid;
+ struct nfs_fh fh;
+};
+
+__be32 nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr,
+ struct nfsd4_getdeviceinfo *gdp);
+__be32 nfsd4_ff_encode_layoutget(struct xdr_stream *xdr,
+ struct nfsd4_layoutget *lgp);
+
+#endif /* _NFSD_FLEXFILELAYOUTXDR_H */
static const struct lock_manager_operations nfsd4_layouts_lm_ops;
const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = {
+#ifdef CONFIG_NFSD_FLEXFILELAYOUT
+ [LAYOUT_FLEX_FILES] = &ff_layout_ops,
+#endif
#ifdef CONFIG_NFSD_BLOCKLAYOUT
[LAYOUT_BLOCK_VOLUME] = &bl_layout_ops,
#endif
void nfsd4_setup_layout_type(struct svc_export *exp)
{
+#if defined(CONFIG_NFSD_BLOCKLAYOUT) || defined(CONFIG_NFSD_SCSILAYOUT)
struct super_block *sb = exp->ex_path.mnt->mnt_sb;
+#endif
if (!(exp->ex_flags & NFSEXP_PNFS))
return;
/*
- * Check if the file system supports exporting a block-like layout.
+ * If flex file is configured, use it by default. Otherwise
+ * check if the file system supports exporting a block-like layout.
* If the block device supports reservations prefer the SCSI layout,
* otherwise advertise the block layout.
*/
+#ifdef CONFIG_NFSD_FLEXFILELAYOUT
+ exp->ex_layout_types |= 1 << LAYOUT_FLEX_FILES;
+#endif
#ifdef CONFIG_NFSD_BLOCKLAYOUT
+ /* overwrite flex file layout selection if needed */
if (sb->s_export_op->get_uuid &&
sb->s_export_op->map_blocks &&
sb->s_export_op->commit_blocks)
- exp->ex_layout_type = LAYOUT_BLOCK_VOLUME;
+ exp->ex_layout_types |= 1 << LAYOUT_BLOCK_VOLUME;
#endif
#ifdef CONFIG_NFSD_SCSILAYOUT
/* overwrite block layout selection if needed */
if (sb->s_export_op->map_blocks &&
sb->s_export_op->commit_blocks &&
sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops)
- exp->ex_layout_type = LAYOUT_SCSI;
+ exp->ex_layout_types |= 1 << LAYOUT_SCSI;
#endif
}
fh_init(&resfh, NFS4_FHSIZE);
- status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR,
- NFSD_MAY_CREATE);
+ status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_NOP);
if (status)
return status;
static const struct nfsd4_layout_ops *
nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
{
- if (!exp->ex_layout_type) {
+ if (!exp->ex_layout_types) {
dprintk("%s: export does not support pNFS\n", __func__);
return NULL;
}
- if (exp->ex_layout_type != layout_type) {
+ if (!(exp->ex_layout_types & (1 << layout_type))) {
dprintk("%s: layout type %d not supported\n",
__func__, layout_type);
return NULL;
nfserr = nfs_ok;
if (gdp->gd_maxcount != 0) {
nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb,
- cstate->session->se_client, gdp);
+ rqstp, cstate->session->se_client, gdp);
}
gdp->gd_notify_types &= ops->notify_types;
},
};
+/**
+ * nfsd4_spo_must_allow - Determine if the compound op contains an
+ * operation that is allowed to be sent with machine credentials
+ *
+ * @rqstp: a pointer to the struct svc_rqst
+ *
+ * Checks to see if the compound contains a spo_must_allow op
+ * and confirms that it was sent with the proper machine creds.
+ */
+
+bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
+{
+ struct nfsd4_compoundres *resp = rqstp->rq_resp;
+ struct nfsd4_compoundargs *argp = rqstp->rq_argp;
+ struct nfsd4_op *this = &argp->ops[resp->opcnt - 1];
+ struct nfsd4_compound_state *cstate = &resp->cstate;
+ struct nfs4_op_map *allow = &cstate->clp->cl_spo_must_allow;
+ u32 opiter;
+
+ if (!cstate->minorversion)
+ return false;
+
+ if (cstate->spo_must_allowed == true)
+ return true;
+
+ opiter = resp->opcnt;
+ while (opiter < argp->opcnt) {
+ this = &argp->ops[opiter++];
+ if (test_bit(this->opnum, allow->u.longs) &&
+ cstate->clp->cl_mach_cred &&
+ nfsd4_mach_creds_match(cstate->clp, rqstp)) {
+ cstate->spo_must_allowed = true;
+ return true;
+ }
+ }
+ cstate->spo_must_allowed = false;
+ return false;
+}
+
int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
struct nfsd4_operation *opdesc;
}
}
-static void release_lockowner(struct nfs4_lockowner *lo)
-{
- struct nfs4_client *clp = lo->lo_owner.so_client;
- struct nfs4_ol_stateid *stp;
- struct list_head reaplist;
-
- INIT_LIST_HEAD(&reaplist);
-
- spin_lock(&clp->cl_lock);
- unhash_lockowner_locked(lo);
- while (!list_empty(&lo->lo_owner.so_stateids)) {
- stp = list_first_entry(&lo->lo_owner.so_stateids,
- struct nfs4_ol_stateid, st_perstateowner);
- WARN_ON(!unhash_lock_stateid(stp));
- put_ol_stateid_locked(stp, &reaplist);
- }
- spin_unlock(&clp->cl_lock);
- free_ol_stateid_reaplist(&reaplist);
- nfs4_put_stateowner(&lo->lo_owner);
-}
-
static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
struct list_head *reaplist)
{
service == RPC_GSS_SVC_PRIVACY;
}
-static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp)
+bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp)
{
struct svc_cred *cr = &rqstp->rq_cred;
switch (exid->spa_how) {
case SP4_MACH_CRED:
+ exid->spo_must_enforce[0] = 0;
+ exid->spo_must_enforce[1] = (
+ 1 << (OP_BIND_CONN_TO_SESSION - 32) |
+ 1 << (OP_EXCHANGE_ID - 32) |
+ 1 << (OP_CREATE_SESSION - 32) |
+ 1 << (OP_DESTROY_SESSION - 32) |
+ 1 << (OP_DESTROY_CLIENTID - 32));
+
+ exid->spo_must_allow[0] &= (1 << (OP_CLOSE) |
+ 1 << (OP_OPEN_DOWNGRADE) |
+ 1 << (OP_LOCKU) |
+ 1 << (OP_DELEGRETURN));
+
+ exid->spo_must_allow[1] &= (
+ 1 << (OP_TEST_STATEID - 32) |
+ 1 << (OP_FREE_STATEID - 32));
if (!svc_rqst_integrity_protected(rqstp)) {
status = nfserr_inval;
goto out_nolock;
status = nfserr_inval;
goto out;
}
- if (!mach_creds_match(conf, rqstp)) {
+ if (!nfsd4_mach_creds_match(conf, rqstp)) {
status = nfserr_wrong_cred;
goto out;
}
goto out;
}
new->cl_minorversion = cstate->minorversion;
+ new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0];
+ new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1];
gen_clid(new, nn);
add_to_unconfirmed(new);
if (conf) {
status = nfserr_wrong_cred;
- if (!mach_creds_match(conf, rqstp))
+ if (!nfsd4_mach_creds_match(conf, rqstp))
goto out_free_conn;
cs_slot = &conf->cl_cs_slot;
status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
goto out_free_conn;
}
status = nfserr_wrong_cred;
- if (!mach_creds_match(unconf, rqstp))
+ if (!nfsd4_mach_creds_match(unconf, rqstp))
goto out_free_conn;
cs_slot = &unconf->cl_cs_slot;
status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
if (!session)
goto out_no_session;
status = nfserr_wrong_cred;
- if (!mach_creds_match(session->se_client, rqstp))
+ if (!nfsd4_mach_creds_match(session->se_client, rqstp))
goto out;
status = nfsd4_map_bcts_dir(&bcts->dir);
if (status)
if (!ses)
goto out_client_lock;
status = nfserr_wrong_cred;
- if (!mach_creds_match(ses->se_client, r))
+ if (!nfsd4_mach_creds_match(ses->se_client, r))
goto out_put_session;
status = mark_session_dead_locked(ses, 1 + ref_held_by_me);
if (status)
status = nfserr_stale_clientid;
goto out;
}
- if (!mach_creds_match(clp, rqstp)) {
+ if (!nfsd4_mach_creds_match(clp, rqstp)) {
clp = NULL;
status = nfserr_wrong_cred;
goto out;
* We don't take advantage of the rca_one_fs case.
* That's OK, it's optional, we can safely ignore it.
*/
- return nfs_ok;
+ return nfs_ok;
}
status = nfserr_complete_already;
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct nfs4_client *clp;
+ LIST_HEAD (reaplist);
dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
clid->cl_boot, clid->cl_id);
nfs4_get_stateowner(sop);
break;
}
+ if (!lo) {
+ spin_unlock(&clp->cl_lock);
+ return status;
+ }
+
+ unhash_lockowner_locked(lo);
+ while (!list_empty(&lo->lo_owner.so_stateids)) {
+ stp = list_first_entry(&lo->lo_owner.so_stateids,
+ struct nfs4_ol_stateid,
+ st_perstateowner);
+ WARN_ON(!unhash_lock_stateid(stp));
+ put_ol_stateid_locked(stp, &reaplist);
+ }
spin_unlock(&clp->cl_lock);
- if (lo)
- release_lockowner(lo);
+ free_ol_stateid_reaplist(&reaplist);
+ nfs4_put_stateowner(&lo->lo_owner);
+
return status;
}
break;
case SP4_MACH_CRED:
/* spo_must_enforce */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy * 4);
- p += dummy;
-
+ status = nfsd4_decode_bitmap(argp,
+ exid->spo_must_enforce);
+ if (status)
+ goto out;
/* spo_must_allow */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy * 4);
- p += dummy;
+ status = nfsd4_decode_bitmap(argp, exid->spo_must_allow);
+ if (status)
+ goto out;
break;
case SP4_SSV:
/* ssp_ops */
}
static inline __be32
-nfsd4_encode_layout_type(struct xdr_stream *xdr, enum pnfs_layouttype layout_type)
+nfsd4_encode_layout_types(struct xdr_stream *xdr, u32 layout_types)
{
- __be32 *p;
+ __be32 *p;
+ unsigned long i = hweight_long(layout_types);
- if (layout_type) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(1);
- *p++ = cpu_to_be32(layout_type);
- } else {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(0);
- }
+ p = xdr_reserve_space(xdr, 4 + 4 * i);
+ if (!p)
+ return nfserr_resource;
+
+ *p++ = cpu_to_be32(i);
+
+ for (i = LAYOUT_NFSV4_1_FILES; i < LAYOUT_TYPE_MAX; ++i)
+ if (layout_types & (1 << i))
+ *p++ = cpu_to_be32(i);
return 0;
}
}
#ifdef CONFIG_NFSD_PNFS
if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) {
- status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type);
+ status = nfsd4_encode_layout_types(xdr, exp->ex_layout_types);
if (status)
goto out;
}
if (bmval2 & FATTR4_WORD2_LAYOUT_TYPES) {
- status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type);
+ status = nfsd4_encode_layout_types(xdr, exp->ex_layout_types);
if (status)
goto out;
}
return nfserr;
}
-static const u32 nfs4_minimal_spo_must_enforce[2] = {
- [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) |
- 1 << (OP_EXCHANGE_ID - 32) |
- 1 << (OP_CREATE_SESSION - 32) |
- 1 << (OP_DESTROY_SESSION - 32) |
- 1 << (OP_DESTROY_CLIENTID - 32)
-};
-
static __be32
nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_exchange_id *exid)
char *server_scope;
int major_id_sz;
int server_scope_sz;
+ int status = 0;
uint64_t minor_id = 0;
if (nfserr)
case SP4_NONE:
break;
case SP4_MACH_CRED:
- /* spo_must_enforce, spo_must_allow */
- p = xdr_reserve_space(xdr, 16);
- if (!p)
- return nfserr_resource;
-
/* spo_must_enforce bitmap: */
- *p++ = cpu_to_be32(2);
- *p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[0]);
- *p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[1]);
- /* empty spo_must_allow bitmap: */
- *p++ = cpu_to_be32(0);
-
+ status = nfsd4_encode_bitmap(xdr,
+ exid->spo_must_enforce[0],
+ exid->spo_must_enforce[1],
+ exid->spo_must_enforce[2]);
+ if (status)
+ goto out;
+ /* spo_must_allow bitmap: */
+ status = nfsd4_encode_bitmap(xdr,
+ exid->spo_must_allow[0],
+ exid->spo_must_allow[1],
+ exid->spo_must_allow[2]);
+ if (status)
+ goto out;
break;
default:
WARN_ON_ONCE(1);
/* Implementation id */
*p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */
return 0;
+out:
+ return status;
}
static __be32
void nfs4_reset_lease(time_t leasetime);
int nfs4_reset_recoverydir(char *recdir);
char * nfs4_recoverydir(void);
+bool nfsd4_spo_must_allow(struct svc_rqst *rqstp);
#else
static inline int nfsd4_init_slabs(void) { return 0; }
static inline void nfsd4_free_slabs(void) { }
static inline void nfs4_reset_lease(time_t leasetime) { }
static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
static inline char * nfs4_recoverydir(void) {return NULL; }
+static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
+{
+ return false;
+}
#endif
/*
* the write call).
*/
static inline __be32
-nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, umode_t requested)
+nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry,
+ umode_t requested)
{
- mode &= S_IFMT;
+ umode_t mode = d_inode(dentry)->i_mode & S_IFMT;
if (requested == 0) /* the caller doesn't care */
return nfs_ok;
- if (mode == requested)
+ if (mode == requested) {
+ if (mode == S_IFDIR && !d_can_lookup(dentry)) {
+ WARN_ON_ONCE(1);
+ return nfserr_notdir;
+ }
return nfs_ok;
+ }
/*
* v4 has an error more specific than err_notdir which we should
* return in preference to err_notdir:
* that it expects something not of the given type.
*
* @access is formed from the NFSD_MAY_* constants defined in
- * include/linux/nfsd/nfsd.h.
+ * fs/nfsd/vfs.h.
*/
__be32
fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
if (error)
goto out;
- error = nfsd_mode_check(rqstp, d_inode(dentry)->i_mode, type);
+ error = nfsd_mode_check(rqstp, dentry, type);
if (error)
goto out;
* the reference filehandle (if it is in the same export)
* or the export options.
*/
- set_version_and_fsid_type(fhp, exp, ref_fh);
+ set_version_and_fsid_type(fhp, exp, ref_fh);
if (ref_fh == fhp)
fh_put(ref_fh);
/* Check for NFSD_MAY_WRITE in nfsd_create if necessary */
- nfserr = nfserr_acces;
- if (!argp->len)
- goto done;
nfserr = nfserr_exist;
if (isdotent(argp->name, argp->len))
goto done;
nfserr = 0;
if (!inode) {
/* File doesn't exist. Create it and set attrs */
- nfserr = nfsd_create(rqstp, dirfhp, argp->name, argp->len,
- attr, type, rdev, newfhp);
+ nfserr = nfsd_create_locked(rqstp, dirfhp, argp->name,
+ argp->len, attr, type, rdev, newfhp);
} else if (type == S_IFREG) {
dprintk("nfsd: existing %s, valid=%x, size=%ld\n",
argp->name, attr->ia_valid, (long) attr->ia_size);
|| !(p = decode_filename(p, &args->name, &args->len)))
return 0;
- return xdr_argsize_check(rqstp, p);
+ return xdr_argsize_check(rqstp, p);
}
int
u32 notify_types;
__be32 (*proc_getdeviceinfo)(struct super_block *sb,
+ struct svc_rqst *rqstp,
struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdevp);
__be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr,
#ifdef CONFIG_NFSD_SCSILAYOUT
extern const struct nfsd4_layout_ops scsi_layout_ops;
#endif
+#ifdef CONFIG_NFSD_FLEXFILELAYOUT
+extern const struct nfsd4_layout_ops ff_layout_ops;
+#endif
__be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, stateid_t *stateid,
u32 cl_exchange_flags;
/* number of rpc's in progress over an associated session: */
atomic_t cl_refcount;
+ struct nfs4_op_map cl_spo_must_allow;
/* for nfs41 callbacks */
/* We currently support a single back channel with a single slot */
iap->ia_valid &= ~ATTR_SIZE;
}
-/*
- * Create a file (regular, directory, device, fifo); UNIX sockets
- * not yet implemented.
- * If the response fh has been verified, the parent directory should
- * already be locked. Note that the parent directory is left locked.
- *
- * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
- */
+/* The parent directory should already be locked: */
__be32
-nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
char *fname, int flen, struct iattr *iap,
int type, dev_t rdev, struct svc_fh *resfhp)
{
- struct dentry *dentry, *dchild = NULL;
+ struct dentry *dentry, *dchild;
struct inode *dirp;
__be32 err;
__be32 err2;
int host_err;
- err = nfserr_perm;
- if (!flen)
- goto out;
- err = nfserr_exist;
- if (isdotent(fname, flen))
- goto out;
-
- err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
- if (err)
- goto out;
-
dentry = fhp->fh_dentry;
dirp = d_inode(dentry);
- err = nfserr_notdir;
- if (!dirp->i_op->lookup)
- goto out;
- /*
- * Check whether the response file handle has been verified yet.
- * If it has, the parent directory should already be locked.
- */
- if (!resfhp->fh_dentry) {
- host_err = fh_want_write(fhp);
- if (host_err)
- goto out_nfserr;
-
- /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
- fh_lock_nested(fhp, I_MUTEX_PARENT);
- dchild = lookup_one_len(fname, dentry, flen);
- host_err = PTR_ERR(dchild);
- if (IS_ERR(dchild))
- goto out_nfserr;
- err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
- if (err)
- goto out;
- } else {
- /* called from nfsd_proc_create */
- dchild = dget(resfhp->fh_dentry);
- if (!fhp->fh_locked) {
- /* not actually possible */
- printk(KERN_ERR
- "nfsd_create: parent %pd2 not locked!\n",
+ dchild = dget(resfhp->fh_dentry);
+ if (!fhp->fh_locked) {
+ WARN_ONCE(1, "nfsd_create: parent %pd2 not locked!\n",
dentry);
- err = nfserr_io;
- goto out;
- }
- }
- /*
- * Make sure the child dentry is still negative ...
- */
- err = nfserr_exist;
- if (d_really_is_positive(dchild)) {
- dprintk("nfsd_create: dentry %pd/%pd not negative!\n",
- dentry, dchild);
- goto out;
+ err = nfserr_io;
+ goto out;
}
+ err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE);
+ if (err)
+ goto out;
+
if (!(iap->ia_valid & ATTR_MODE))
iap->ia_mode = 0;
iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
- err = nfserr_inval;
- if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) {
- printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
- type);
- goto out;
- }
-
- /*
- * Get the dir op function pointer.
- */
err = 0;
host_err = 0;
switch (type) {
case S_IFSOCK:
host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
break;
+ default:
+ printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
+ type);
+ host_err = -EINVAL;
}
if (host_err < 0)
goto out_nfserr;
/*
* nfsd_create_setattr already committed the child. Transactional
* filesystems had a chance to commit changes for both parent and
- * child * simultaneously making the following commit_metadata a
+ * child simultaneously making the following commit_metadata a
* noop.
*/
err2 = nfserrno(commit_metadata(fhp));
if (!err)
err = fh_update(resfhp);
out:
- if (dchild && !IS_ERR(dchild))
- dput(dchild);
+ dput(dchild);
return err;
out_nfserr:
goto out;
}
+/*
+ * Create a filesystem object (regular, directory, special).
+ * Note that the parent directory is left locked.
+ *
+ * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
+ */
+__be32
+nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ char *fname, int flen, struct iattr *iap,
+ int type, dev_t rdev, struct svc_fh *resfhp)
+{
+ struct dentry *dentry, *dchild = NULL;
+ struct inode *dirp;
+ __be32 err;
+ int host_err;
+
+ if (isdotent(fname, flen))
+ return nfserr_exist;
+
+ err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_NOP);
+ if (err)
+ return err;
+
+ dentry = fhp->fh_dentry;
+ dirp = d_inode(dentry);
+
+ host_err = fh_want_write(fhp);
+ if (host_err)
+ return nfserrno(host_err);
+
+ fh_lock_nested(fhp, I_MUTEX_PARENT);
+ dchild = lookup_one_len(fname, dentry, flen);
+ host_err = PTR_ERR(dchild);
+ if (IS_ERR(dchild))
+ return nfserrno(host_err);
+ err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
+ if (err) {
+ dput(dchild);
+ return err;
+ }
+ return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type,
+ rdev, resfhp);
+}
+
#ifdef CONFIG_NFSD_V3
/*
dentry = fhp->fh_dentry;
dirp = d_inode(dentry);
- /* Get all the sanity checks out of the way before
- * we lock the parent. */
- err = nfserr_notdir;
- if (!dirp->i_op->lookup)
- goto out;
-
host_err = fh_want_write(fhp);
if (host_err)
goto out_nfserr;
__be32 nfsd4_clone_file_range(struct file *, u64, struct file *,
u64, u64);
#endif /* CONFIG_NFSD_V4 */
+__be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *,
+ char *name, int len, struct iattr *attrs,
+ int type, dev_t rdev, struct svc_fh *res);
__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
int type, dev_t rdev, struct svc_fh *res);
struct nfsd4_session *session;
struct nfsd4_slot *slot;
int data_offset;
+ bool spo_must_allowed;
size_t iovlen;
u32 minorversion;
__be32 status;
clientid_t clientid;
u32 seqid;
int spa_how;
+ u32 spo_must_enforce[3];
+ u32 spo_must_allow[3];
};
struct nfsd4_sequence {
}
+
+bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
struct nfsd4_compoundargs *);
if (err)
return err;
- err = ubifs_wbuf_sync_nolock(wbuf);
- if (err)
- return err;
-
err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
if (err)
return err;
c->max_write_shift = fls(c->max_write_size) - 1;
if (c->leb_size < UBIFS_MIN_LEB_SZ) {
- ubifs_err(c, "too small LEBs (%d bytes), min. is %d bytes",
- c->leb_size, UBIFS_MIN_LEB_SZ);
+ ubifs_errc(c, "too small LEBs (%d bytes), min. is %d bytes",
+ c->leb_size, UBIFS_MIN_LEB_SZ);
return -EINVAL;
}
if (c->leb_cnt < UBIFS_MIN_LEB_CNT) {
- ubifs_err(c, "too few LEBs (%d), min. is %d",
- c->leb_cnt, UBIFS_MIN_LEB_CNT);
+ ubifs_errc(c, "too few LEBs (%d), min. is %d",
+ c->leb_cnt, UBIFS_MIN_LEB_CNT);
return -EINVAL;
}
if (!is_power_of_2(c->min_io_size)) {
- ubifs_err(c, "bad min. I/O size %d", c->min_io_size);
+ ubifs_errc(c, "bad min. I/O size %d", c->min_io_size);
return -EINVAL;
}
if (c->max_write_size < c->min_io_size ||
c->max_write_size % c->min_io_size ||
!is_power_of_2(c->max_write_size)) {
- ubifs_err(c, "bad write buffer size %d for %d min. I/O unit",
- c->max_write_size, c->min_io_size);
+ ubifs_errc(c, "bad write buffer size %d for %d min. I/O unit",
+ c->max_write_size, c->min_io_size);
return -EINVAL;
}
*/
ubi = open_ubi(name, UBI_READONLY);
if (IS_ERR(ubi)) {
- pr_err("UBIFS error (pid: %d): cannot open \"%s\", error %d",
- current->pid, name, (int)PTR_ERR(ubi));
+ if (!(flags & MS_SILENT))
+ pr_err("UBIFS error (pid: %d): cannot open \"%s\", error %d",
+ current->pid, name, (int)PTR_ERR(ubi));
return ERR_CAST(ubi);
}
__printf(2, 3)
void ubifs_warn(const struct ubifs_info *c, const char *fmt, ...);
/*
- * A variant of 'ubifs_err()' which takes the UBIFS file-sytem description
- * object as an argument.
+ * A conditional variant of 'ubifs_err()' which doesn't output anything
+ * if probing (ie. MS_SILENT set).
*/
#define ubifs_errc(c, fmt, ...) \
do { \
return __ubifs_removexattr(inode, name);
}
-const struct xattr_handler ubifs_user_xattr_handler = {
+static const struct xattr_handler ubifs_user_xattr_handler = {
.prefix = XATTR_USER_PREFIX,
.get = ubifs_xattr_get,
.set = ubifs_xattr_set,
};
-const struct xattr_handler ubifs_trusted_xattr_handler = {
+static const struct xattr_handler ubifs_trusted_xattr_handler = {
.prefix = XATTR_TRUSTED_PREFIX,
.get = ubifs_xattr_get,
.set = ubifs_xattr_set,
};
-const struct xattr_handler ubifs_security_xattr_handler = {
+static const struct xattr_handler ubifs_security_xattr_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.get = ubifs_xattr_get,
.set = ubifs_xattr_set,
xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
-xfs-$(CONFIG_NFSD_BLOCKLAYOUT) += xfs_pnfs.o
-xfs-$(CONFIG_NFSD_SCSILAYOUT) += xfs_pnfs.o
+xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o
.fh_to_parent = xfs_fs_fh_to_parent,
.get_parent = xfs_fs_get_parent,
.commit_metadata = xfs_fs_nfs_commit_metadata,
-#ifdef CONFIG_NFSD_BLOCKLAYOUT
+#ifdef CONFIG_EXPORTFS_BLOCK_OPS
.get_uuid = xfs_fs_get_uuid,
.map_blocks = xfs_fs_map_blocks,
.commit_blocks = xfs_fs_commit_blocks,
#ifndef _XFS_PNFS_H
#define _XFS_PNFS_H 1
-#if defined(CONFIG_NFSD_BLOCKLAYOUT) || defined(CONFIG_NFSD_SCSILAYOUT)
+#ifdef CONFIG_EXPORTFS_BLOCK_OPS
int xfs_fs_get_uuid(struct super_block *sb, u8 *buf, u32 *len, u64 *offset);
int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
struct iomap *iomap, bool write, u32 *device_generation);
{
return 0;
}
-#endif /* CONFIG_NFSD_PNFS */
+#endif /* CONFIG_EXPORTFS_BLOCK_OPS */
#endif /* _XFS_PNFS_H */
+++ /dev/null
-/*
- * include/asm-generic/rtc.h
- *
- * Author: Tom Rini <trini@mvista.com>
- *
- * Based on:
- * drivers/char/rtc.c
- *
- * Please read the COPYING file for all license details.
- */
-
-#ifndef __ASM_RTC_H__
-#define __ASM_RTC_H__
-
-#include <linux/mc146818rtc.h>
-#include <linux/rtc.h>
-#include <linux/bcd.h>
-#include <linux/delay.h>
-#ifdef CONFIG_ACPI
-#include <linux/acpi.h>
-#endif
-
-#define RTC_PIE 0x40 /* periodic interrupt enable */
-#define RTC_AIE 0x20 /* alarm interrupt enable */
-#define RTC_UIE 0x10 /* update-finished interrupt enable */
-
-/* some dummy definitions */
-#define RTC_BATT_BAD 0x100 /* battery bad */
-#define RTC_SQWE 0x08 /* enable square-wave output */
-#define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */
-#define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */
-#define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */
-
-/*
- * Returns true if a clock update is in progress
- */
-static inline unsigned char rtc_is_updating(void)
-{
- unsigned char uip;
- unsigned long flags;
-
- spin_lock_irqsave(&rtc_lock, flags);
- uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP);
- spin_unlock_irqrestore(&rtc_lock, flags);
- return uip;
-}
-
-static inline unsigned int __get_rtc_time(struct rtc_time *time)
-{
- unsigned char ctrl;
- unsigned long flags;
- unsigned char century = 0;
-
-#ifdef CONFIG_MACH_DECSTATION
- unsigned int real_year;
-#endif
-
- /*
- * read RTC once any update in progress is done. The update
- * can take just over 2ms. We wait 20ms. There is no need to
- * to poll-wait (up to 1s - eeccch) for the falling edge of RTC_UIP.
- * If you need to know *exactly* when a second has started, enable
- * periodic update complete interrupts, (via ioctl) and then
- * immediately read /dev/rtc which will block until you get the IRQ.
- * Once the read clears, read the RTC time (again via ioctl). Easy.
- */
- if (rtc_is_updating())
- mdelay(20);
-
- /*
- * Only the values that we read from the RTC are set. We leave
- * tm_wday, tm_yday and tm_isdst untouched. Even though the
- * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated
- * by the RTC when initially set to a non-zero value.
- */
- spin_lock_irqsave(&rtc_lock, flags);
- time->tm_sec = CMOS_READ(RTC_SECONDS);
- time->tm_min = CMOS_READ(RTC_MINUTES);
- time->tm_hour = CMOS_READ(RTC_HOURS);
- time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
- time->tm_mon = CMOS_READ(RTC_MONTH);
- time->tm_year = CMOS_READ(RTC_YEAR);
-#ifdef CONFIG_MACH_DECSTATION
- real_year = CMOS_READ(RTC_DEC_YEAR);
-#endif
-#ifdef CONFIG_ACPI
- if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
- acpi_gbl_FADT.century)
- century = CMOS_READ(acpi_gbl_FADT.century);
-#endif
- ctrl = CMOS_READ(RTC_CONTROL);
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- {
- time->tm_sec = bcd2bin(time->tm_sec);
- time->tm_min = bcd2bin(time->tm_min);
- time->tm_hour = bcd2bin(time->tm_hour);
- time->tm_mday = bcd2bin(time->tm_mday);
- time->tm_mon = bcd2bin(time->tm_mon);
- time->tm_year = bcd2bin(time->tm_year);
- century = bcd2bin(century);
- }
-
-#ifdef CONFIG_MACH_DECSTATION
- time->tm_year += real_year - 72;
-#endif
-
- if (century)
- time->tm_year += (century - 19) * 100;
-
- /*
- * Account for differences between how the RTC uses the values
- * and how they are defined in a struct rtc_time;
- */
- if (time->tm_year <= 69)
- time->tm_year += 100;
-
- time->tm_mon--;
-
- return RTC_24H;
-}
-
-#ifndef get_rtc_time
-#define get_rtc_time __get_rtc_time
-#endif
-
-/* Set the current date and time in the real time clock. */
-static inline int __set_rtc_time(struct rtc_time *time)
-{
- unsigned long flags;
- unsigned char mon, day, hrs, min, sec;
- unsigned char save_control, save_freq_select;
- unsigned int yrs;
-#ifdef CONFIG_MACH_DECSTATION
- unsigned int real_yrs, leap_yr;
-#endif
- unsigned char century = 0;
-
- yrs = time->tm_year;
- mon = time->tm_mon + 1; /* tm_mon starts at zero */
- day = time->tm_mday;
- hrs = time->tm_hour;
- min = time->tm_min;
- sec = time->tm_sec;
-
- if (yrs > 255) /* They are unsigned */
- return -EINVAL;
-
- spin_lock_irqsave(&rtc_lock, flags);
-#ifdef CONFIG_MACH_DECSTATION
- real_yrs = yrs;
- leap_yr = ((!((yrs + 1900) % 4) && ((yrs + 1900) % 100)) ||
- !((yrs + 1900) % 400));
- yrs = 72;
-
- /*
- * We want to keep the year set to 73 until March
- * for non-leap years, so that Feb, 29th is handled
- * correctly.
- */
- if (!leap_yr && mon < 3) {
- real_yrs--;
- yrs = 73;
- }
-#endif
-
-#ifdef CONFIG_ACPI
- if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
- acpi_gbl_FADT.century) {
- century = (yrs + 1900) / 100;
- yrs %= 100;
- }
-#endif
-
- /* These limits and adjustments are independent of
- * whether the chip is in binary mode or not.
- */
- if (yrs > 169) {
- spin_unlock_irqrestore(&rtc_lock, flags);
- return -EINVAL;
- }
-
- if (yrs >= 100)
- yrs -= 100;
-
- if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
- || RTC_ALWAYS_BCD) {
- sec = bin2bcd(sec);
- min = bin2bcd(min);
- hrs = bin2bcd(hrs);
- day = bin2bcd(day);
- mon = bin2bcd(mon);
- yrs = bin2bcd(yrs);
- century = bin2bcd(century);
- }
-
- save_control = CMOS_READ(RTC_CONTROL);
- CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
- save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
- CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
-
-#ifdef CONFIG_MACH_DECSTATION
- CMOS_WRITE(real_yrs, RTC_DEC_YEAR);
-#endif
- CMOS_WRITE(yrs, RTC_YEAR);
- CMOS_WRITE(mon, RTC_MONTH);
- CMOS_WRITE(day, RTC_DAY_OF_MONTH);
- CMOS_WRITE(hrs, RTC_HOURS);
- CMOS_WRITE(min, RTC_MINUTES);
- CMOS_WRITE(sec, RTC_SECONDS);
-#ifdef CONFIG_ACPI
- if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
- acpi_gbl_FADT.century)
- CMOS_WRITE(century, acpi_gbl_FADT.century);
-#endif
-
- CMOS_WRITE(save_control, RTC_CONTROL);
- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- return 0;
-}
-
-#ifndef set_rtc_time
-#define set_rtc_time __set_rtc_time
-#endif
-
-static inline unsigned int get_rtc_ss(void)
-{
- struct rtc_time h;
-
- get_rtc_time(&h);
- return h.tm_sec;
-}
-
-static inline int get_rtc_pll(struct rtc_pll_info *pll)
-{
- return -EINVAL;
-}
-static inline int set_rtc_pll(struct rtc_pll_info *pll)
-{
- return -EINVAL;
-}
-
-#endif /* __ASM_RTC_H__ */
extern int setup_arg_pages(struct linux_binprm * bprm,
unsigned long stack_top,
int executable_stack);
+extern int transfer_args_to_stack(struct linux_binprm *bprm,
+ unsigned long *sp_location);
extern int bprm_change_interp(char *interp, struct linux_binprm *bprm);
extern int copy_strings_kernel(int argc, const char *const *argv,
struct linux_binprm *bprm);
+++ /dev/null
-/*
- * Copyright (C) 1998, 1999, 2003 Ralf Baechle
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#ifndef __LINUX_DS1286_H
-#define __LINUX_DS1286_H
-
-/**********************************************************************
- * register summary
- **********************************************************************/
-#define RTC_HUNDREDTH_SECOND 0
-#define RTC_SECONDS 1
-#define RTC_MINUTES 2
-#define RTC_MINUTES_ALARM 3
-#define RTC_HOURS 4
-#define RTC_HOURS_ALARM 5
-#define RTC_DAY 6
-#define RTC_DAY_ALARM 7
-#define RTC_DATE 8
-#define RTC_MONTH 9
-#define RTC_YEAR 10
-#define RTC_CMD 11
-#define RTC_WHSEC 12
-#define RTC_WSEC 13
-#define RTC_UNUSED 14
-
-/* RTC_*_alarm is always true if 2 MSBs are set */
-# define RTC_ALARM_DONT_CARE 0xC0
-
-
-/*
- * Bits in the month register
- */
-#define RTC_EOSC 0x80
-#define RTC_ESQW 0x40
-
-/*
- * Bits in the Command register
- */
-#define RTC_TDF 0x01
-#define RTC_WAF 0x02
-#define RTC_TDM 0x04
-#define RTC_WAM 0x08
-#define RTC_PU_LVL 0x10
-#define RTC_IBH_LO 0x20
-#define RTC_IPSW 0x40
-#define RTC_TE 0x80
-
-#endif /* __LINUX_DS1286_H */
+++ /dev/null
-/*
- * ds17287rtc.h - register definitions for the ds1728[57] RTC / CMOS RAM
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * (C) 2003 Guido Guenther <agx@sigxcpu.org>
- */
-#ifndef __LINUX_DS17287RTC_H
-#define __LINUX_DS17287RTC_H
-
-#include <linux/rtc.h> /* get the user-level API */
-#include <linux/mc146818rtc.h>
-
-/* Register A */
-#define DS_REGA_DV2 0x40 /* countdown chain */
-#define DS_REGA_DV1 0x20 /* oscillator enable */
-#define DS_REGA_DV0 0x10 /* bank select */
-
-/* bank 1 registers */
-#define DS_B1_MODEL 0x40 /* model number byte */
-#define DS_B1_SN1 0x41 /* serial number byte 1 */
-#define DS_B1_SN2 0x42 /* serial number byte 2 */
-#define DS_B1_SN3 0x43 /* serial number byte 3 */
-#define DS_B1_SN4 0x44 /* serial number byte 4 */
-#define DS_B1_SN5 0x45 /* serial number byte 5 */
-#define DS_B1_SN6 0x46 /* serial number byte 6 */
-#define DS_B1_CRC 0x47 /* CRC byte */
-#define DS_B1_CENTURY 0x48 /* Century byte */
-#define DS_B1_DALARM 0x49 /* date alarm */
-#define DS_B1_XCTRL4A 0x4a /* extendec control register 4a */
-#define DS_B1_XCTRL4B 0x4b /* extendec control register 4b */
-#define DS_B1_RTCADDR2 0x4e /* rtc address 2 */
-#define DS_B1_RTCADDR3 0x4f /* rtc address 3 */
-#define DS_B1_RAMLSB 0x50 /* extended ram LSB */
-#define DS_B1_RAMMSB 0x51 /* extended ram MSB */
-#define DS_B1_RAMDPORT 0x53 /* extended ram data port */
-
-/* register details */
-/* extended control register 4a */
-#define DS_XCTRL4A_VRT2 0x80 /* valid ram and time */
-#define DS_XCTRL4A_INCR 0x40 /* increment progress status */
-#define DS_XCTRL4A_BME 0x20 /* burst mode enable */
-#define DS_XCTRL4A_PAB 0x08 /* power active bar ctrl */
-#define DS_XCTRL4A_RF 0x04 /* ram clear flag */
-#define DS_XCTRL4A_WF 0x02 /* wake up alarm flag */
-#define DS_XCTRL4A_KF 0x01 /* kickstart flag */
-
-/* interrupt causes */
-#define DS_XCTRL4A_IFS (DS_XCTRL4A_RF|DS_XCTRL4A_WF|DS_XCTRL4A_KF)
-
-/* extended control register 4b */
-#define DS_XCTRL4B_ABE 0x80 /* auxiliary battery enable */
-#define DS_XCTRL4B_E32K 0x40 /* enable 32.768 kHz Output */
-#define DS_XCTRL4B_CS 0x20 /* crystal select */
-#define DS_XCTRL4B_RCE 0x10 /* ram clear enable */
-#define DS_XCTRL4B_PRS 0x08 /* PAB resec select */
-#define DS_XCTRL4B_RIE 0x04 /* ram clear interrupt enable */
-#define DS_XCTRL4B_WFE 0x02 /* wake up alarm interrupt enable */
-#define DS_XCTRL4B_KFE 0x01 /* kickstart interrupt enable */
-
-/* interrupt enable bits */
-#define DS_XCTRL4B_IFES (DS_XCTRL4B_RIE|DS_XCTRL4B_WFE|DS_XCTRL4B_KFE)
-
-#endif /* __LINUX_DS17287RTC_H */
void i8042_lock_chip(void);
void i8042_unlock_chip(void);
int i8042_command(unsigned char *param, int command);
-bool i8042_check_port_owner(const struct serio *);
int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str,
struct serio *serio));
int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str,
return -ENODEV;
}
-static inline bool i8042_check_port_owner(const struct serio *serio)
-{
- return false;
-}
-
static inline int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str,
struct serio *serio))
{
+++ /dev/null
-/*
- * ST M48T86 / Dallas DS12887 RTC driver
- * Copyright (c) 2006 Tower Technologies
- *
- * Author: Alessandro Zummo <a.zummo@towertech.it>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-struct m48t86_ops
-{
- void (*writebyte)(unsigned char value, unsigned long addr);
- unsigned char (*readbyte)(unsigned long addr);
-};
#include <asm/io.h>
#include <linux/rtc.h> /* get the user-level API */
#include <asm/mc146818rtc.h> /* register access macros */
+#include <linux/bcd.h>
+#include <linux/delay.h>
#ifdef __KERNEL__
#include <linux/spinlock.h> /* spinlock_t */
#define RTC_IO_EXTENT_USED RTC_IO_EXTENT
#endif /* ARCH_RTC_LOCATION */
+unsigned int mc146818_get_time(struct rtc_time *time);
+int mc146818_set_time(struct rtc_time *time);
+
#endif /* _MC146818RTC_H */
MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32,
MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33,
MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34,
+ MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35,
};
enum {
VXLAN_STEER_BY_INNER_VLAN = 1 << 4,
};
+enum {
+ MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS = 0x2,
+};
int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn,
enum mlx4_net_trans_promisc_mode mode);
int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
int mlx4_SYNC_TPT(struct mlx4_dev *dev);
int mlx4_test_interrupts(struct mlx4_dev *dev);
+int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier,
+ const u32 offset[], u32 value[],
+ size_t array_len, u8 port);
u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port);
bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector);
struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port);
void (*comp)(struct mlx5_core_cq *);
void *priv;
} tasklet_ctx;
+ int reset_notify_added;
+ struct list_head reset_notify;
};
#include <linux/mlx5/device.h>
#include <linux/mlx5/doorbell.h>
+#include <linux/mlx5/srq.h>
enum {
MLX5_RQ_BITMASK_VSD = 1 << 1,
void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
struct mlx5_cmd_mailbox *head);
int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_create_srq_mbox_in *in, int inlen,
- int is_xrc);
+ struct mlx5_srq_attr *in);
int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq);
int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_query_srq_mbox_out *out);
+ struct mlx5_srq_attr *out);
int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
u16 lwm, int is_srq);
void mlx5_init_mkey_table(struct mlx5_core_dev *dev);
struct mlx5_modify_qp_mbox_in {
struct mlx5_inbox_hdr hdr;
__be32 qpn;
- u8 rsvd1[4];
- __be32 optparam;
u8 rsvd0[4];
+ __be32 optparam;
+ u8 rsvd1[4];
struct mlx5_qp_context ctx;
u8 rsvd2[16];
};
#include <linux/mlx5/driver.h>
+enum {
+ MLX5_SRQ_FLAG_ERR = (1 << 0),
+ MLX5_SRQ_FLAG_WQ_SIG = (1 << 1),
+};
+
+struct mlx5_srq_attr {
+ u32 type;
+ u32 flags;
+ u32 log_size;
+ u32 wqe_shift;
+ u32 log_page_size;
+ u32 wqe_cnt;
+ u32 srqn;
+ u32 xrcd;
+ u32 page_offset;
+ u32 cqn;
+ u32 pd;
+ u32 lwm;
+ u32 user_index;
+ u64 db_record;
+ u64 *pas;
+};
+
+struct mlx5_core_dev;
+
void mlx5_init_srq_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev);
PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET,
};
+#define NFS4_OP_MAP_NUM_LONGS \
+ DIV_ROUND_UP(LAST_NFS4_OP, 8 * sizeof(unsigned long))
+#define NFS4_OP_MAP_NUM_WORDS \
+ (NFS4_OP_MAP_NUM_LONGS * sizeof(unsigned long) / sizeof(u32))
+struct nfs4_op_map {
+ union {
+ unsigned long longs[NFS4_OP_MAP_NUM_LONGS];
+ u32 words[NFS4_OP_MAP_NUM_WORDS];
+ } u;
+};
+
#endif
struct pnfs_commit_bucket *buckets;
};
-#define NFS4_OP_MAP_NUM_LONGS \
- DIV_ROUND_UP(LAST_NFS4_OP, 8 * sizeof(unsigned long))
-#define NFS4_OP_MAP_NUM_WORDS \
- (NFS4_OP_MAP_NUM_LONGS * sizeof(unsigned long) / sizeof(u32))
-struct nfs4_op_map {
- union {
- unsigned long longs[NFS4_OP_MAP_NUM_LONGS];
- u32 words[NFS4_OP_MAP_NUM_WORDS];
- } u;
-};
-
struct nfs41_state_protection {
u32 how;
struct nfs4_op_map enforce;
--- /dev/null
+/*
+ * ds2404.h - platform data structure for the DS2404 RTC.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2012 Sven Schnelle <svens@stackframe.org>
+ */
+
+#ifndef __LINUX_DS2404_H
+#define __LINUX_DS2404_H
+
+struct ds2404_platform_data {
+
+ unsigned int gpio_rst;
+ unsigned int gpio_clk;
+ unsigned int gpio_dq;
+};
+#endif
--- /dev/null
+/*
+ * ST M48T86 / Dallas DS12887 RTC driver
+ * Copyright (c) 2006 Tower Technologies
+ *
+ * Author: Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+struct m48t86_ops
+{
+ void (*writebyte)(unsigned char value, unsigned long addr);
+ unsigned char (*readbyte)(unsigned long addr);
+};
--- /dev/null
+/*
+ * v3020.h - Registers definition and platform data structure for the v3020 RTC.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006, 8D Technologies inc.
+ */
+#ifndef __LINUX_V3020_H
+#define __LINUX_V3020_H
+
+/* The v3020 has only one data pin but which one
+ * is used depends on the board. */
+struct v3020_platform_data {
+ int leftshift; /* (1<<(leftshift)) & readl() */
+
+ unsigned int use_gpio:1;
+ unsigned int gpio_cs;
+ unsigned int gpio_wr;
+ unsigned int gpio_rd;
+ unsigned int gpio_io;
+};
+
+#define V3020_STATUS_0 0x00
+#define V3020_STATUS_1 0x01
+#define V3020_SECONDS 0x02
+#define V3020_MINUTES 0x03
+#define V3020_HOURS 0x04
+#define V3020_MONTH_DAY 0x05
+#define V3020_MONTH 0x06
+#define V3020_YEAR 0x07
+#define V3020_WEEK_DAY 0x08
+#define V3020_WEEK 0x09
+
+#define V3020_IS_COMMAND(val) ((val)>=0x0E)
+
+#define V3020_CMD_RAM2CLOCK 0x0E
+#define V3020_CMD_CLOCK2RAM 0x0F
+
+#endif /* __LINUX_V3020_H */
+++ /dev/null
-/*
- * ds2404.h - platform data structure for the DS2404 RTC.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 Sven Schnelle <svens@stackframe.org>
- */
-
-#ifndef __LINUX_DS2404_H
-#define __LINUX_DS2404_H
-
-struct ds2404_platform_data {
-
- unsigned int gpio_rst;
- unsigned int gpio_clk;
- unsigned int gpio_dq;
-};
-#endif
+++ /dev/null
-/*
- * v3020.h - Registers definition and platform data structure for the v3020 RTC.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2006, 8D Technologies inc.
- */
-#ifndef __LINUX_V3020_H
-#define __LINUX_V3020_H
-
-/* The v3020 has only one data pin but which one
- * is used depends on the board. */
-struct v3020_platform_data {
- int leftshift; /* (1<<(leftshift)) & readl() */
-
- unsigned int use_gpio:1;
- unsigned int gpio_cs;
- unsigned int gpio_wr;
- unsigned int gpio_rd;
- unsigned int gpio_io;
-};
-
-#define V3020_STATUS_0 0x00
-#define V3020_STATUS_1 0x01
-#define V3020_SECONDS 0x02
-#define V3020_MINUTES 0x03
-#define V3020_HOURS 0x04
-#define V3020_MONTH_DAY 0x05
-#define V3020_MONTH 0x06
-#define V3020_YEAR 0x07
-#define V3020_WEEK_DAY 0x08
-#define V3020_WEEK 0x09
-
-#define V3020_IS_COMMAND(val) ((val)>=0x0E)
-
-#define V3020_CMD_RAM2CLOCK 0x0E
-#define V3020_CMD_CLOCK2RAM 0x0F
-
-#endif /* __LINUX_V3020_H */
--- /dev/null
+/*
+ * Copyright (C) 1998, 1999, 2003 Ralf Baechle
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef __LINUX_DS1286_H
+#define __LINUX_DS1286_H
+
+/**********************************************************************
+ * register summary
+ **********************************************************************/
+#define RTC_HUNDREDTH_SECOND 0
+#define RTC_SECONDS 1
+#define RTC_MINUTES 2
+#define RTC_MINUTES_ALARM 3
+#define RTC_HOURS 4
+#define RTC_HOURS_ALARM 5
+#define RTC_DAY 6
+#define RTC_DAY_ALARM 7
+#define RTC_DATE 8
+#define RTC_MONTH 9
+#define RTC_YEAR 10
+#define RTC_CMD 11
+#define RTC_WHSEC 12
+#define RTC_WSEC 13
+#define RTC_UNUSED 14
+
+/* RTC_*_alarm is always true if 2 MSBs are set */
+# define RTC_ALARM_DONT_CARE 0xC0
+
+
+/*
+ * Bits in the month register
+ */
+#define RTC_EOSC 0x80
+#define RTC_ESQW 0x40
+
+/*
+ * Bits in the Command register
+ */
+#define RTC_TDF 0x01
+#define RTC_WAF 0x02
+#define RTC_TDM 0x04
+#define RTC_WAM 0x08
+#define RTC_PU_LVL 0x10
+#define RTC_IBH_LO 0x20
+#define RTC_IPSW 0x40
+#define RTC_TE 0x80
+
+#endif /* __LINUX_DS1286_H */
struct serio_device_id id;
- spinlock_t lock; /* protects critical sections from port's interrupt handler */
+ /* Protects critical sections from port's interrupt handler */
+ spinlock_t lock;
int (*write)(struct serio *, unsigned char);
int (*open)(struct serio *);
void (*stop)(struct serio *);
struct serio *parent;
- struct list_head child_node; /* Entry in parent->children list */
+ /* Entry in parent->children list */
+ struct list_head child_node;
struct list_head children;
- unsigned int depth; /* level of nesting in serio hierarchy */
+ /* Level of nesting in serio hierarchy */
+ unsigned int depth;
- struct serio_driver *drv; /* accessed from interrupt, must be protected by serio->lock and serio->sem */
- struct mutex drv_mutex; /* protects serio->drv so attributes can pin driver */
+ /*
+ * serio->drv is accessed from interrupt handlers; when modifying
+ * caller should acquire serio->drv_mutex and serio->lock.
+ */
+ struct serio_driver *drv;
+ /* Protects serio->drv so attributes can pin current driver */
+ struct mutex drv_mutex;
struct device dev;
struct list_head node;
+
+ /*
+ * For use by PS/2 layer when several ports share hardware and
+ * may get indigestion when exposed to concurrent access (i8042).
+ */
+ struct mutex *ps2_cmd_mutex;
};
#define to_serio_port(d) container_of(d, struct serio, dev)
struct hlist_head * hash_table;
rwlock_t hash_lock;
- atomic_t inuse; /* active user-space update or lookup */
-
char *name;
void (*cache_put)(struct kref *);
* cache pages */
#define RQ_VICTIM (5) /* about to be shut down */
#define RQ_BUSY (6) /* request is busy */
+#define RQ_DATA (7) /* request has data */
unsigned long rq_flags; /* flags field */
void * rq_argp; /* decoded arguments */
void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *);
int (*xpo_secure_port)(struct svc_rqst *);
- void (*xpo_adjust_wspace)(struct svc_xprt *);
};
struct svc_xprt_class {
struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */
+ atomic_t xpt_nr_rqsts; /* Number of requests */
struct mutex xpt_mutex; /* to serialize sending data */
spinlock_t xpt_lock; /* protects sk_deferred
* and xpt_auth_cache */
IB_SA_BEST = 3
};
+/*
+ * There are 4 types of join states:
+ * FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember.
+ * The order corresponds to JoinState bits in MCMemberRecord.
+ */
+enum ib_sa_mc_join_states {
+ FULLMEMBER_JOIN,
+ NONMEMBER_JOIN,
+ SENDONLY_NONMEBER_JOIN,
+ SENDONLY_FULLMEMBER_JOIN,
+ NUM_JOIN_MEMBERSHIP_TYPES,
+};
+
#define IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT BIT(12)
/*
IB_EVENT_QP_LAST_WQE_REACHED,
IB_EVENT_CLIENT_REREGISTER,
IB_EVENT_GID_CHANGE,
+ IB_EVENT_WQ_FATAL,
};
const char *__attribute_const__ ib_event_msg(enum ib_event_type event);
struct ib_cq *cq;
struct ib_qp *qp;
struct ib_srq *srq;
+ struct ib_wq *wq;
u8 port_num;
} element;
enum ib_event_type event;
* Only needed for special QP types, or when using the RW API.
*/
u8 port_num;
+ struct ib_rwq_ind_table *rwq_ind_tbl;
};
struct ib_qp_open_attr {
struct list_head ah_list;
struct list_head xrcd_list;
struct list_head rule_list;
+ struct list_head wq_list;
+ struct list_head rwq_ind_tbl_list;
int closing;
struct pid *tgid;
} ext;
};
+enum ib_wq_type {
+ IB_WQT_RQ
+};
+
+enum ib_wq_state {
+ IB_WQS_RESET,
+ IB_WQS_RDY,
+ IB_WQS_ERR
+};
+
+struct ib_wq {
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ void *wq_context;
+ void (*event_handler)(struct ib_event *, void *);
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ u32 wq_num;
+ enum ib_wq_state state;
+ enum ib_wq_type wq_type;
+ atomic_t usecnt;
+};
+
+struct ib_wq_init_attr {
+ void *wq_context;
+ enum ib_wq_type wq_type;
+ u32 max_wr;
+ u32 max_sge;
+ struct ib_cq *cq;
+ void (*event_handler)(struct ib_event *, void *);
+};
+
+enum ib_wq_attr_mask {
+ IB_WQ_STATE = 1 << 0,
+ IB_WQ_CUR_STATE = 1 << 1,
+};
+
+struct ib_wq_attr {
+ enum ib_wq_state wq_state;
+ enum ib_wq_state curr_wq_state;
+};
+
+struct ib_rwq_ind_table {
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ atomic_t usecnt;
+ u32 ind_tbl_num;
+ u32 log_ind_tbl_size;
+ struct ib_wq **ind_tbl;
+};
+
+struct ib_rwq_ind_table_init_attr {
+ u32 log_ind_tbl_size;
+ /* Each entry is a pointer to Receive Work Queue */
+ struct ib_wq **ind_tbl;
+};
+
+/*
+ * @max_write_sge: Maximum SGE elements per RDMA WRITE request.
+ * @max_read_sge: Maximum SGE elements per RDMA READ request.
+ */
struct ib_qp {
struct ib_device *device;
struct ib_pd *pd;
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
u32 qp_num;
+ u32 max_write_sge;
+ u32 max_read_sge;
enum ib_qp_type qp_type;
+ struct ib_rwq_ind_table *rwq_ind_tbl;
};
struct ib_mr {
IB_FLOW_SPEC_IB = 0x22,
/* L3 header*/
IB_FLOW_SPEC_IPV4 = 0x30,
+ IB_FLOW_SPEC_IPV6 = 0x31,
/* L4 headers*/
IB_FLOW_SPEC_TCP = 0x40,
IB_FLOW_SPEC_UDP = 0x41
struct ib_flow_ipv4_filter mask;
};
+struct ib_flow_ipv6_filter {
+ u8 src_ip[16];
+ u8 dst_ip[16];
+};
+
+struct ib_flow_spec_ipv6 {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_ipv6_filter val;
+ struct ib_flow_ipv6_filter mask;
+};
+
struct ib_flow_tcp_udp_filter {
__be16 dst_port;
__be16 src_port;
struct ib_flow_spec_ib ib;
struct ib_flow_spec_ipv4 ipv4;
struct ib_flow_spec_tcp_udp tcp_udp;
+ struct ib_flow_spec_ipv6 ipv6;
};
struct ib_flow_attr {
struct ifla_vf_stats *stats);
int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
int type);
-
+ struct ib_wq * (*create_wq)(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata);
+ int (*destroy_wq)(struct ib_wq *wq);
+ int (*modify_wq)(struct ib_wq *wq,
+ struct ib_wq_attr *attr,
+ u32 wq_attr_mask,
+ struct ib_udata *udata);
+ struct ib_rwq_ind_table * (*create_rwq_ind_table)(struct ib_device *device,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
+ int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
struct ib_dma_mapping_ops *dma_ops;
struct module *owner;
* in fast paths.
*/
int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *);
+ void (*get_dev_fw_str)(struct ib_device *, char *str, size_t str_len);
};
struct ib_client {
struct ib_device *ib_alloc_device(size_t size);
void ib_dealloc_device(struct ib_device *device);
+void ib_get_device_fw_str(struct ib_device *device, char *str, size_t str_len);
+
int ib_register_device(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *));
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
u16 pkey, const union ib_gid *gid,
const struct sockaddr *addr);
+struct ib_wq *ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr);
+int ib_destroy_wq(struct ib_wq *wq);
+int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
+ u32 wq_attr_mask);
+struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
+ struct ib_rwq_ind_table_init_attr*
+ wq_ind_table_init_attr);
+int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset, unsigned int page_size);
#if !defined(OPA_PORT_INFO_H)
#define OPA_PORT_INFO_H
-/* Temporary until HFI driver is updated */
-#ifndef USE_PI_LED_ENABLE
-#define USE_PI_LED_ENABLE 0
-#endif
-
#define OPA_PORT_LINK_MODE_NOP 0 /* No change */
#define OPA_PORT_LINK_MODE_OPA 4 /* Port mode is OPA */
OPA_PI_MASK_MTU_CAP = 0x0F,
};
-#if USE_PI_LED_ENABLE
struct opa_port_states {
u8 reserved;
u8 ledenable_offlinereason; /* 1 res, 1 bit, 6 bits */
u8 reserved2;
u8 portphysstate_portstate; /* 4 bits, 4 bits */
};
-#define PI_LED_ENABLE_SUP 1
-#else
-struct opa_port_states {
- u8 reserved;
- u8 offline_reason; /* 2 res, 6 bits */
- u8 reserved2;
- u8 portphysstate_portstate; /* 4 bits, 4 bits */
-};
-#define PI_LED_ENABLE_SUP 0
-#endif
struct opa_port_state_info {
struct opa_port_states port_states;
* address.
* @id: Communication identifier associated with the request.
* @addr: Multicast address identifying the group to join.
+ * @join_state: Multicast JoinState bitmap requested by port.
+ * Bitmap is based on IB_SA_MCMEMBER_REC_JOIN_STATE bits.
* @context: User-defined context associated with the join request, returned
* to the user through the private_data pointer in multicast events.
*/
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
- void *context);
+ u8 join_state, void *context);
/**
* rdma_leave_multicast - Leave the multicast group specified by the given
u32 max_mad_size;
u8 qos_shift;
u8 max_rdma_atomic;
+ u8 reserved_operations;
};
/* Protection domain */
/* Driver specific properties */
struct rvt_driver_params dparms;
+ /* post send table */
+ const struct rvt_operation_params *post_parms;
+
struct rvt_mregion __rcu *dma_mr;
struct rvt_lkey_table lkey_table;
int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
int port_index, u16 *pkey_table);
+int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
+ int access);
+int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey);
int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
u32 len, u64 vaddr, u32 rkey, int acc);
int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
u32 mapsz; /* size of the map array */
u8 page_shift; /* 0 - non unform/non powerof2 sizes */
u8 lkey_published; /* in global table */
+ atomic_t lkey_invalid; /* true if current lkey is invalid */
struct completion comp; /* complete when refcount goes to zero */
atomic_t refcount;
struct rvt_segarray *map[0]; /* the segments */
#define RVT_PROCESS_OR_FLUSH_SEND \
(RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND)
+/*
+ * Internal send flags
+ */
+#define RVT_SEND_RESERVE_USED IB_SEND_RESERVED_START
+#define RVT_SEND_COMPLETION_ONLY (IB_SEND_RESERVED_START << 1)
+
/*
* Send work request queue entry.
* The size of the sg_list is determined when the QP is created and stored
* to send a RDMA read response or atomic operation.
*/
struct rvt_ack_entry {
- u8 opcode;
- u8 sent;
+ struct rvt_sge rdma_sge;
+ u64 atomic_data;
u32 psn;
u32 lpsn;
- union {
- struct rvt_sge rdma_sge;
- u64 atomic_data;
- };
+ u8 opcode;
+ u8 sent;
};
#define RC_QP_SCALING_INTERVAL 5
-/*
- * Variables prefixed with s_ are for the requester (sender).
- * Variables prefixed with r_ are for the responder (receiver).
- * Variables prefixed with ack_ are for responder replies.
+#define RVT_OPERATION_PRIV 0x00000001
+#define RVT_OPERATION_ATOMIC 0x00000002
+#define RVT_OPERATION_ATOMIC_SGE 0x00000004
+#define RVT_OPERATION_LOCAL 0x00000008
+#define RVT_OPERATION_USE_RESERVE 0x00000010
+
+#define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1)
+
+/**
+ * rvt_operation_params - op table entry
+ * @length - the length to copy into the swqe entry
+ * @qpt_support - a bit mask indicating QP type support
+ * @flags - RVT_OPERATION flags (see above)
*
+ * This supports table driven post send so that
+ * the driver can have differing an potentially
+ * different sets of operations.
+ *
+ **/
+
+struct rvt_operation_params {
+ size_t length;
+ u32 qpt_support;
+ u32 flags;
+};
+
+/*
* Common variables are protected by both r_rq.lock and s_lock in that order
* which only happens in modify_qp() or changing the QP 'state'.
*/
u32 s_next_psn; /* PSN for next request */
u32 s_avail; /* number of entries avail */
u32 s_ssn; /* SSN of tail entry */
+ atomic_t s_reserved_used; /* reserved entries in use */
spinlock_t s_lock ____cacheline_aligned_in_smp;
u32 s_flags;
struct rvt_sge_state s_ack_rdma_sge;
struct timer_list s_timer;
+ atomic_t local_ops_pending; /* number of fast_reg/local_inv reqs */
+
/*
* This sge list MUST be last. Do not add anything below here.
*/
rq->max_sge * sizeof(struct ib_sge)) * n);
}
+/**
+ * rvt_qp_wqe_reserve - reserve operation
+ * @qp - the rvt qp
+ * @wqe - the send wqe
+ *
+ * This routine used in post send to record
+ * a wqe relative reserved operation use.
+ */
+static inline void rvt_qp_wqe_reserve(
+ struct rvt_qp *qp,
+ struct rvt_swqe *wqe)
+{
+ wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
+ atomic_inc(&qp->s_reserved_used);
+}
+
+/**
+ * rvt_qp_wqe_unreserve - clean reserved operation
+ * @qp - the rvt qp
+ * @wqe - the send wqe
+ *
+ * This decrements the reserve use count.
+ *
+ * This call MUST precede the change to
+ * s_last to insure that post send sees a stable
+ * s_avail.
+ *
+ * An smp_mp__after_atomic() is used to insure
+ * the compiler does not juggle the order of the s_last
+ * ring index and the decrementing of s_reserved_used.
+ */
+static inline void rvt_qp_wqe_unreserve(
+ struct rvt_qp *qp,
+ struct rvt_swqe *wqe)
+{
+ if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) {
+ wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
+ atomic_dec(&qp->s_reserved_used);
+ /* insure no compiler re-order up to s_last change */
+ smp_mb__after_atomic();
+ }
+}
+
extern const int ib_rvt_state_ops[];
struct rvt_dev_info;
--- /dev/null
+/*****************************************************************************/
+/* srp.h -- SCSI RDMA Protocol definitions */
+/* */
+/* Written By: Colin Devilbis, IBM Corporation */
+/* */
+/* Copyright (C) 2003 IBM Corporation */
+/* */
+/* This program is free software; you can redistribute it and/or modify */
+/* it under the terms of the GNU General Public License as published by */
+/* the Free Software Foundation; either version 2 of the License, or */
+/* (at your option) any later version. */
+/* */
+/* This program is distributed in the hope that it will be useful, */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
+/* GNU General Public License for more details. */
+/* */
+/* This file contains structures and definitions for IBM RPA (RS/6000 */
+/* platform architecture) implementation of the SRP (SCSI RDMA Protocol) */
+/* standard. SRP is used on IBM iSeries and pSeries platforms to send SCSI */
+/* commands between logical partitions. */
+/* */
+/* SRP Information Units (IUs) are sent on a "Command/Response Queue" (CRQ) */
+/* between partitions. The definitions in this file are architected, */
+/* and cannot be changed without breaking compatibility with other versions */
+/* of Linux and other operating systems (AIX, OS/400) that talk this protocol*/
+/* between logical partitions */
+/*****************************************************************************/
+#ifndef VIOSRP_H
+#define VIOSRP_H
+#include <scsi/srp.h>
+
+#define SRP_VERSION "16.a"
+#define SRP_MAX_IU_LEN 256
+#define SRP_MAX_LOC_LEN 32
+
+union srp_iu {
+ struct srp_login_req login_req;
+ struct srp_login_rsp login_rsp;
+ struct srp_login_rej login_rej;
+ struct srp_i_logout i_logout;
+ struct srp_t_logout t_logout;
+ struct srp_tsk_mgmt tsk_mgmt;
+ struct srp_cmd cmd;
+ struct srp_rsp rsp;
+ u8 reserved[SRP_MAX_IU_LEN];
+};
+
+enum viosrp_crq_headers {
+ VIOSRP_CRQ_FREE = 0x00,
+ VIOSRP_CRQ_CMD_RSP = 0x80,
+ VIOSRP_CRQ_INIT_RSP = 0xC0,
+ VIOSRP_CRQ_XPORT_EVENT = 0xFF
+};
+
+enum viosrp_crq_init_formats {
+ VIOSRP_CRQ_INIT = 0x01,
+ VIOSRP_CRQ_INIT_COMPLETE = 0x02
+};
+
+enum viosrp_crq_formats {
+ VIOSRP_SRP_FORMAT = 0x01,
+ VIOSRP_MAD_FORMAT = 0x02,
+ VIOSRP_OS400_FORMAT = 0x03,
+ VIOSRP_AIX_FORMAT = 0x04,
+ VIOSRP_LINUX_FORMAT = 0x05,
+ VIOSRP_INLINE_FORMAT = 0x06
+};
+
+enum viosrp_crq_status {
+ VIOSRP_OK = 0x0,
+ VIOSRP_NONRECOVERABLE_ERR = 0x1,
+ VIOSRP_VIOLATES_MAX_XFER = 0x2,
+ VIOSRP_PARTNER_PANIC = 0x3,
+ VIOSRP_DEVICE_BUSY = 0x8,
+ VIOSRP_ADAPTER_FAIL = 0x10,
+ VIOSRP_OK2 = 0x99,
+};
+
+struct viosrp_crq {
+ u8 valid; /* used by RPA */
+ u8 format; /* SCSI vs out-of-band */
+ u8 reserved;
+ u8 status; /* non-scsi failure? (e.g. DMA failure) */
+ __be16 timeout; /* in seconds */
+ __be16 IU_length; /* in bytes */
+ __be64 IU_data_ptr; /* the TCE for transferring data */
+};
+
+/* MADs are Management requests above and beyond the IUs defined in the SRP
+ * standard.
+ */
+enum viosrp_mad_types {
+ VIOSRP_EMPTY_IU_TYPE = 0x01,
+ VIOSRP_ERROR_LOG_TYPE = 0x02,
+ VIOSRP_ADAPTER_INFO_TYPE = 0x03,
+ VIOSRP_CAPABILITIES_TYPE = 0x05,
+ VIOSRP_ENABLE_FAST_FAIL = 0x08,
+};
+
+enum viosrp_mad_status {
+ VIOSRP_MAD_SUCCESS = 0x00,
+ VIOSRP_MAD_NOT_SUPPORTED = 0xF1,
+ VIOSRP_MAD_FAILED = 0xF7,
+};
+
+enum viosrp_capability_type {
+ MIGRATION_CAPABILITIES = 0x01,
+ RESERVATION_CAPABILITIES = 0x02,
+};
+
+enum viosrp_capability_support {
+ SERVER_DOES_NOT_SUPPORTS_CAP = 0x0,
+ SERVER_SUPPORTS_CAP = 0x01,
+ SERVER_CAP_DATA = 0x02,
+};
+
+enum viosrp_reserve_type {
+ CLIENT_RESERVE_SCSI_2 = 0x01,
+};
+
+enum viosrp_capability_flag {
+ CLIENT_MIGRATED = 0x01,
+ CLIENT_RECONNECT = 0x02,
+ CAP_LIST_SUPPORTED = 0x04,
+ CAP_LIST_DATA = 0x08,
+};
+
+/*
+ * Common MAD header
+ */
+struct mad_common {
+ __be32 type;
+ __be16 status;
+ __be16 length;
+ __be64 tag;
+};
+
+/*
+ * All SRP (and MAD) requests normally flow from the
+ * client to the server. There is no way for the server to send
+ * an asynchronous message back to the client. The Empty IU is used
+ * to hang out a meaningless request to the server so that it can respond
+ * asynchrouously with something like a SCSI AER
+ */
+struct viosrp_empty_iu {
+ struct mad_common common;
+ __be64 buffer;
+ __be32 port;
+};
+
+struct viosrp_error_log {
+ struct mad_common common;
+ __be64 buffer;
+};
+
+struct viosrp_adapter_info {
+ struct mad_common common;
+ __be64 buffer;
+};
+
+struct viosrp_fast_fail {
+ struct mad_common common;
+};
+
+struct viosrp_capabilities {
+ struct mad_common common;
+ __be64 buffer;
+};
+
+struct mad_capability_common {
+ __be32 cap_type;
+ __be16 length;
+ __be16 server_support;
+};
+
+struct mad_reserve_cap {
+ struct mad_capability_common common;
+ __be32 type;
+};
+
+struct mad_migration_cap {
+ struct mad_capability_common common;
+ __be32 ecl;
+};
+
+struct capabilities {
+ __be32 flags;
+ char name[SRP_MAX_LOC_LEN];
+ char loc[SRP_MAX_LOC_LEN];
+ struct mad_migration_cap migration;
+ struct mad_reserve_cap reserve;
+};
+
+union mad_iu {
+ struct viosrp_empty_iu empty_iu;
+ struct viosrp_error_log error_log;
+ struct viosrp_adapter_info adapter_info;
+ struct viosrp_fast_fail fast_fail;
+ struct viosrp_capabilities capabilities;
+};
+
+union viosrp_iu {
+ union srp_iu srp;
+ union mad_iu mad;
+};
+
+struct mad_adapter_info_data {
+ char srp_version[8];
+ char partition_name[96];
+ __be32 partition_number;
+#define SRP_MAD_VERSION_1 1
+ __be32 mad_version;
+#define SRP_MAD_OS_LINUX 2
+#define SRP_MAD_OS_AIX 3
+ __be32 os_type;
+ __be32 port_max_txu[8]; /* per-port maximum transfer */
+};
+
+#endif
bool target_sense_desc_format(struct se_device *dev);
sector_t target_to_linux_sector(struct se_device *dev, sector_t lb);
bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
- struct request_queue *q, int block_size);
+ struct request_queue *q);
#endif /* TARGET_CORE_BACKEND_H */
SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC = 0x00200000,
SCF_ACK_KREF = 0x00400000,
SCF_USE_CPUID = 0x00800000,
+ SCF_TASK_ATTR_SET = 0x01000000,
};
/*
void core_tmr_release_req(struct se_tmr_req *);
int transport_generic_handle_tmr(struct se_cmd *);
void transport_generic_request_failure(struct se_cmd *, sense_reason_t);
-void __target_execute_cmd(struct se_cmd *);
int transport_lookup_tmr_lun(struct se_cmd *, u64);
void core_allocate_nexus_loss_ua(struct se_node_acl *acl);
{ BTRFS_BLOCK_GROUP_RAID6, "RAID6"}
#define BTRFS_UUID_SIZE 16
+#define TP_STRUCT__entry_fsid __array(u8, fsid, BTRFS_UUID_SIZE)
+
+#define TP_fast_assign_fsid(fs_info) \
+ memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE)
+
+#define TP_STRUCT__entry_btrfs(args...) \
+ TP_STRUCT__entry( \
+ TP_STRUCT__entry_fsid \
+ args)
+#define TP_fast_assign_btrfs(fs_info, args...) \
+ TP_fast_assign( \
+ TP_fast_assign_fsid(fs_info); \
+ args)
+#define TP_printk_btrfs(fmt, args...) \
+ TP_printk("%pU: " fmt, __entry->fsid, args)
TRACE_EVENT(btrfs_transaction_commit,
TP_ARGS(root),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, generation )
__field( u64, root_objectid )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(root->fs_info,
__entry->generation = root->fs_info->generation;
__entry->root_objectid = root->root_key.objectid;
),
- TP_printk("root = %llu(%s), gen = %llu",
+ TP_printk_btrfs("root = %llu(%s), gen = %llu",
show_root_type(__entry->root_objectid),
(unsigned long long)__entry->generation)
);
TP_ARGS(inode),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( ino_t, ino )
__field( blkcnt_t, blocks )
__field( u64, disk_i_size )
__field( u64, root_objectid )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
__entry->ino = inode->i_ino;
__entry->blocks = inode->i_blocks;
__entry->disk_i_size = BTRFS_I(inode)->disk_i_size;
BTRFS_I(inode)->root->root_key.objectid;
),
- TP_printk("root = %llu(%s), gen = %llu, ino = %lu, blocks = %llu, "
+ TP_printk_btrfs("root = %llu(%s), gen = %llu, ino = %lu, blocks = %llu, "
"disk_i_size = %llu, last_trans = %llu, logged_trans = %llu",
show_root_type(__entry->root_objectid),
(unsigned long long)__entry->generation,
TP_CONDITION(map),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, root_objectid )
__field( u64, start )
__field( u64, len )
__field( unsigned int, compress_type )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(root->fs_info,
__entry->root_objectid = root->root_key.objectid;
__entry->start = map->start;
__entry->len = map->len;
__entry->compress_type = map->compress_type;
),
- TP_printk("root = %llu(%s), start = %llu, len = %llu, "
+ TP_printk_btrfs("root = %llu(%s), start = %llu, len = %llu, "
"orig_start = %llu, block_start = %llu(%s), "
"block_len = %llu, flags = %s, refs = %u, "
"compress_type = %u",
TP_ARGS(inode, ordered),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( ino_t, ino )
__field( u64, file_offset )
__field( u64, start )
__field( u64, root_objectid )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
__entry->ino = inode->i_ino;
__entry->file_offset = ordered->file_offset;
__entry->start = ordered->start;
BTRFS_I(inode)->root->root_key.objectid;
),
- TP_printk("root = %llu(%s), ino = %llu, file_offset = %llu, "
+ TP_printk_btrfs("root = %llu(%s), ino = %llu, file_offset = %llu, "
"start = %llu, len = %llu, disk_len = %llu, "
"bytes_left = %llu, flags = %s, compress_type = %d, "
"refs = %d",
TP_ARGS(page, inode, wbc),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( ino_t, ino )
__field( pgoff_t, index )
__field( long, nr_to_write )
__field( u64, root_objectid )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
__entry->ino = inode->i_ino;
__entry->index = page->index;
__entry->nr_to_write = wbc->nr_to_write;
BTRFS_I(inode)->root->root_key.objectid;
),
- TP_printk("root = %llu(%s), ino = %lu, page_index = %lu, "
+ TP_printk_btrfs("root = %llu(%s), ino = %lu, page_index = %lu, "
"nr_to_write = %ld, pages_skipped = %ld, range_start = %llu, "
"range_end = %llu, for_kupdate = %d, "
"for_reclaim = %d, range_cyclic = %d, writeback_index = %lu",
TP_ARGS(page, start, end, uptodate),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( ino_t, ino )
__field( pgoff_t, index )
__field( u64, start )
__field( u64, root_objectid )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(btrfs_sb(page->mapping->host->i_sb),
__entry->ino = page->mapping->host->i_ino;
__entry->index = page->index;
__entry->start = start;
BTRFS_I(page->mapping->host)->root->root_key.objectid;
),
- TP_printk("root = %llu(%s), ino = %lu, page_index = %lu, start = %llu, "
+ TP_printk_btrfs("root = %llu(%s), ino = %lu, page_index = %lu, start = %llu, "
"end = %llu, uptodate = %d",
show_root_type(__entry->root_objectid),
(unsigned long)__entry->ino, (unsigned long)__entry->index,
TP_ARGS(file, datasync),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( ino_t, ino )
__field( ino_t, parent )
__field( int, datasync )
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = d_inode(dentry);
+ TP_fast_assign_fsid(btrfs_sb(file->f_path.dentry->d_sb));
__entry->ino = inode->i_ino;
__entry->parent = d_inode(dentry->d_parent)->i_ino;
__entry->datasync = datasync;
BTRFS_I(inode)->root->root_key.objectid;
),
- TP_printk("root = %llu(%s), ino = %ld, parent = %ld, datasync = %d",
+ TP_printk_btrfs("root = %llu(%s), ino = %ld, parent = %ld, datasync = %d",
show_root_type(__entry->root_objectid),
(unsigned long)__entry->ino, (unsigned long)__entry->parent,
__entry->datasync)
TRACE_EVENT(btrfs_sync_fs,
- TP_PROTO(int wait),
+ TP_PROTO(struct btrfs_fs_info *fs_info, int wait),
- TP_ARGS(wait),
+ TP_ARGS(fs_info, wait),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( int, wait )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(fs_info,
__entry->wait = wait;
),
- TP_printk("wait = %d", __entry->wait)
+ TP_printk_btrfs("wait = %d", __entry->wait)
);
TRACE_EVENT(btrfs_add_block_group,
DECLARE_EVENT_CLASS(btrfs_delayed_tree_ref,
- TP_PROTO(struct btrfs_delayed_ref_node *ref,
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_node *ref,
struct btrfs_delayed_tree_ref *full_ref,
int action),
- TP_ARGS(ref, full_ref, action),
+ TP_ARGS(fs_info, ref, full_ref, action),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, bytenr )
__field( u64, num_bytes )
__field( int, action )
__field( u64, seq )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(fs_info,
__entry->bytenr = ref->bytenr;
__entry->num_bytes = ref->num_bytes;
__entry->action = action;
__entry->seq = ref->seq;
),
- TP_printk("bytenr = %llu, num_bytes = %llu, action = %s, "
+ TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, action = %s, "
"parent = %llu(%s), ref_root = %llu(%s), level = %d, "
"type = %s, seq = %llu",
(unsigned long long)__entry->bytenr,
DEFINE_EVENT(btrfs_delayed_tree_ref, add_delayed_tree_ref,
- TP_PROTO(struct btrfs_delayed_ref_node *ref,
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_node *ref,
struct btrfs_delayed_tree_ref *full_ref,
int action),
- TP_ARGS(ref, full_ref, action)
+ TP_ARGS(fs_info, ref, full_ref, action)
);
DEFINE_EVENT(btrfs_delayed_tree_ref, run_delayed_tree_ref,
- TP_PROTO(struct btrfs_delayed_ref_node *ref,
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_node *ref,
struct btrfs_delayed_tree_ref *full_ref,
int action),
- TP_ARGS(ref, full_ref, action)
+ TP_ARGS(fs_info, ref, full_ref, action)
);
DECLARE_EVENT_CLASS(btrfs_delayed_data_ref,
- TP_PROTO(struct btrfs_delayed_ref_node *ref,
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_node *ref,
struct btrfs_delayed_data_ref *full_ref,
int action),
- TP_ARGS(ref, full_ref, action),
+ TP_ARGS(fs_info, ref, full_ref, action),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, bytenr )
__field( u64, num_bytes )
__field( int, action )
__field( u64, seq )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(fs_info,
__entry->bytenr = ref->bytenr;
__entry->num_bytes = ref->num_bytes;
__entry->action = action;
__entry->seq = ref->seq;
),
- TP_printk("bytenr = %llu, num_bytes = %llu, action = %s, "
+ TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, action = %s, "
"parent = %llu(%s), ref_root = %llu(%s), owner = %llu, "
"offset = %llu, type = %s, seq = %llu",
(unsigned long long)__entry->bytenr,
DEFINE_EVENT(btrfs_delayed_data_ref, add_delayed_data_ref,
- TP_PROTO(struct btrfs_delayed_ref_node *ref,
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_node *ref,
struct btrfs_delayed_data_ref *full_ref,
int action),
- TP_ARGS(ref, full_ref, action)
+ TP_ARGS(fs_info, ref, full_ref, action)
);
DEFINE_EVENT(btrfs_delayed_data_ref, run_delayed_data_ref,
- TP_PROTO(struct btrfs_delayed_ref_node *ref,
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_node *ref,
struct btrfs_delayed_data_ref *full_ref,
int action),
- TP_ARGS(ref, full_ref, action)
+ TP_ARGS(fs_info, ref, full_ref, action)
);
DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
- TP_PROTO(struct btrfs_delayed_ref_node *ref,
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_node *ref,
struct btrfs_delayed_ref_head *head_ref,
int action),
- TP_ARGS(ref, head_ref, action),
+ TP_ARGS(fs_info, ref, head_ref, action),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, bytenr )
__field( u64, num_bytes )
__field( int, action )
__field( int, is_data )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(fs_info,
__entry->bytenr = ref->bytenr;
__entry->num_bytes = ref->num_bytes;
__entry->action = action;
__entry->is_data = head_ref->is_data;
),
- TP_printk("bytenr = %llu, num_bytes = %llu, action = %s, is_data = %d",
+ TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, action = %s, is_data = %d",
(unsigned long long)__entry->bytenr,
(unsigned long long)__entry->num_bytes,
show_ref_action(__entry->action),
DEFINE_EVENT(btrfs_delayed_ref_head, add_delayed_ref_head,
- TP_PROTO(struct btrfs_delayed_ref_node *ref,
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_node *ref,
struct btrfs_delayed_ref_head *head_ref,
int action),
- TP_ARGS(ref, head_ref, action)
+ TP_ARGS(fs_info, ref, head_ref, action)
);
DEFINE_EVENT(btrfs_delayed_ref_head, run_delayed_ref_head,
- TP_PROTO(struct btrfs_delayed_ref_node *ref,
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_node *ref,
struct btrfs_delayed_ref_head *head_ref,
int action),
- TP_ARGS(ref, head_ref, action)
+ TP_ARGS(fs_info, ref, head_ref, action)
);
#define show_chunk_type(type) \
TP_ARGS(root, map, offset, size),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( int, num_stripes )
__field( u64, type )
__field( int, sub_stripes )
__field( u64, root_objectid )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(root->fs_info,
__entry->num_stripes = map->num_stripes;
__entry->type = map->type;
__entry->sub_stripes = map->sub_stripes;
__entry->root_objectid = root->root_key.objectid;
),
- TP_printk("root = %llu(%s), offset = %llu, size = %llu, "
+ TP_printk_btrfs("root = %llu(%s), offset = %llu, size = %llu, "
"num_stripes = %d, sub_stripes = %d, type = %s",
show_root_type(__entry->root_objectid),
(unsigned long long)__entry->offset,
TP_ARGS(root, buf, cow),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, root_objectid )
__field( u64, buf_start )
__field( int, refs )
__field( int, cow_level )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(root->fs_info,
__entry->root_objectid = root->root_key.objectid;
__entry->buf_start = buf->start;
__entry->refs = atomic_read(&buf->refs);
__entry->cow_level = btrfs_header_level(cow);
),
- TP_printk("root = %llu(%s), refs = %d, orig_buf = %llu "
+ TP_printk_btrfs("root = %llu(%s), refs = %d, orig_buf = %llu "
"(orig_level = %d), cow_buf = %llu (cow_level = %d)",
show_root_type(__entry->root_objectid),
__entry->refs,
TP_ARGS(fs_info, type, val, bytes, reserve),
- TP_STRUCT__entry(
- __array( u8, fsid, BTRFS_UUID_SIZE )
+ TP_STRUCT__entry_btrfs(
__string( type, type )
__field( u64, val )
__field( u64, bytes )
__field( int, reserve )
),
- TP_fast_assign(
- memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE);
+ TP_fast_assign_btrfs(fs_info,
__assign_str(type, type);
__entry->val = val;
__entry->bytes = bytes;
__entry->reserve = reserve;
),
- TP_printk("%pU: %s: %Lu %s %Lu", __entry->fsid, __get_str(type),
- __entry->val, __entry->reserve ? "reserve" : "release",
- __entry->bytes)
+ TP_printk_btrfs("%s: %Lu %s %Lu", __get_str(type), __entry->val,
+ __entry->reserve ? "reserve" : "release",
+ __entry->bytes)
);
#define show_flush_action(action) \
TP_ARGS(root, start, len),
- TP_STRUCT__entry(
- __array( u8, fsid, BTRFS_UUID_SIZE )
- __field( u64, root_objectid )
- __field( u64, start )
- __field( u64, len )
+ TP_STRUCT__entry_btrfs(
+ __field( u64, root_objectid )
+ __field( u64, start )
+ __field( u64, len )
),
- TP_fast_assign(
- memcpy(__entry->fsid, root->fs_info->fsid, BTRFS_UUID_SIZE);
+ TP_fast_assign_btrfs(root->fs_info,
__entry->root_objectid = root->root_key.objectid;
__entry->start = start;
__entry->len = len;
),
- TP_printk("%pU: root = %llu(%s), start = %llu, len = %llu",
- __entry->fsid,
+ TP_printk_btrfs("root = %llu(%s), start = %llu, len = %llu",
show_root_type(__entry->root_objectid),
(unsigned long long)__entry->start,
(unsigned long long)__entry->len)
TP_ARGS(root, num_bytes, empty_size, data),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, root_objectid )
__field( u64, num_bytes )
__field( u64, empty_size )
__field( u64, data )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(root->fs_info,
__entry->root_objectid = root->root_key.objectid;
__entry->num_bytes = num_bytes;
__entry->empty_size = empty_size;
__entry->data = data;
),
- TP_printk("root = %Lu(%s), len = %Lu, empty_size = %Lu, "
+ TP_printk_btrfs("root = %Lu(%s), len = %Lu, empty_size = %Lu, "
"flags = %Lu(%s)", show_root_type(__entry->root_objectid),
__entry->num_bytes, __entry->empty_size, __entry->data,
__print_flags((unsigned long)__entry->data, "|",
TP_ARGS(root, block_group, start, len),
- TP_STRUCT__entry(
- __array( u8, fsid, BTRFS_UUID_SIZE )
+ TP_STRUCT__entry_btrfs(
__field( u64, root_objectid )
__field( u64, bg_objectid )
__field( u64, flags )
__field( u64, len )
),
- TP_fast_assign(
- memcpy(__entry->fsid, root->fs_info->fsid, BTRFS_UUID_SIZE);
+ TP_fast_assign_btrfs(root->fs_info,
__entry->root_objectid = root->root_key.objectid;
__entry->bg_objectid = block_group->key.objectid;
__entry->flags = block_group->flags;
__entry->len = len;
),
- TP_printk("%pU: root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), "
- "start = %Lu, len = %Lu", __entry->fsid,
+ TP_printk_btrfs("root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), "
+ "start = %Lu, len = %Lu",
show_root_type(__entry->root_objectid), __entry->bg_objectid,
__entry->flags, __print_flags((unsigned long)__entry->flags,
"|", BTRFS_GROUP_FLAGS),
TP_ARGS(block_group, start, bytes, empty_size, min_bytes),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, bg_objectid )
__field( u64, flags )
__field( u64, start )
__field( u64, min_bytes )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(block_group->fs_info,
__entry->bg_objectid = block_group->key.objectid;
__entry->flags = block_group->flags;
__entry->start = start;
__entry->min_bytes = min_bytes;
),
- TP_printk("block_group = %Lu, flags = %Lu(%s), start = %Lu, len = %Lu,"
+ TP_printk_btrfs("block_group = %Lu, flags = %Lu(%s), start = %Lu, len = %Lu,"
" empty_size = %Lu, min_bytes = %Lu", __entry->bg_objectid,
__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
TP_ARGS(block_group),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, bg_objectid )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(block_group->fs_info,
__entry->bg_objectid = block_group->key.objectid;
),
- TP_printk("block_group = %Lu", __entry->bg_objectid)
+ TP_printk_btrfs("block_group = %Lu", __entry->bg_objectid)
);
TRACE_EVENT(btrfs_setup_cluster,
TP_ARGS(block_group, cluster, size, bitmap),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, bg_objectid )
__field( u64, flags )
__field( u64, start )
__field( int, bitmap )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(block_group->fs_info,
__entry->bg_objectid = block_group->key.objectid;
__entry->flags = block_group->flags;
__entry->start = cluster->window_start;
__entry->bitmap = bitmap;
),
- TP_printk("block_group = %Lu, flags = %Lu(%s), window_start = %Lu, "
+ TP_printk_btrfs("block_group = %Lu, flags = %Lu(%s), window_start = %Lu, "
"size = %Lu, max_size = %Lu, bitmap = %d",
__entry->bg_objectid,
__entry->flags,
TP_ARGS(work),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( void *, work )
__field( void *, wq )
__field( void *, func )
__field( void *, normal_work )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(btrfs_work_owner(work),
__entry->work = work;
__entry->wq = work->wq;
__entry->func = work->func;
__entry->normal_work = &work->normal_work;
),
- TP_printk("work=%p (normal_work=%p), wq=%p, func=%pf, ordered_func=%p,"
+ TP_printk_btrfs("work=%p (normal_work=%p), wq=%p, func=%pf, ordered_func=%p,"
" ordered_free=%p",
__entry->work, __entry->normal_work, __entry->wq,
__entry->func, __entry->ordered_func, __entry->ordered_free)
TP_ARGS(work),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( void *, work )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(btrfs_work_owner(work),
__entry->work = work;
),
- TP_printk("work->%p", __entry->work)
+ TP_printk_btrfs("work->%p", __entry->work)
);
DEFINE_EVENT(btrfs__work, btrfs_work_queued,
TP_ARGS(wq, name, high),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( void *, wq )
__string( name, name )
__field( int , high )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(btrfs_workqueue_owner(wq),
__entry->wq = wq;
__assign_str(name, name);
__entry->high = high;
),
- TP_printk("name=%s%s, wq=%p", __get_str(name),
+ TP_printk_btrfs("name=%s%s, wq=%p", __get_str(name),
__print_flags(__entry->high, "",
{(WQ_HIGHPRI), "-high"}),
__entry->wq)
TP_ARGS(wq),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( void *, wq )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(btrfs_workqueue_owner(wq),
__entry->wq = wq;
),
- TP_printk("wq=%p", __entry->wq)
+ TP_printk_btrfs("wq=%p", __entry->wq)
);
DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy,
TP_ARGS(inode, free_reserved),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, rootid )
__field( unsigned long, ino )
__field( u64, free_reserved )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
__entry->rootid = BTRFS_I(inode)->root->objectid;
__entry->ino = inode->i_ino;
__entry->free_reserved = free_reserved;
),
- TP_printk("rootid=%llu, ino=%lu, free_reserved=%llu",
+ TP_printk_btrfs("rootid=%llu, ino=%lu, free_reserved=%llu",
__entry->rootid, __entry->ino, __entry->free_reserved)
);
TP_ARGS(inode, start, len, reserved, op),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, rootid )
__field( unsigned long, ino )
__field( u64, start )
__field( int, op )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
__entry->rootid = BTRFS_I(inode)->root->objectid;
__entry->ino = inode->i_ino;
__entry->start = start;
__entry->op = op;
),
- TP_printk("root=%llu, ino=%lu, start=%llu, len=%llu, reserved=%llu, op=%s",
+ TP_printk_btrfs("root=%llu, ino=%lu, start=%llu, len=%llu, reserved=%llu, op=%s",
__entry->rootid, __entry->ino, __entry->start, __entry->len,
__entry->reserved,
__print_flags((unsigned long)__entry->op, "",
DECLARE_EVENT_CLASS(btrfs__qgroup_delayed_ref,
- TP_PROTO(u64 ref_root, u64 reserved),
+ TP_PROTO(struct btrfs_fs_info *fs_info, u64 ref_root, u64 reserved),
- TP_ARGS(ref_root, reserved),
+ TP_ARGS(fs_info, ref_root, reserved),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, ref_root )
__field( u64, reserved )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(fs_info,
__entry->ref_root = ref_root;
__entry->reserved = reserved;
),
- TP_printk("root=%llu, reserved=%llu, op=free",
+ TP_printk_btrfs("root=%llu, reserved=%llu, op=free",
__entry->ref_root, __entry->reserved)
);
DEFINE_EVENT(btrfs__qgroup_delayed_ref, btrfs_qgroup_free_delayed_ref,
- TP_PROTO(u64 ref_root, u64 reserved),
+ TP_PROTO(struct btrfs_fs_info *fs_info, u64 ref_root, u64 reserved),
- TP_ARGS(ref_root, reserved)
+ TP_ARGS(fs_info, ref_root, reserved)
);
DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
- TP_PROTO(struct btrfs_qgroup_extent_record *rec),
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_extent_record *rec),
- TP_ARGS(rec),
+ TP_ARGS(fs_info, rec),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, bytenr )
__field( u64, num_bytes )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(fs_info,
__entry->bytenr = rec->bytenr,
__entry->num_bytes = rec->num_bytes;
),
- TP_printk("bytenr = %llu, num_bytes = %llu",
+ TP_printk_btrfs("bytenr = %llu, num_bytes = %llu",
(unsigned long long)__entry->bytenr,
(unsigned long long)__entry->num_bytes)
);
DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_account_extents,
- TP_PROTO(struct btrfs_qgroup_extent_record *rec),
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_extent_record *rec),
- TP_ARGS(rec)
+ TP_ARGS(fs_info, rec)
);
DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_insert_dirty_extent,
- TP_PROTO(struct btrfs_qgroup_extent_record *rec),
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_extent_record *rec),
- TP_ARGS(rec)
+ TP_ARGS(fs_info, rec)
);
TRACE_EVENT(btrfs_qgroup_account_extent,
- TP_PROTO(u64 bytenr, u64 num_bytes, u64 nr_old_roots, u64 nr_new_roots),
+ TP_PROTO(struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 num_bytes, u64 nr_old_roots, u64 nr_new_roots),
- TP_ARGS(bytenr, num_bytes, nr_old_roots, nr_new_roots),
+ TP_ARGS(fs_info, bytenr, num_bytes, nr_old_roots, nr_new_roots),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, bytenr )
__field( u64, num_bytes )
__field( u64, nr_old_roots )
__field( u64, nr_new_roots )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(fs_info,
__entry->bytenr = bytenr;
__entry->num_bytes = num_bytes;
__entry->nr_old_roots = nr_old_roots;
__entry->nr_new_roots = nr_new_roots;
),
- TP_printk("bytenr = %llu, num_bytes = %llu, nr_old_roots = %llu, "
+ TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, nr_old_roots = %llu, "
"nr_new_roots = %llu",
__entry->bytenr,
__entry->num_bytes,
TRACE_EVENT(qgroup_update_counters,
- TP_PROTO(u64 qgid, u64 cur_old_count, u64 cur_new_count),
+ TP_PROTO(struct btrfs_fs_info *fs_info, u64 qgid,
+ u64 cur_old_count, u64 cur_new_count),
- TP_ARGS(qgid, cur_old_count, cur_new_count),
+ TP_ARGS(fs_info, qgid, cur_old_count, cur_new_count),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, qgid )
__field( u64, cur_old_count )
__field( u64, cur_new_count )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(fs_info,
__entry->qgid = qgid;
__entry->cur_old_count = cur_old_count;
__entry->cur_new_count = cur_new_count;
),
- TP_printk("qgid = %llu, cur_old_count = %llu, cur_new_count = %llu",
+ TP_printk_btrfs("qgid = %llu, cur_old_count = %llu, cur_new_count = %llu",
__entry->qgid,
__entry->cur_old_count,
__entry->cur_new_count)
show_rqstp_flags(__entry->flags))
);
+DECLARE_EVENT_CLASS(svc_rqst_event,
+
+ TP_PROTO(struct svc_rqst *rqst),
+
+ TP_ARGS(rqst),
+
+ TP_STRUCT__entry(
+ __field(__be32, xid)
+ __field(unsigned long, flags)
+ __dynamic_array(unsigned char, addr, rqst->rq_addrlen)
+ ),
+
+ TP_fast_assign(
+ __entry->xid = rqst->rq_xid;
+ __entry->flags = rqst->rq_flags;
+ memcpy(__get_dynamic_array(addr),
+ &rqst->rq_addr, rqst->rq_addrlen);
+ ),
+
+ TP_printk("addr=%pIScp rq_xid=0x%x flags=%s",
+ (struct sockaddr *)__get_dynamic_array(addr),
+ be32_to_cpu(__entry->xid),
+ show_rqstp_flags(__entry->flags))
+);
+
+DEFINE_EVENT(svc_rqst_event, svc_defer,
+ TP_PROTO(struct svc_rqst *rqst),
+ TP_ARGS(rqst));
+
+DEFINE_EVENT(svc_rqst_event, svc_drop,
+ TP_PROTO(struct svc_rqst *rqst),
+ TP_ARGS(rqst));
+
DECLARE_EVENT_CLASS(svc_rqst_status,
TP_PROTO(struct svc_rqst *rqst, int status),
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
- __field_struct(struct sockaddr_storage, ss)
__field(int, pid)
__field(unsigned long, flags)
+ __dynamic_array(unsigned char, addr, xprt != NULL ?
+ xprt->xpt_remotelen : 0)
),
TP_fast_assign(
__entry->xprt = xprt;
- xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
__entry->pid = rqst? rqst->rq_task->pid : 0;
- __entry->flags = xprt ? xprt->xpt_flags : 0;
+ if (xprt) {
+ memcpy(__get_dynamic_array(addr),
+ &xprt->xpt_remote,
+ xprt->xpt_remotelen);
+ __entry->flags = xprt->xpt_flags;
+ } else
+ __entry->flags = 0;
),
TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
- (struct sockaddr *)&__entry->ss,
+ __get_dynamic_array_len(addr) != 0 ?
+ (struct sockaddr *)__get_dynamic_array(addr) : NULL,
__entry->pid, show_svc_xprt_flags(__entry->flags))
);
-TRACE_EVENT(svc_xprt_dequeue,
+DECLARE_EVENT_CLASS(svc_xprt_event,
TP_PROTO(struct svc_xprt *xprt),
TP_ARGS(xprt),
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
- __field_struct(struct sockaddr_storage, ss)
__field(unsigned long, flags)
+ __dynamic_array(unsigned char, addr, xprt != NULL ?
+ xprt->xpt_remotelen : 0)
),
TP_fast_assign(
- __entry->xprt = xprt,
- xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
- __entry->flags = xprt ? xprt->xpt_flags : 0;
+ __entry->xprt = xprt;
+ if (xprt) {
+ memcpy(__get_dynamic_array(addr),
+ &xprt->xpt_remote,
+ xprt->xpt_remotelen);
+ __entry->flags = xprt->xpt_flags;
+ } else
+ __entry->flags = 0;
),
TP_printk("xprt=0x%p addr=%pIScp flags=%s", __entry->xprt,
- (struct sockaddr *)&__entry->ss,
+ __get_dynamic_array_len(addr) != 0 ?
+ (struct sockaddr *)__get_dynamic_array(addr) : NULL,
show_svc_xprt_flags(__entry->flags))
);
+DEFINE_EVENT(svc_xprt_event, svc_xprt_dequeue,
+ TP_PROTO(struct svc_xprt *xprt),
+ TP_ARGS(xprt));
+
+DEFINE_EVENT(svc_xprt_event, svc_xprt_no_write_space,
+ TP_PROTO(struct svc_xprt *xprt),
+ TP_ARGS(xprt));
+
TRACE_EVENT(svc_wake_up,
TP_PROTO(int pid),
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
__field(int, len)
- __field_struct(struct sockaddr_storage, ss)
__field(unsigned long, flags)
+ __dynamic_array(unsigned char, addr, xprt != NULL ?
+ xprt->xpt_remotelen : 0)
),
TP_fast_assign(
__entry->xprt = xprt;
- xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
__entry->len = len;
- __entry->flags = xprt ? xprt->xpt_flags : 0;
+ if (xprt) {
+ memcpy(__get_dynamic_array(addr),
+ &xprt->xpt_remote,
+ xprt->xpt_remotelen);
+ __entry->flags = xprt->xpt_flags;
+ } else
+ __entry->flags = 0;
),
TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt,
- (struct sockaddr *)&__entry->ss,
+ __get_dynamic_array_len(addr) != 0 ?
+ (struct sockaddr *)__get_dynamic_array(addr) : NULL,
__entry->len, show_svc_xprt_flags(__entry->flags))
);
+
+
+DECLARE_EVENT_CLASS(svc_deferred_event,
+ TP_PROTO(struct svc_deferred_req *dr),
+
+ TP_ARGS(dr),
+
+ TP_STRUCT__entry(
+ __field(__be32, xid)
+ __dynamic_array(unsigned char, addr, dr->addrlen)
+ ),
+
+ TP_fast_assign(
+ __entry->xid = *(__be32 *)(dr->args + (dr->xprt_hlen>>2));
+ memcpy(__get_dynamic_array(addr), &dr->addr, dr->addrlen);
+ ),
+
+ TP_printk("addr=%pIScp xid=0x%x",
+ (struct sockaddr *)__get_dynamic_array(addr),
+ be32_to_cpu(__entry->xid))
+);
+
+DEFINE_EVENT(svc_deferred_event, svc_drop_deferred,
+ TP_PROTO(struct svc_deferred_req *dr),
+ TP_ARGS(dr));
+DEFINE_EVENT(svc_deferred_event, svc_revisit_deferred,
+ TP_PROTO(struct svc_deferred_req *dr),
+ TP_ARGS(dr));
#endif /* _TRACE_SUNRPC_H */
#include <trace/define_trace.h>
#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
- struct btrfs_ioctl_ino_path_args)
+ struct btrfs_ioctl_logical_ino_args)
#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \
struct btrfs_ioctl_received_subvol_args)
#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args)
#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */
#define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */
#define NT_PPC_VSX 0x102 /* PowerPC VSX registers */
+#define NT_PPC_TAR 0x103 /* Target Address Register */
+#define NT_PPC_PPR 0x104 /* Program Priority Register */
+#define NT_PPC_DSCR 0x105 /* Data Stream Control Register */
+#define NT_PPC_EBB 0x106 /* Event Based Branch Registers */
+#define NT_PPC_PMU 0x107 /* Performance Monitor Registers */
+#define NT_PPC_TM_CGPR 0x108 /* TM checkpointed GPR Registers */
+#define NT_PPC_TM_CFPR 0x109 /* TM checkpointed FPR Registers */
+#define NT_PPC_TM_CVMX 0x10a /* TM checkpointed VMX Registers */
+#define NT_PPC_TM_CVSX 0x10b /* TM checkpointed VSX Registers */
+#define NT_PPC_TM_SPR 0x10c /* TM Special Purpose Registers */
+#define NT_PPC_TM_CTAR 0x10d /* TM checkpointed Target Address Register */
+#define NT_PPC_TM_CPPR 0x10e /* TM checkpointed Program Priority Register */
+#define NT_PPC_TM_CDSCR 0x10f /* TM checkpointed Data Stream Control Register */
#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */
#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
header-y += rdma_netlink.h
header-y += rdma_user_cm.h
header-y += hfi/
+header-y += rdma_user_rxe.h
* may not be implemented; the user code must deal with this if it
* cares, or it must abort after initialization reports the difference.
*/
-#define HFI1_USER_SWMINOR 1
+#define HFI1_USER_SWMINOR 2
/*
* We will encode the major/minor inside a single 32bit version number.
IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP,
IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
+ IB_USER_VERBS_EX_CMD_CREATE_WQ,
+ IB_USER_VERBS_EX_CMD_MODIFY_WQ,
+ IB_USER_VERBS_EX_CMD_DESTROY_WQ,
+ IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL,
+ IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL
};
/*
__u64 driver_data[0];
};
+enum ib_uverbs_create_qp_mask {
+ IB_UVERBS_CREATE_QP_MASK_IND_TABLE = 1UL << 0,
+};
+
+enum {
+ IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE,
+};
+
struct ib_uverbs_ex_create_qp {
__u64 user_handle;
__u32 pd_handle;
__u8 reserved;
__u32 comp_mask;
__u32 create_flags;
+ __u32 rwq_ind_tbl_handle;
+ __u32 reserved1;
};
struct ib_uverbs_open_qp {
struct ib_uverbs_flow_tcp_udp_filter mask;
};
+struct ib_uverbs_flow_ipv6_filter {
+ __u8 src_ip[16];
+ __u8 dst_ip[16];
+};
+
+struct ib_uverbs_flow_spec_ipv6 {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_uverbs_flow_ipv6_filter val;
+ struct ib_uverbs_flow_ipv6_filter mask;
+};
+
struct ib_uverbs_flow_attr {
__u32 type;
__u16 size;
__u32 events_reported;
};
+struct ib_uverbs_ex_create_wq {
+ __u32 comp_mask;
+ __u32 wq_type;
+ __u64 user_handle;
+ __u32 pd_handle;
+ __u32 cq_handle;
+ __u32 max_wr;
+ __u32 max_sge;
+};
+
+struct ib_uverbs_ex_create_wq_resp {
+ __u32 comp_mask;
+ __u32 response_length;
+ __u32 wq_handle;
+ __u32 max_wr;
+ __u32 max_sge;
+ __u32 wqn;
+};
+
+struct ib_uverbs_ex_destroy_wq {
+ __u32 comp_mask;
+ __u32 wq_handle;
+};
+
+struct ib_uverbs_ex_destroy_wq_resp {
+ __u32 comp_mask;
+ __u32 response_length;
+ __u32 events_reported;
+ __u32 reserved;
+};
+
+struct ib_uverbs_ex_modify_wq {
+ __u32 attr_mask;
+ __u32 wq_handle;
+ __u32 wq_state;
+ __u32 curr_wq_state;
+};
+
+/* Prevent memory allocation rather than max expected size */
+#define IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE 0x0d
+struct ib_uverbs_ex_create_rwq_ind_table {
+ __u32 comp_mask;
+ __u32 log_ind_tbl_size;
+ /* Following are the wq handles according to log_ind_tbl_size
+ * wq_handle1
+ * wq_handle2
+ */
+ __u32 wq_handles[0];
+};
+
+struct ib_uverbs_ex_create_rwq_ind_table_resp {
+ __u32 comp_mask;
+ __u32 response_length;
+ __u32 ind_tbl_handle;
+ __u32 ind_tbl_num;
+};
+
+struct ib_uverbs_ex_destroy_rwq_ind_table {
+ __u32 comp_mask;
+ __u32 ind_tbl_handle;
+};
+
#endif /* IB_USER_VERBS_H */
__u32 id;
};
+/* Multicast join flags */
+enum {
+ RDMA_MC_JOIN_FLAG_FULLMEMBER,
+ RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER,
+ RDMA_MC_JOIN_FLAG_RESERVED,
+};
+
struct rdma_ucm_join_mcast {
__u64 response; /* rdma_ucma_create_id_resp */
__u64 uid;
__u32 id;
__u16 addr_size;
- __u16 reserved;
+ __u16 join_flags;
struct sockaddr_storage addr;
};
--- /dev/null
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RDMA_USER_RXE_H
+#define RDMA_USER_RXE_H
+
+#include <linux/types.h>
+
+union rxe_gid {
+ __u8 raw[16];
+ struct {
+ __be64 subnet_prefix;
+ __be64 interface_id;
+ } global;
+};
+
+struct rxe_global_route {
+ union rxe_gid dgid;
+ __u32 flow_label;
+ __u8 sgid_index;
+ __u8 hop_limit;
+ __u8 traffic_class;
+};
+
+struct rxe_av {
+ __u8 port_num;
+ __u8 network_type;
+ struct rxe_global_route grh;
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } sgid_addr, dgid_addr;
+};
+
+struct rxe_send_wr {
+ __u64 wr_id;
+ __u32 num_sge;
+ __u32 opcode;
+ __u32 send_flags;
+ union {
+ __be32 imm_data;
+ __u32 invalidate_rkey;
+ } ex;
+ union {
+ struct {
+ __u64 remote_addr;
+ __u32 rkey;
+ } rdma;
+ struct {
+ __u64 remote_addr;
+ __u64 compare_add;
+ __u64 swap;
+ __u32 rkey;
+ } atomic;
+ struct {
+ __u32 remote_qpn;
+ __u32 remote_qkey;
+ __u16 pkey_index;
+ } ud;
+ struct {
+ struct ib_mr *mr;
+ __u32 key;
+ int access;
+ } reg;
+ } wr;
+};
+
+struct rxe_sge {
+ __u64 addr;
+ __u32 length;
+ __u32 lkey;
+};
+
+struct mminfo {
+ __u64 offset;
+ __u32 size;
+ __u32 pad;
+};
+
+struct rxe_dma_info {
+ __u32 length;
+ __u32 resid;
+ __u32 cur_sge;
+ __u32 num_sge;
+ __u32 sge_offset;
+ union {
+ __u8 inline_data[0];
+ struct rxe_sge sge[0];
+ };
+};
+
+struct rxe_send_wqe {
+ struct rxe_send_wr wr;
+ struct rxe_av av;
+ __u32 status;
+ __u32 state;
+ __u64 iova;
+ __u32 mask;
+ __u32 first_psn;
+ __u32 last_psn;
+ __u32 ack_length;
+ __u32 ssn;
+ __u32 has_rd_atomic;
+ struct rxe_dma_info dma;
+};
+
+struct rxe_recv_wqe {
+ __u64 wr_id;
+ __u32 num_sge;
+ __u32 padding;
+ struct rxe_dma_info dma;
+};
+
+#endif /* RDMA_USER_RXE_H */
BUILD_BUG_ON((int)ATOMIC_INIT(0) != 0);
BUILD_BUG_ON((int)ATOMIC_INIT(1) != 1);
+ if (static_key_initialized)
+ return;
+
jump_label_lock();
jump_label_sort_entries(iter_start, iter_stop);
bool "Allow for memory hot-add"
depends on SPARSEMEM || X86_64_ACPI_NUMA
depends on ARCH_ENABLE_MEMORY_HOTPLUG
+ depends on !KASAN
config MEMORY_HOTPLUG_SPARSE
def_bool y
return i ? i : -EFAULT;
}
+#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
+/*
+ * ARCHes with special requirements for evicting HUGETLB backing TLB entries can
+ * implement this.
+ */
+#define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
+#endif
+
unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long address, unsigned long end, pgprot_t newprot)
{
* once we release i_mmap_rwsem, another task can do the final put_page
* and that page table be reused and filled with junk.
*/
- flush_tlb_range(vma, start, end);
+ flush_hugetlb_tlb_range(vma, start, end);
mmu_notifier_invalidate_range(mm, start, end);
i_mmap_unlock_write(vma->vm_file->f_mapping);
mmu_notifier_invalidate_range_end(mm, start, end);
* @flags: flags of the new region
*
* Insert new memblock region [@base,@base+@size) into @type at @idx.
- * @type must already have extra room to accomodate the new region.
+ * @type must already have extra room to accommodate the new region.
*/
static void __init_memblock memblock_insert_region(struct memblock_type *type,
int idx, phys_addr_t base,
/*
* The following is executed twice. Once with %false @insert and
* then with %true. The first counts the number of regions needed
- * to accomodate the new area. The second actually inserts them.
+ * to accommodate the new area. The second actually inserts them.
*/
base = obase;
nr_new = 0;
if (*idx == (u64)ULLONG_MAX) {
idx_a = type_a->cnt - 1;
- idx_b = type_b->cnt;
+ if (type_b != NULL)
+ idx_b = type_b->cnt;
+ else
+ idx_b = 0;
}
for (; idx_a >= 0; idx_a--) {
zone->pageset = alloc_percpu(struct per_cpu_pageset);
for_each_possible_cpu(cpu)
zone_pageset_init(zone, cpu);
-
- if (!zone->zone_pgdat->per_cpu_nodestats) {
- zone->zone_pgdat->per_cpu_nodestats =
- alloc_percpu(struct per_cpu_nodestat);
- }
}
/*
*/
void __init setup_per_cpu_pageset(void)
{
+ struct pglist_data *pgdat;
struct zone *zone;
for_each_populated_zone(zone)
setup_zone_pageset(zone);
+
+ for_each_online_pgdat(pgdat)
+ pgdat->per_cpu_nodestats =
+ alloc_percpu(struct per_cpu_nodestat);
}
static noinline __ref
#endif
}
-inline void *fixup_red_left(struct kmem_cache *s, void *p)
+void *fixup_red_left(struct kmem_cache *s, void *p)
{
if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
p += s->red_left_pad;
if (status)
goto out;
- dprintk("RPC: svcauth_gss: gss major status = %d\n",
- ud.major_status);
+ dprintk("RPC: svcauth_gss: gss major status = %d "
+ "minor status = %d\n",
+ ud.major_status, ud.minor_status);
switch (ud.major_status) {
case GSS_S_CONTINUE_NEEDED:
cache_purge(cd);
spin_lock(&cache_list_lock);
write_lock(&cd->hash_lock);
- if (cd->entries || atomic_read(&cd->inuse)) {
+ if (cd->entries) {
write_unlock(&cd->hash_lock);
spin_unlock(&cache_list_lock);
goto out;
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
+static unsigned int svc_rpc_per_connection_limit __read_mostly;
+module_param(svc_rpc_per_connection_limit, uint, 0644);
+
+
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
}
EXPORT_SYMBOL_GPL(svc_print_addr);
+static bool svc_xprt_slots_in_range(struct svc_xprt *xprt)
+{
+ unsigned int limit = svc_rpc_per_connection_limit;
+ int nrqsts = atomic_read(&xprt->xpt_nr_rqsts);
+
+ return limit == 0 || (nrqsts >= 0 && nrqsts < limit);
+}
+
+static bool svc_xprt_reserve_slot(struct svc_rqst *rqstp, struct svc_xprt *xprt)
+{
+ if (!test_bit(RQ_DATA, &rqstp->rq_flags)) {
+ if (!svc_xprt_slots_in_range(xprt))
+ return false;
+ atomic_inc(&xprt->xpt_nr_rqsts);
+ set_bit(RQ_DATA, &rqstp->rq_flags);
+ }
+ return true;
+}
+
+static void svc_xprt_release_slot(struct svc_rqst *rqstp)
+{
+ struct svc_xprt *xprt = rqstp->rq_xprt;
+ if (test_and_clear_bit(RQ_DATA, &rqstp->rq_flags)) {
+ atomic_dec(&xprt->xpt_nr_rqsts);
+ svc_xprt_enqueue(xprt);
+ }
+}
+
static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
{
if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
return true;
- if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED)))
- return xprt->xpt_ops->xpo_has_wspace(xprt);
+ if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED))) {
+ if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
+ svc_xprt_slots_in_range(xprt))
+ return true;
+ trace_svc_xprt_no_write_space(xprt);
+ return false;
+ }
return false;
}
atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
rqstp->rq_reserved = space;
- if (xprt->xpt_ops->xpo_adjust_wspace)
- xprt->xpt_ops->xpo_adjust_wspace(xprt);
svc_xprt_enqueue(xprt);
}
}
rqstp->rq_res.head[0].iov_len = 0;
svc_reserve(rqstp, 0);
+ svc_xprt_release_slot(rqstp);
rqstp->rq_xprt = NULL;
-
svc_xprt_put(xprt);
}
svc_add_new_temp_xprt(serv, newxpt);
else
module_put(xprt->xpt_class->xcl_owner);
- } else {
+ } else if (svc_xprt_reserve_slot(rqstp, xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
rqstp, rqstp->rq_pool->sp_id, xprt,
*/
void svc_drop(struct svc_rqst *rqstp)
{
+ trace_svc_drop(rqstp);
dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
svc_xprt_release(rqstp);
}
spin_unlock(&xprt->xpt_lock);
dprintk("revisit canceled\n");
svc_xprt_put(xprt);
+ trace_svc_drop_deferred(dr);
kfree(dr);
return;
}
set_bit(RQ_DROPME, &rqstp->rq_flags);
dr->handle.revisit = svc_revisit;
+ trace_svc_defer(rqstp);
return &dr->handle;
}
struct svc_deferred_req,
handle.recent);
list_del_init(&dr->handle.recent);
+ trace_svc_revisit_deferred(dr);
} else
clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
spin_unlock(&xprt->xpt_lock);
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
int flags);
-static void svc_udp_data_ready(struct sock *);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
static void svc_sock_detach(struct svc_xprt *);
return svc_port_is_privileged(svc_addr(rqstp));
}
-static bool sunrpc_waitqueue_active(wait_queue_head_t *wq)
-{
- if (!wq)
- return false;
- /*
- * There should normally be a memory * barrier here--see
- * wq_has_sleeper().
- *
- * It appears that isn't currently necessary, though, basically
- * because callers all appear to have sufficient memory barriers
- * between the time the relevant change is made and the
- * time they call these callbacks.
- *
- * The nfsd code itself doesn't actually explicitly wait on
- * these waitqueues, but it may wait on them for example in
- * sendpage() or sendmsg() calls. (And those may be the only
- * places, since it it uses nonblocking reads.)
- *
- * Maybe we should add the memory barriers anyway, but these are
- * hot paths so we'd need to be convinced there's no sigificant
- * penalty.
- */
- return waitqueue_active(wq);
-}
-
/*
* INET callback when data has been received on the socket.
*/
-static void svc_udp_data_ready(struct sock *sk)
+static void svc_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
- wait_queue_head_t *wq = sk_sleep(sk);
if (svsk) {
dprintk("svc: socket %p(inet %p), busy=%d\n",
svsk, sk,
test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
- set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- svc_xprt_enqueue(&svsk->sk_xprt);
+ svsk->sk_odata(sk);
+ if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
+ svc_xprt_enqueue(&svsk->sk_xprt);
}
- if (sunrpc_waitqueue_active(wq))
- wake_up_interruptible(wq);
}
/*
static void svc_write_space(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
- wait_queue_head_t *wq = sk_sleep(sk);
if (svsk) {
dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
+ svsk->sk_owspace(sk);
svc_xprt_enqueue(&svsk->sk_xprt);
}
-
- if (sunrpc_waitqueue_active(wq)) {
- dprintk("RPC svc_write_space: someone sleeping on %p\n",
- svsk);
- wake_up_interruptible(wq);
- }
}
static int svc_tcp_has_wspace(struct svc_xprt *xprt)
{
- struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
- struct svc_serv *serv = svsk->sk_xprt.xpt_server;
- int required;
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
return 1;
- required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
- if (sk_stream_wspace(svsk->sk_sk) >= required ||
- (sk_stream_min_wspace(svsk->sk_sk) == 0 &&
- atomic_read(&xprt->xpt_reserved) == 0))
- return 1;
- set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
- return 0;
-}
-
-static void svc_tcp_write_space(struct sock *sk)
-{
- struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
- struct socket *sock = sk->sk_socket;
-
- if (!sk_stream_is_writeable(sk) || !sock)
- return;
- if (!svsk || svc_tcp_has_wspace(&svsk->sk_xprt))
- clear_bit(SOCK_NOSPACE, &sock->flags);
- svc_write_space(sk);
-}
-
-static void svc_tcp_adjust_wspace(struct svc_xprt *xprt)
-{
- struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
-
- if (svc_tcp_has_wspace(xprt))
- clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+ return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
}
/*
svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
&svsk->sk_xprt, serv);
clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
- svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
+ svsk->sk_sk->sk_data_ready = svc_data_ready;
svsk->sk_sk->sk_write_space = svc_write_space;
/* initialise setting must have enough space to
static void svc_tcp_listen_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
- wait_queue_head_t *wq;
dprintk("svc: socket %p TCP (listen) state change %d\n",
sk, sk->sk_state);
+ if (svsk)
+ svsk->sk_odata(sk);
/*
* This callback may called twice when a new connection
* is established as a child socket inherits everything
} else
printk("svc: socket %p: no user data\n", sk);
}
-
- wq = sk_sleep(sk);
- if (sunrpc_waitqueue_active(wq))
- wake_up_interruptible_all(wq);
}
/*
static void svc_tcp_state_change(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
- wait_queue_head_t *wq = sk_sleep(sk);
dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
sk, sk->sk_state, sk->sk_user_data);
if (!svsk)
printk("svc: socket %p: no user data\n", sk);
else {
- set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
- svc_xprt_enqueue(&svsk->sk_xprt);
- }
- if (sunrpc_waitqueue_active(wq))
- wake_up_interruptible_all(wq);
-}
-
-static void svc_tcp_data_ready(struct sock *sk)
-{
- struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
- wait_queue_head_t *wq = sk_sleep(sk);
-
- dprintk("svc: socket %p TCP data ready (svsk %p)\n",
- sk, sk->sk_user_data);
- if (svsk) {
- set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- svc_xprt_enqueue(&svsk->sk_xprt);
+ svsk->sk_ostate(sk);
+ if (sk->sk_state != TCP_ESTABLISHED) {
+ set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+ svc_xprt_enqueue(&svsk->sk_xprt);
+ }
}
- if (sunrpc_waitqueue_active(wq))
- wake_up_interruptible(wq);
}
/*
dprintk("%s: connect from %s\n", serv->sv_name,
__svc_print_addr(sin, buf, sizeof(buf)));
+ /* Reset the inherited callbacks before calling svc_setup_socket */
+ newsock->sk->sk_state_change = svsk->sk_ostate;
+ newsock->sk->sk_data_ready = svsk->sk_odata;
+ newsock->sk->sk_write_space = svsk->sk_owspace;
+
/* make sure that a write doesn't block forever when
* low on memory
*/
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
.xpo_secure_port = svc_sock_secure_port,
- .xpo_adjust_wspace = svc_tcp_adjust_wspace,
};
static struct svc_xprt_class svc_tcp_class = {
} else {
dprintk("setting up TCP socket for reading\n");
sk->sk_state_change = svc_tcp_state_change;
- sk->sk_data_ready = svc_tcp_data_ready;
- sk->sk_write_space = svc_tcp_write_space;
+ sk->sk_data_ready = svc_data_ready;
+ sk->sk_write_space = svc_write_space;
svsk->sk_reclen = 0;
svsk->sk_tcplen = 0;
tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- if (sk->sk_state != TCP_ESTABLISHED)
+ switch (sk->sk_state) {
+ case TCP_SYN_RECV:
+ case TCP_ESTABLISHED:
+ break;
+ default:
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+ }
}
}
/* Initialize the socket */
if (sock->type == SOCK_DGRAM)
svc_udp_init(svsk, serv);
- else {
- /* initialise setting must have enough space to
- * receive and respond to one request.
- */
- svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
- 4 * serv->sv_max_mesg);
+ else
svc_tcp_init(svsk, serv);
- }
- dprintk("svc: svc_setup_socket created %p (inet %p)\n",
- svsk, svsk->sk_sk);
+ dprintk("svc: svc_setup_socket created %p (inet %p), "
+ "listen %d close %d\n",
+ svsk, svsk->sk_sk,
+ test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags),
+ test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
return svsk;
}
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct sock *sk = svsk->sk_sk;
- wait_queue_head_t *wq;
dprintk("svc: svc_sock_detach(%p)\n", svsk);
/* put back the old socket callbacks */
+ lock_sock(sk);
sk->sk_state_change = svsk->sk_ostate;
sk->sk_data_ready = svsk->sk_odata;
sk->sk_write_space = svsk->sk_owspace;
-
- wq = sk_sleep(sk);
- if (sunrpc_waitqueue_active(wq))
- wake_up_interruptible(wq);
+ sk->sk_user_data = NULL;
+ release_sock(sk);
}
/*
Drivers that are implemented on ASoC can be found in
"ALSA for SoC audio support" section.
-config SND_PXA2XX_LIB
- tristate
- select SND_AC97_CODEC if SND_PXA2XX_LIB_AC97
- select SND_DMAENGINE_PCM
-
-config SND_PXA2XX_LIB_AC97
- bool
-
if SND_ARM
config SND_ARMAACI
endif # SND_ARM
+config SND_PXA2XX_LIB
+ tristate
+ select SND_AC97_CODEC if SND_PXA2XX_LIB_AC97
+ select SND_DMAENGINE_PCM
+
+config SND_PXA2XX_LIB_AC97
+ bool
return NULL;
if (array->used >= array->alloced) {
int num = array->alloced + array->alloc_align;
+ int oldsize = array->alloced * array->elem_size;
int size = (num + 1) * array->elem_size;
void *nlist;
if (snd_BUG_ON(num >= 4096))
return NULL;
- nlist = krealloc(array->list, size, GFP_KERNEL | __GFP_ZERO);
+ nlist = krealloc(array->list, size, GFP_KERNEL);
if (!nlist)
return NULL;
+ memset(nlist + oldsize, 0, size - oldsize);
array->list = nlist;
array->alloced = num;
}
{ PCI_DEVICE(0x1022, 0x780d),
.driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB },
/* ATI HDMI */
+ { PCI_DEVICE(0x1002, 0x0002),
+ .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
{ PCI_DEVICE(0x1002, 0x1308),
.driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
{ PCI_DEVICE(0x1002, 0x157a),
}
}
+static void alc298_fixup_speaker_volume(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+{
+ if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+ /* The speaker is routed to the Node 0x06 by a mistake, as a result
+ we can't adjust the speaker's volume since this node does not has
+ Amp-out capability. we change the speaker's route to:
+ Node 0x02 (Audio Output) -> Node 0x0c (Audio Mixer) -> Node 0x17 (
+ Pin Complex), since Node 0x02 has Amp-out caps, we can adjust
+ speaker's volume now. */
+
+ hda_nid_t conn1[1] = { 0x0c };
+ snd_hda_override_conn_list(codec, 0x17, 1, conn1);
+ }
+}
+
/* Hook to update amp GPIO4 for automute */
static void alc280_hp_gpio4_automute_hook(struct hda_codec *codec,
struct hda_jack_callback *jack)
ALC280_FIXUP_HP_HEADSET_MIC,
ALC221_FIXUP_HP_FRONT_MIC,
ALC292_FIXUP_TPT460,
+ ALC298_FIXUP_SPK_VOLUME,
};
static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC293_FIXUP_LENOVO_SPK_NOISE,
},
+ [ALC298_FIXUP_SPK_VOLUME] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc298_fixup_speaker_volume,
+ .chained = true,
+ .chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13 9350", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
{0x14, 0x90170130},
{0x1b, 0x01014020},
{0x21, 0x0221103f}),
+ SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x14, 0x90170130},
+ {0x1b, 0x02011020},
+ {0x21, 0x0221103f}),
SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
{0x14, 0x90170150},
{0x1b, 0x02011020},
ALC668_FIXUP_ASUS_Nx51,
ALC891_FIXUP_HEADSET_MODE,
ALC891_FIXUP_DELL_MIC_NO_PRESENCE,
+ ALC662_FIXUP_ACER_VERITON,
};
static const struct hda_fixup alc662_fixups[] = {
.chained = true,
.chain_id = ALC891_FIXUP_HEADSET_MODE
},
+ [ALC662_FIXUP_ACER_VERITON] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x15, 0x50170120 }, /* no internal speaker */
+ { }
+ }
+ },
};
static const struct snd_pci_quirk alc662_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD),
SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD),
SND_PCI_QUIRK(0x19da, 0xa130, "Zotac Z68", ALC662_FIXUP_ZOTAC_Z68),
+ SND_PCI_QUIRK(0x1b0a, 0x01b8, "ACER Veriton", ALC662_FIXUP_ACER_VERITON),
SND_PCI_QUIRK(0x1b35, 0x2206, "CZC P10T", ALC662_FIXUP_CZC_P10T),
#if 0
retval = ioctl(fd, RTC_ALM_SET, &rtc_tm);
if (retval == -1) {
- if (errno == ENOTTY) {
+ if (errno == EINVAL) {
fprintf(stderr,
"\n...Alarm IRQs not supported.\n");
goto test_PIE;
}
+
perror("RTC_ALM_SET ioctl");
exit(errno);
}
/* Enable alarm interrupts */
retval = ioctl(fd, RTC_AIE_ON, 0);
if (retval == -1) {
+ if (errno == EINVAL) {
+ fprintf(stderr,
+ "\n...Alarm IRQs not supported.\n");
+ goto test_PIE;
+ }
+
perror("RTC_AIE_ON ioctl");
exit(errno);
}
retval = ioctl(fd, RTC_IRQP_READ, &tmp);
if (retval == -1) {
/* not all RTCs support periodic IRQs */
- if (errno == ENOTTY) {
+ if (errno == EINVAL) {
fprintf(stderr, "\nNo periodic IRQ support\n");
goto done;
}
retval = ioctl(fd, RTC_IRQP_SET, tmp);
if (retval == -1) {
/* not all RTCs can change their periodic IRQ rate */
- if (errno == ENOTTY) {
+ if (errno == EINVAL) {
fprintf(stderr,
"\n...Periodic IRQ rate is fixed\n");
goto done;