--- /dev/null
+* ARC-HS Interrupt Distribution Unit
+
+ This optional 2nd level interrupt controller can be used in SMP configurations for
+ dynamic IRQ routing, load balancing of common/external IRQs towards core intc.
+
+Properties:
+
+- compatible: "snps,archs-idu-intc"
+- interrupt-controller: This is an interrupt controller.
+- interrupt-parent: <reference to parent core intc>
+- #interrupt-cells: Must be <2>.
+- interrupts: <...> specifies the upstream core irqs
+
+ First cell specifies the "common" IRQ from peripheral to IDU
+ Second cell specifies the irq distribution mode to cores
+ 0=Round Robin; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+
+ intc accessed via the special ARC AUX register interface, hence "reg" property
+ is not specified.
+
+Example:
+ core_intc: core-interrupt-controller {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ idu_intc: idu-interrupt-controller {
+ compatible = "snps,archs-idu-intc";
+ interrupt-controller;
+ interrupt-parent = <&core_intc>;
+
+ /*
+ * <hwirq distribution>
+ * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+ */
+ #interrupt-cells = <2>;
+
+ /* upstream core irqs: downstream these are "COMMON" irq 0,1.. */
+ interrupts = <24 25 26 27 28 29 30 31>;
+ };
+
+ some_device: serial@c0fc1000 {
+ interrupt-parent = <&idu_intc>;
+ interrupts = <0 0>; /* upstream idu IRQ #24, Round Robin */
+ };
--- /dev/null
+* ARC-HS incore Interrupt Controller (Provided by cores implementing ARCv2 ISA)
+
+Properties:
+
+- compatible: "snps,archs-intc"
+- interrupt-controller: This is an interrupt controller.
+- #interrupt-cells: Must be <1>.
+
+ Single Cell "interrupts" property of a device specifies the IRQ number
+ between 16 to 256
+
+ intc accessed via the special ARC AUX register interface, hence "reg" property
+ is not specified.
+
+Example:
+
+ intc: interrupt-controller {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ interrupts = <16 17 18 19 20 21 22 23 24 25>;
+ };
--- /dev/null
+Synopsys DesignWare ARC Software Development Platforms Device Tree Bindings
+---------------------------------------------------------------------------
+
+SDP Main Board with an AXC001 CPU Card hoisting ARC700 core in silicon
+
+Required root node properties:
+ - compatible = "snps,axs101", "snps,arc-sdp";
--- /dev/null
+Synopsys DesignWare ARC Software Development Platforms Device Tree Bindings
+---------------------------------------------------------------------------
+
+SDP Main Board with an AXC003 FPGA Card which can contain various flavours of
+HS38x cores.
+
+Required root node properties:
+ - compatible = "snps,axs103", "snps,arc-sdp";
* Marvell Armada 370 / Armada XP Ethernet Controller (NETA)
Required properties:
-- compatible: should be "marvell,armada-370-neta".
+- compatible: "marvell,armada-370-neta" or "marvell,armada-xp-neta".
- reg: address and length of the register set for the device.
- interrupts: interrupt for the device
- phy: See ethernet.txt file in the same directory.
Name Removed
---- -------
- delaylog/nodelaylog v3.20
- ihashsize v3.20
- irixsgid v3.20
- osyncisdsync/osyncisosync v3.20
+ delaylog/nodelaylog v4.0
+ ihashsize v4.0
+ irixsgid v4.0
+ osyncisdsync/osyncisosync v4.0
sysctls
Name Removed
---- -------
- fs.xfs.xfsbufd_centisec v3.20
- fs.xfs.age_buffer_centisecs v3.20
+ fs.xfs.xfsbufd_centisec v4.0
+ fs.xfs.age_buffer_centisecs v4.0
timeconst-file := include/generated/timeconst.h
-#always += $(timeconst-file)
targets += $(timeconst-file)
quiet_cmd_gentimeconst = GEN $@
F: Documentation/devicetree/bindings/arc/
F: drivers/tty/serial/arc_uart.c
+SYNOPSYS ARC SDP platform support
+M: Alexey Brodkin <abrodkin@synopsys.com>
+S: Supported
+F: arch/arc/plat-axs10x
+F: arch/arc/boot/dts/ax*
+F: Documentation/devicetree/bindings/arc/axs10*
+
SYSTEM CONFIGURATION (SYSCON)
M: Lee Jones <lee.jones@linaro.org>
M: Arnd Bergmann <arnd@arndb.de>
menu "ARC Platform/SoC/Board"
-source "arch/arc/plat-arcfpga/Kconfig"
+source "arch/arc/plat-sim/Kconfig"
source "arch/arc/plat-tb10x/Kconfig"
+source "arch/arc/plat-axs10x/Kconfig"
#New platform adds here
endmenu
+choice
+ prompt "ARC Instruction Set"
+ default ISA_ARCOMPACT
+
+config ISA_ARCOMPACT
+ bool "ARCompact ISA"
+ help
+ The original ARC ISA of ARC600/700 cores
+
+config ISA_ARCV2
+ bool "ARC ISA v2"
+ help
+ ISA for the Next Generation ARC-HS cores
+
+endchoice
+
menu "ARC CPU Configuration"
choice
prompt "ARC Core"
- default ARC_CPU_770
+ default ARC_CPU_770 if ISA_ARCOMPACT
+ default ARC_CPU_HS if ISA_ARCV2
+
+if ISA_ARCOMPACT
config ARC_CPU_750D
bool "ARC750D"
config ARC_CPU_770
bool "ARC770"
- select ARC_CPU_REL_4_10
+ select ARC_HAS_SWAPE
help
Support for ARC770 core introduced with Rel 4.10 (Summer 2011)
This core has a bunch of cool new features:
-Caches: New Prog Model, Region Flush
-Insns: endian swap, load-locked/store-conditional, time-stamp-ctr
+endif #ISA_ARCOMPACT
+
+config ARC_CPU_HS
+ bool "ARC-HS"
+ depends on ISA_ARCV2
+ help
+ Support for ARC HS38x Cores based on ARCv2 ISA
+ The notable features are:
+ - SMP configurations of upto 4 core with coherency
+ - Optional L2 Cache and IO-Coherency
+ - Revised Interrupt Architecture (multiple priorites, reg banks,
+ auto stack switch, auto regfile save/restore)
+ - MMUv4 (PIPT dcache, Huge Pages)
+ - Instructions for
+ * 64bit load/store: LDD, STD
+ * Hardware assisted divide/remainder: DIV, REM
+ * Function prologue/epilogue: ENTER_S, LEAVE_S
+ * IRQ enable/disable: CLRI, SETI
+ * pop count: FFS, FLS
+ * SETcc, BMSKN, XBFU...
+
endchoice
config CPU_BIG_ENDIAN
help
Build kernel for Big Endian Mode of ARC CPU
-# If a platform can't work with 0x8000_0000 based dma_addr_t
-config ARC_PLAT_NEEDS_CPU_TO_DMA
- bool
-
config SMP
- bool "Symmetric Multi-Processing (Incomplete)"
+ bool "Symmetric Multi-Processing"
default n
+ select ARC_HAS_COH_CACHES if ISA_ARCV2
+ select ARC_MCIP if ISA_ARCV2
help
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, say N. If you have a system with more
- than one CPU, say Y.
+ This enables support for systems with more than one CPU.
if SMP
config ARC_HAS_REENTRANT_IRQ_LV2
def_bool n
-endif
+config ARC_MCIP
+ bool "ARConnect Multicore IP (MCIP) Support "
+ depends on ISA_ARCV2
+ help
+ This IP block enables SMP in ARC-HS38 cores.
+ It provides for cross-core interrupts, multi-core debug
+ hardware semaphores, shared memory,....
config NR_CPUS
int "Maximum number of CPUs (2-4096)"
range 2 4096
- depends on SMP
- default "2"
+ default "4"
+
+endif #SMP
menuconfig ARC_CACHE
bool "Enable Cache Support"
config ARC_CACHE_VIPT_ALIASING
bool "Support VIPT Aliasing D$"
- depends on ARC_HAS_DCACHE
+ depends on ARC_HAS_DCACHE && ISA_ARCOMPACT
default n
endif #ARC_CACHE
Multipler. Otherwise software multipy lib is used
choice
- prompt "ARC700 MMU Version"
+ prompt "MMU Version"
default ARC_MMU_V3 if ARC_CPU_770
default ARC_MMU_V2 if ARC_CPU_750D
+ default ARC_MMU_V4 if ARC_CPU_HS
config ARC_MMU_V1
bool "MMU v1"
Variable Page size (1k-16k), var JTLB size 128 x (2 or 4)
Shared Address Spaces (SASID)
+config ARC_MMU_V4
+ bool "MMU v4"
+ depends on ISA_ARCV2
+
endchoice
endchoice
+if ISA_ARCOMPACT
+
config ARC_COMPACT_IRQ_LEVELS
bool "ARCompact IRQ Priorities: High(2)/Low(1)"
default n
config ARC_IRQ6_LV2
bool
-endif
+endif #ARC_COMPACT_IRQ_LEVELS
config ARC_FPU_SAVE_RESTORE
bool "Enable FPU state persistence across context switch"
based on actual usage of FPU by a task. Thus our implemn does
this for all tasks in system.
+endif #ISA_ARCOMPACT
+
config ARC_CANT_LLSC
def_bool n
-menuconfig ARC_CPU_REL_4_10
- bool "Enable support for Rel 4.10 features"
- default n
- help
- -ARC770 (and dependent features) enabled
- -ARC750 also shares some of the new features with 770
-
config ARC_HAS_LLSC
bool "Insn: LLOCK/SCOND (efficient atomic ops)"
default y
- depends on ARC_CPU_770 && !ARC_CANT_LLSC
+ depends on !ARC_CPU_750D && !ARC_CANT_LLSC
config ARC_HAS_SWAPE
bool "Insn: SWAPE (endian-swap)"
default y
- depends on ARC_CPU_REL_4_10
-config ARC_HAS_RTSC
- bool "Insn: RTSC (64-bit r/o cycle counter)"
+if ISA_ARCV2
+
+config ARC_HAS_LL64
+ bool "Insn: 64bit LDD/STD"
+ help
+ Enable gcc to generate 64-bit load/store instructions
+ ISA mandates even/odd registers to allow encoding of two
+ dest operands with 2 possible source operands.
default y
- depends on ARC_CPU_REL_4_10
+
+config ARC_HAS_RTC
+ bool "Local 64-bit r/o cycle counter"
+ default n
depends on !SMP
+config ARC_HAS_GRTC
+ bool "SMP synchronized 64-bit cycle counter"
+ default y
+ depends on SMP
+
+config ARC_NUMBER_OF_INTERRUPTS
+ int "Number of interrupts"
+ range 8 240
+ default 32
+ help
+ This defines the number of interrupts on the ARCv2HS core.
+ It affects the size of vector table.
+ The initial 8 IRQs are fixed (Timer, ICI etc) and although configurable
+ in hardware, it keep things simple for Linux to assume they are always
+ present.
+
+endif # ISA_ARCV2
+
endmenu # "ARC CPU Configuration"
config LINUX_LINK_BASE
config ARC_EMUL_UNALIGNED
bool "Emulate unaligned memory access (userspace only)"
+ default N
select SYSCTL_ARCH_UNALIGN_NO_WARN
select SYSCTL_ARCH_UNALIGN_ALLOW
+ depends on ISA_ARCOMPACT
help
This enables misaligned 16 & 32 bit memory access from user space.
Use ONLY-IF-ABS-NECESSARY as it will be very slow and also can hide
bool "ARC debugging"
default y
+if ARC_DBG
+
config ARC_DW2_UNWIND
bool "Enable DWARF specific kernel stack unwind"
- depends on ARC_DBG
default y
select KALLSYMS
help
config ARC_DBG_TLB_PARANOIA
bool "Paranoia Checks in Low Level TLB Handlers"
- depends on ARC_DBG
default n
config ARC_DBG_TLB_MISS_COUNT
bool "Profile TLB Misses"
default n
select DEBUG_FS
- depends on ARC_DBG
help
Counts number of I and D TLB Misses and exports them via Debugfs
The counters can be cleared via Debugfs as well
+if SMP
+
+config ARC_IPI_DBG
+ bool "Debug Inter Core interrupts"
+ default n
+
+endif
+
+endif
+
+config ARC_UBOOT_SUPPORT
+ bool "Support uboot arg Handling"
+ default n
+ help
+ ARC Linux by default checks for uboot provided args as pointers to
+ external cmdline or DTB. This however breaks in absence of uboot,
+ when booting from Metaware debugger directly, as the registers are
+ not zeroed out on reset by mdb and/or ARCv2 based cores. The bogus
+ registers look like uboot args to kernel which then chokes.
+ So only enable the uboot arg checking/processing if users are sure
+ of uboot being in play.
+
config ARC_BUILTIN_DTB_NAME
string "Built in DTB"
help
UTS_MACHINE := arc
ifeq ($(CROSS_COMPILE),)
-CROSS_COMPILE := arc-linux-uclibc-
+CROSS_COMPILE := arc-linux-
endif
KBUILD_DEFCONFIG := nsim_700_defconfig
-cflags-y += -mA7 -fno-common -pipe -fno-builtin -D__linux__
+cflags-y += -fno-common -pipe -fno-builtin -D__linux__
+cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7
+cflags-$(CONFIG_ISA_ARCV2) += -mcpu=archs
ifdef CONFIG_ARC_CURR_IN_REG
# For a global register defintion, make sure it gets passed to every file
cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock
cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape
-cflags-$(CONFIG_ARC_HAS_RTSC) += -mrtsc
+
+ifndef CONFIG_ARC_HAS_LL64
+cflags-$(CONFIG_ISA_ARCV2) += -mno-ll64
+endif
+
cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables
# By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok
# w/o this dtb won't embed into kernel binary
core-y += arch/arc/boot/dts/
-core-$(CONFIG_ARC_PLAT_FPGA_LEGACY) += arch/arc/plat-arcfpga/
-core-$(CONFIG_ARC_PLAT_TB10X) += arch/arc/plat-tb10x/
+core-$(CONFIG_ARC_PLAT_SIM) += arch/arc/plat-sim/
+core-$(CONFIG_ARC_PLAT_TB10X) += arch/arc/plat-tb10x/
+core-$(CONFIG_ARC_PLAT_AXS10X) += arch/arc/plat-axs10x/
drivers-$(CONFIG_OPROFILE) += arch/arc/oprofile/
# Built-in dtb
-builtindtb-y := angel4
+builtindtb-y := nsim_700
ifneq ($(CONFIG_ARC_BUILTIN_DTB_NAME),"")
builtindtb-y := $(patsubst "%",%,$(CONFIG_ARC_BUILTIN_DTB_NAME))
+++ /dev/null
-/*
- * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-/dts-v1/;
-
-/include/ "skeleton.dtsi"
-
-/ {
- compatible = "snps,arc-angel4";
- clock-frequency = <80000000>; /* 80 MHZ */
- #address-cells = <1>;
- #size-cells = <1>;
- interrupt-parent = <&intc>;
-
- chosen {
- bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
- };
-
- aliases {
- serial0 = &arcuart0;
- };
-
- fpga {
- compatible = "simple-bus";
- #address-cells = <1>;
- #size-cells = <1>;
-
- /* child and parent address space 1:1 mapped */
- ranges;
-
- intc: interrupt-controller {
- compatible = "snps,arc700-intc";
- interrupt-controller;
- #interrupt-cells = <1>;
- };
-
- arcuart0: serial@c0fc1000 {
- compatible = "snps,arc-uart";
- reg = <0xc0fc1000 0x100>;
- interrupts = <5>;
- clock-frequency = <80000000>;
- current-speed = <115200>;
- status = "okay";
- };
-
- ethernet@c0fc2000 {
- compatible = "snps,arc-emac";
- reg = <0xc0fc2000 0x3c>;
- interrupts = <6>;
- mac-address = [ 00 11 22 33 44 55 ];
- clock-frequency = <80000000>;
- max-speed = <100>;
- phy = <&phy0>;
-
- #address-cells = <1>;
- #size-cells = <0>;
- phy0: ethernet-phy@0 {
- reg = <1>;
- };
- };
-
- arcpmu0: pmu {
- compatible = "snps,arc700-pct";
- };
- };
-};
--- /dev/null
+/*
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device tree for AXC001 770D/EM6/AS221 CPU card
+ * Note that this file only supports the 770D CPU
+ */
+
+/ {
+ compatible = "snps,arc";
+ clock-frequency = <750000000>; /* 750 MHZ */
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpu_card {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ ranges = <0x00000000 0xf0000000 0x10000000>;
+
+ cpu_intc: arc700-intc@cpu {
+ compatible = "snps,arc700-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ /*
+ * this GPIO block ORs all interrupts on CPU card (creg,..)
+ * to uplink only 1 IRQ to ARC core intc
+ */
+ dw-apb-gpio@0x2000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = < 0x2000 0x80 >;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ictl_intc: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <30>;
+ reg = <0>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = <15>;
+ };
+ };
+
+ debug_uart: dw-apb-uart@0x5000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x5000 0x100>;
+ clock-frequency = <33333000>;
+ interrupt-parent = <&ictl_intc>;
+ interrupts = <19 4>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ arcpmu0: pmu {
+ compatible = "snps,arc700-pct";
+ };
+ };
+
+ /*
+ * This INTC is actually connected to DW APB GPIO
+ * which acts as a wire between MB INTC and CPU INTC.
+ * GPIO INTC is configured in platform init code
+ * and here we mimic direct connection from MB INTC to
+ * CPU INTC, thus we set "interrupts = <7>" instead of
+ * "interrupts = <12>"
+ *
+ * This intc actually resides on MB, but we move it here to
+ * avoid duplicating the MB dtsi file given that IRQ from
+ * this intc to cpu intc are different for axs101 and axs103
+ */
+ mb_intc: dw-apb-ictl@0xe0012000 {
+ #interrupt-cells = <1>;
+ compatible = "snps,dw-apb-ictl";
+ reg = < 0xe0012000 0x200 >;
+ interrupt-controller;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = < 7 >;
+ };
+
+ memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0x80000000 0x40000000>;
+ device_type = "memory";
+ reg = <0x00000000 0x20000000>; /* 512MiB */
+ };
+};
--- /dev/null
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device tree for AXC003 CPU card: HS38x UP configuration
+ */
+
+/ {
+ compatible = "snps,arc";
+ clock-frequency = <75000000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpu_card {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ ranges = <0x00000000 0xf0000000 0x10000000>;
+
+ cpu_intc: archs-intc@cpu {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ /*
+ * this GPIO block ORs all interrupts on CPU card (creg,..)
+ * to uplink only 1 IRQ to ARC core intc
+ */
+ dw-apb-gpio@0x2000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = < 0x2000 0x80 >;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ictl_intc: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <30>;
+ reg = <0>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = <25>;
+ };
+ };
+
+ debug_uart: dw-apb-uart@0x5000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x5000 0x100>;
+ clock-frequency = <33333000>;
+ interrupt-parent = <&ictl_intc>;
+ interrupts = <2 4>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ arcpct0: pct {
+ compatible = "snps,archs-pct";
+ #interrupt-cells = <1>;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = <20>;
+ };
+ };
+
+ /*
+ * This INTC is actually connected to DW APB GPIO
+ * which acts as a wire between MB INTC and CPU INTC.
+ * GPIO INTC is configured in platform init code
+ * and here we mimic direct connection from MB INTC to
+ * CPU INTC, thus we set "interrupts = <7>" instead of
+ * "interrupts = <12>"
+ *
+ * This intc actually resides on MB, but we move it here to
+ * avoid duplicating the MB dtsi file given that IRQ from
+ * this intc to cpu intc are different for axs101 and axs103
+ */
+ mb_intc: dw-apb-ictl@0xe0012000 {
+ #interrupt-cells = <1>;
+ compatible = "snps,dw-apb-ictl";
+ reg = < 0xe0012000 0x200 >;
+ interrupt-controller;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = < 24 >;
+ };
+
+ memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0x80000000 0x40000000>;
+ device_type = "memory";
+ reg = <0x00000000 0x20000000>; /* 512MiB */
+ };
+};
--- /dev/null
+/*
+ * Copyright (C) 2014, 2015 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device tree for AXC003 CPU card: HS38x2 (Dual Core) with IDU intc
+ */
+
+/ {
+ compatible = "snps,arc";
+ clock-frequency = <75000000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpu_card {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ ranges = <0x00000000 0xf0000000 0x10000000>;
+
+ cpu_intc: archs-intc@cpu {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ idu_intc: idu-interrupt-controller {
+ compatible = "snps,archs-idu-intc";
+ interrupt-controller;
+ interrupt-parent = <&cpu_intc>;
+
+ /*
+ * <hwirq distribution>
+ * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+ */
+ #interrupt-cells = <2>;
+
+ /*
+ * upstream irqs to core intc - downstream these are
+ * "COMMON" irq 0,1..
+ */
+ interrupts = <24 25>;
+ };
+
+ /*
+ * this GPIO block ORs all interrupts on CPU card (creg,..)
+ * to uplink only 1 IRQ to ARC core intc
+ */
+ dw-apb-gpio@0x2000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = < 0x2000 0x80 >;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ictl_intc: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <30>;
+ reg = <0>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&idu_intc>;
+
+ /*
+ * cmn irq 1 -> cpu irq 25
+ * Distribute to cpu0 only
+ */
+ interrupts = <1 1>;
+ };
+ };
+
+ debug_uart: dw-apb-uart@0x5000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x5000 0x100>;
+ clock-frequency = <33333000>;
+ interrupt-parent = <&ictl_intc>;
+ interrupts = <2 4>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ arcpct0: pct {
+ compatible = "snps,archs-pct";
+ #interrupt-cells = <1>;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = <20>;
+ };
+ };
+
+ /*
+ * This INTC is actually connected to DW APB GPIO
+ * which acts as a wire between MB INTC and CPU INTC.
+ * GPIO INTC is configured in platform init code
+ * and here we mimic direct connection from MB INTC to
+ * CPU INTC, thus we set "interrupts = <0 1>" instead of
+ * "interrupts = <12>"
+ *
+ * This intc actually resides on MB, but we move it here to
+ * avoid duplicating the MB dtsi file given that IRQ from
+ * this intc to cpu intc are different for axs101 and axs103
+ */
+ mb_intc: dw-apb-ictl@0xe0012000 {
+ #interrupt-cells = <1>;
+ compatible = "snps,dw-apb-ictl";
+ reg = < 0xe0012000 0x200 >;
+ interrupt-controller;
+ interrupt-parent = <&idu_intc>;
+ interrupts = <0 1>; /* cmn irq 0 -> cpu irq 24
+ distribute to cpu0 only */
+ };
+
+ memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0x80000000 0x40000000>;
+ device_type = "memory";
+ reg = <0x00000000 0x20000000>; /* 512MiB */
+ };
+};
--- /dev/null
+/*
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * ARC AXS101 S/W development platform
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "axc001.dtsi"
+/include/ "axs10x_mb.dtsi"
+
+/ {
+ compatible = "snps,axs101", "snps,arc-sdp";
+
+ chosen {
+ bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0";
+ };
+};
--- /dev/null
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device Tree for AXS103 SDP with AXS10X Main Board and
+ * AXC003 FPGA Card (with UP bitfile)
+ */
+/dts-v1/;
+
+/include/ "axc003.dtsi"
+/include/ "axs10x_mb.dtsi"
+
+/ {
+ compatible = "snps,axs103", "snps,arc-sdp";
+
+ chosen {
+ bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=ttyS3,115200n8 debug print-fatal-signals=1";
+ };
+};
--- /dev/null
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device Tree for AXS103 SDP with AXS10X Main Board and
+ * AXC003 FPGA Card (with SMP bitfile)
+ */
+/dts-v1/;
+
+/include/ "axc003_idu.dtsi"
+/include/ "axs10x_mb.dtsi"
+
+/ {
+ compatible = "snps,axs103", "snps,arc-sdp";
+
+ chosen {
+ bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=ttyS3,115200n8 debug print-fatal-signals=1";
+ };
+};
--- /dev/null
+/*
+ * Support for peripherals on the AXS10x mainboard
+ *
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/ {
+ axs10x_mb {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0xe0000000 0x10000000>;
+ interrupt-parent = <&mb_intc>;
+
+ clocks {
+ i2cclk: i2cclk {
+ compatible = "fixed-clock";
+ clock-frequency = <50000000>;
+ #clock-cells = <0>;
+ };
+
+ apbclk: apbclk {
+ compatible = "fixed-clock";
+ clock-frequency = <50000000>;
+ #clock-cells = <0>;
+ };
+
+ mmcclk: mmcclk {
+ compatible = "fixed-clock";
+ clock-frequency = <50000000>;
+ #clock-cells = <0>;
+ };
+ };
+
+ ethernet@0x18000 {
+ #interrupt-cells = <1>;
+ compatible = "snps,dwmac";
+ reg = < 0x18000 0x2000 >;
+ interrupts = < 4 >;
+ interrupt-names = "macirq";
+ phy-mode = "rgmii";
+ snps,pbl = < 32 >;
+ clocks = <&apbclk>;
+ clock-names = "stmmaceth";
+ };
+
+ ehci@0x40000 {
+ compatible = "generic-ehci";
+ reg = < 0x40000 0x100 >;
+ interrupts = < 8 >;
+ };
+
+ ohci@0x60000 {
+ compatible = "generic-ohci";
+ reg = < 0x60000 0x100 >;
+ interrupts = < 8 >;
+ };
+
+ /*
+ * According to DW Mobile Storage databook it is required
+ * to use "Hold Register" if card is enumerated in SDR12 or
+ * SDR25 modes.
+ *
+ * Utilization of "Hold Register" is already implemented via
+ * dw_mci_pltfm_prepare_command() which in its turn gets
+ * used through dw_mci_drv_data->prepare_command call-back.
+ * This call-back is used in Altera Socfpga platform and so
+ * we may reuse it saying that we're compatible with their
+ * "altr,socfpga-dw-mshc".
+ *
+ * Most probably "Hold Register" utilization is platform-
+ * independent requirement which means that single unified
+ * "snps,dw-mshc" should be enough for all users of DW MMC once
+ * dw_mci_pltfm_prepare_command() is used in generic platform
+ * code.
+ */
+ mmc@0x15000 {
+ compatible = "altr,socfpga-dw-mshc";
+ reg = < 0x15000 0x400 >;
+ num-slots = < 1 >;
+ fifo-depth = < 16 >;
+ card-detect-delay = < 200 >;
+ clocks = <&apbclk>, <&mmcclk>;
+ clock-names = "biu", "ciu";
+ interrupts = < 7 >;
+ bus-width = < 4 >;
+ };
+
+ uart@0x20000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x20000 0x100>;
+ clock-frequency = <33333333>;
+ interrupts = <17>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ uart@0x21000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x21000 0x100>;
+ clock-frequency = <33333333>;
+ interrupts = <18>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ /* UART muxed with USB data port (ttyS3) */
+ uart@0x22000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x22000 0x100>;
+ clock-frequency = <33333333>;
+ interrupts = <19>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ i2c@0x1d000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x1d000 0x100>;
+ clock-frequency = <400000>;
+ clocks = <&i2cclk>;
+ interrupts = <14>;
+ };
+
+ i2c@0x1e000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x1e000 0x100>;
+ clock-frequency = <400000>;
+ clocks = <&i2cclk>;
+ interrupts = <15>;
+ };
+
+ i2c@0x1f000 {
+ compatible = "snps,designware-i2c";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x1f000 0x100>;
+ clock-frequency = <400000>;
+ clocks = <&i2cclk>;
+ interrupts = <16>;
+
+ eeprom@0x54{
+ compatible = "24c01";
+ reg = <0x54>;
+ pagesize = <0x8>;
+ };
+
+ eeprom@0x57{
+ compatible = "24c04";
+ reg = <0x57>;
+ pagesize = <0x8>;
+ };
+ };
+
+ gpio0:gpio@13000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = <0x13000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gpio0_banka: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <32>;
+ reg = <0>;
+ };
+
+ gpio0_bankb: gpio-controller@1 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <8>;
+ reg = <1>;
+ };
+
+ gpio0_bankc: gpio-controller@2 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <8>;
+ reg = <2>;
+ };
+ };
+
+ gpio1:gpio@14000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = <0x14000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gpio1_banka: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <30>;
+ reg = <0>;
+ };
+
+ gpio1_bankb: gpio-controller@1 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <10>;
+ reg = <1>;
+ };
+
+ gpio1_bankc: gpio-controller@2 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <8>;
+ reg = <2>;
+ };
+ };
+ };
+};
--- /dev/null
+/*
+ * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+ compatible = "snps,nsim";
+ clock-frequency = <80000000>; /* 80 MHZ */
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&intc>;
+
+ chosen {
+ bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+ };
+
+ aliases {
+ serial0 = &arcuart0;
+ };
+
+ fpga {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* child and parent address space 1:1 mapped */
+ ranges;
+
+ intc: interrupt-controller {
+ compatible = "snps,arc700-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ arcuart0: serial@c0fc1000 {
+ compatible = "snps,arc-uart";
+ reg = <0xc0fc1000 0x100>;
+ interrupts = <5>;
+ clock-frequency = <80000000>;
+ current-speed = <115200>;
+ status = "okay";
+ };
+
+ ethernet@c0fc2000 {
+ compatible = "snps,arc-emac";
+ reg = <0xc0fc2000 0x3c>;
+ interrupts = <6>;
+ mac-address = [ 00 11 22 33 44 55 ];
+ clock-frequency = <80000000>;
+ max-speed = <100>;
+ phy = <&phy0>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy0: ethernet-phy@0 {
+ reg = <1>;
+ };
+ };
+
+ arcpmu0: pmu {
+ compatible = "snps,arc700-pct";
+ };
+ };
+};
--- /dev/null
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+ compatible = "snps,nsim_hs";
+ interrupt-parent = <&core_intc>;
+
+ chosen {
+ bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+ };
+
+ aliases {
+ serial0 = &arcuart0;
+ };
+
+ fpga {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* child and parent address space 1:1 mapped */
+ ranges;
+
+ core_intc: core-interrupt-controller {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ arcuart0: serial@c0fc1000 {
+ compatible = "snps,arc-uart";
+ reg = <0xc0fc1000 0x100>;
+ interrupts = <24>;
+ clock-frequency = <80000000>;
+ current-speed = <115200>;
+ status = "okay";
+ };
+
+ arcpct0: pct {
+ compatible = "snps,archs-pct";
+ #interrupt-cells = <1>;
+ interrupts = <20>;
+ };
+ };
+};
--- /dev/null
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+ compatible = "snps,nsim_hs";
+ interrupt-parent = <&core_intc>;
+
+ chosen {
+ bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+ };
+
+ aliases {
+ serial0 = &arcuart0;
+ };
+
+ fpga {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* child and parent address space 1:1 mapped */
+ ranges;
+
+ core_intc: core-interrupt-controller {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ idu_intc: idu-interrupt-controller {
+ compatible = "snps,archs-idu-intc";
+ interrupt-controller;
+ interrupt-parent = <&core_intc>;
+
+ /*
+ * <hwirq distribution>
+ * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+ */
+ #interrupt-cells = <2>;
+
+ /*
+ * upstream irqs to core intc - downstream these are
+ * "COMMON" irq 0,1..
+ */
+ interrupts = <24 25 26 27 28 29 30 31>;
+ };
+
+ arcuart0: serial@c0fc1000 {
+ compatible = "snps,arc-uart";
+ reg = <0xc0fc1000 0x100>;
+ interrupt-parent = <&idu_intc>;
+ interrupts = <0 0>;
+ clock-frequency = <80000000>;
+ current-speed = <115200>;
+ status = "okay";
+ };
+
+ arcpct0: pct {
+ compatible = "snps,archs-pct";
+ #interrupt-cells = <1>;
+ interrupts = <20>;
+ };
+ };
+};
--- /dev/null
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+ compatible = "snps,nsimosci_hs";
+ clock-frequency = <20000000>; /* 20 MHZ */
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&core_intc>;
+
+ chosen {
+ /* this is for console on PGU */
+ /* bootargs = "console=tty0 consoleblank=0"; */
+ /* this is for console on serial */
+ bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
+ };
+
+ aliases {
+ serial0 = &uart0;
+ };
+
+ fpga {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* child and parent address space 1:1 mapped */
+ ranges;
+
+ core_intc: core-interrupt-controller {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ uart0: serial@f0000000 {
+ compatible = "ns8250";
+ reg = <0xf0000000 0x2000>;
+ interrupts = <24>;
+ clock-frequency = <3686400>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ no-loopback-test = <1>;
+ };
+
+ pgu0: pgu@f9000000 {
+ compatible = "snps,arcpgufb";
+ reg = <0xf9000000 0x400>;
+ };
+
+ ps2: ps2@f9001000 {
+ compatible = "snps,arc_ps2";
+ reg = <0xf9000400 0x14>;
+ interrupts = <27>;
+ interrupt-names = "arc_ps2_irq";
+ };
+
+ eth0: ethernet@f0003000 {
+ compatible = "snps,oscilan";
+ reg = <0xf0003000 0x44>;
+ interrupts = <25>, <26>;
+ interrupt-names = "rx", "tx";
+ };
+
+ arcpct0: pct {
+ compatible = "snps,archs-pct";
+ #interrupt-cells = <1>;
+ interrupts = <20>;
+ };
+ };
+};
--- /dev/null
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
+
+/ {
+ compatible = "snps,nsimosci_hs";
+ clock-frequency = <5000000>; /* 5 MHZ */
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&core_intc>;
+
+ chosen {
+ /* this is for console on serial */
+ bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug";
+ };
+
+ aliases {
+ serial0 = &uart0;
+ };
+
+ fpga {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* child and parent address space 1:1 mapped */
+ ranges;
+
+ core_intc: core-interrupt-controller {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+/* interrupts = <16 17 18 19 20 21 22 23 24 25>; */
+ };
+
+ idu_intc: idu-interrupt-controller {
+ compatible = "snps,archs-idu-intc";
+ interrupt-controller;
+ interrupt-parent = <&core_intc>;
+
+ /*
+ * <hwirq distribution>
+ * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+ */
+ #interrupt-cells = <2>;
+
+ /*
+ * upstream irqs to core intc - downstream these are
+ * "COMMON" irq 0,1..
+ */
+ interrupts = <24 25 26 27 28 29 30 31>;
+ };
+
+ uart0: serial@f0000000 {
+ compatible = "ns8250";
+ reg = <0xf0000000 0x2000>;
+ interrupt-parent = <&idu_intc>;
+ interrupts = <0 0>; /* cmn irq 0 -> cpu irq 24
+ RR distribute to all cpus */
+ clock-frequency = <3686400>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ no-loopback-test = <1>;
+ };
+
+ pgu0: pgu@f9000000 {
+ compatible = "snps,arcpgufb";
+ reg = <0xf9000000 0x400>;
+ };
+
+ ps2: ps2@f9001000 {
+ compatible = "snps,arc_ps2";
+ reg = <0xf9000400 0x14>;
+ interrupts = <3 0>;
+ interrupt-parent = <&idu_intc>;
+ interrupt-names = "arc_ps2_irq";
+ };
+
+ eth0: ethernet@f0003000 {
+ compatible = "snps,oscilan";
+ reg = <0xf0003000 0x44>;
+ interrupt-parent = <&idu_intc>;
+ interrupts = <1 2>, <2 2>;
+ interrupt-names = "rx", "tx";
+ };
+
+ arcpct0: pct {
+ compatible = "snps,archs-pct";
+ #interrupt-cells = <1>;
+ interrupts = <20>;
+ };
+ };
+};
--- /dev/null
+/*
+ * Copyright (C) 2013, 2014 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device tree for AXC003 CPU card: HS38x UP configuration (VDK version)
+ */
+
+/ {
+ compatible = "snps,arc";
+ clock-frequency = <50000000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpu_card {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ ranges = <0x00000000 0xf0000000 0x10000000>;
+
+ cpu_intc: archs-intc@cpu {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ debug_uart: dw-apb-uart@0x5000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x5000 0x100>;
+ clock-frequency = <2403200>;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = <19>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ };
+
+ mb_intc: dw-apb-ictl@0xe0012000 {
+ #interrupt-cells = <1>;
+ compatible = "snps,dw-apb-ictl";
+ reg = < 0xe0012000 0x200 >;
+ interrupt-controller;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = < 18 >;
+ };
+
+ memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0x80000000 0x40000000>;
+ device_type = "memory";
+ reg = <0x00000000 0x20000000>; /* 512MiB */
+ };
+};
--- /dev/null
+/*
+ * Copyright (C) 2014, 2015 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device tree for AXC003 CPU card:
+ * HS38x2 (Dual Core) with IDU intc (VDK version)
+ */
+
+/ {
+ compatible = "snps,arc";
+ clock-frequency = <50000000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpu_card {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ ranges = <0x00000000 0xf0000000 0x10000000>;
+
+ cpu_intc: archs-intc@cpu {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ idu_intc: idu-interrupt-controller {
+ compatible = "snps,archs-idu-intc";
+ interrupt-controller;
+ interrupt-parent = <&cpu_intc>;
+
+ /*
+ * <hwirq distribution>
+ * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
+ */
+ #interrupt-cells = <2>;
+
+ interrupts = <24 25 26 27>;
+ };
+
+ debug_uart: dw-apb-uart@0x5000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x5000 0x100>;
+ clock-frequency = <2403200>;
+ interrupt-parent = <&idu_intc>;
+ interrupts = <2 0>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ };
+
+ mb_intc: dw-apb-ictl@0xe0012000 {
+ #interrupt-cells = <1>;
+ compatible = "snps,dw-apb-ictl";
+ reg = < 0xe0012000 0x200 >;
+ interrupt-controller;
+ interrupt-parent = <&idu_intc>;
+ interrupts = < 0 0 >;
+ };
+
+ memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0x80000000 0x40000000>;
+ device_type = "memory";
+ reg = <0x00000000 0x20000000>; /* 512MiB */
+ };
+};
--- /dev/null
+/*
+ * Support for peripherals on the AXS10x mainboard (VDK version)
+ *
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/ {
+ axs10x_mb_vdk {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0xe0000000 0x10000000>;
+ interrupt-parent = <&mb_intc>;
+
+ clocks {
+ apbclk: apbclk {
+ compatible = "fixed-clock";
+ clock-frequency = <50000000>;
+ #clock-cells = <0>;
+ };
+
+ };
+
+ ethernet@0x18000 {
+ #interrupt-cells = <1>;
+ compatible = "snps,dwmac";
+ reg = < 0x18000 0x2000 >;
+ interrupts = < 4 >;
+ interrupt-names = "macirq";
+ phy-mode = "rgmii";
+ snps,phy-addr = < 0 >; // VDK model phy address is 0
+ snps,pbl = < 32 >;
+ clocks = <&apbclk>;
+ clock-names = "stmmaceth";
+ };
+
+ ehci@0x40000 {
+ compatible = "generic-ehci";
+ reg = < 0x40000 0x100 >;
+ interrupts = < 8 >;
+ };
+
+ uart@0x20000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x20000 0x100>;
+ clock-frequency = <2403200>;
+ interrupts = <17>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ uart@0x21000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x21000 0x100>;
+ clock-frequency = <2403200>;
+ interrupts = <18>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ uart@0x22000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x22000 0x100>;
+ clock-frequency = <2403200>;
+ interrupts = <19>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+/* PGU output directly sent to virtual LCD screen; hdmi controller not modelled */
+ pgu@0x17000 {
+ compatible = "snps,arcpgufb";
+ reg = <0x17000 0x400>;
+ clock-frequency = <51000000>; /* PGU'clock is initated in init function */
+ /* interrupts = <5>; PGU interrupts not used, this vector is used for ps2 below */
+ };
+
+/* VDK has additional ps2 keyboard/mouse interface integrated in LCD screen model */
+ ps2: ps2@e0017400 {
+ compatible = "snps,arc_ps2";
+ reg = <0x17400 0x14>;
+ interrupts = <5>;
+ interrupt-names = "arc_ps2_irq";
+ };
+ };
+};
--- /dev/null
+/*
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * ARC HS38 Virtual Development Kit (VDK)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "vdk_axc003.dtsi"
+/include/ "vdk_axs10x_mb.dtsi"
+
+/ {
+ compatible = "snps,axs103";
+
+ chosen {
+ bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0";
+ };
+};
--- /dev/null
+/*
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * ARC HS38 Virtual Development Kit, SMP version (VDK)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "vdk_axc003_idu.dtsi"
+/include/ "vdk_axs10x_mb.dtsi"
+
+/ {
+ compatible = "snps,axs103";
+
+ chosen {
+ bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0";
+ };
+};
--- /dev/null
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MODULES=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARC_PLAT_AXS10X=y
+CONFIG_AXS101=y
+CONFIG_ARC_CACHE_LINE_SHIFT=5
+CONFIG_ARC_BUILTIN_DTB_NAME="axs101"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NATIONAL_PHY=y
+# CONFIG_USB_NET_DRIVERS is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_MOUSE_SERIAL=y
+CONFIG_MOUSE_SYNAPTICS_USB=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_IDMAC=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
--- /dev/null
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MODULES=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARC_PLAT_AXS10X=y
+CONFIG_AXS103=y
+CONFIG_ISA_ARCV2=y
+CONFIG_ARC_BUILTIN_DTB_NAME="axs103"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_AXS=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NATIONAL_PHY=y
+# CONFIG_USB_NET_DRIVERS is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_MOUSE_SERIAL=y
+CONFIG_MOUSE_SYNAPTICS_USB=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_IDMAC=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
--- /dev/null
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARC_PLAT_AXS10X=y
+CONFIG_AXS103=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+CONFIG_ARC_BUILTIN_DTB_NAME="axs103_idu"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_AXS=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NATIONAL_PHY=y
+# CONFIG_USB_NET_DRIVERS is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_MOUSE_SERIAL=y
+CONFIG_MOUSE_SYNAPTICS_USB=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_IDMAC=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_FPGA_LEGACY=y
-# CONFIG_ARC_HAS_RTSC is not set
-CONFIG_ARC_BUILTIN_DTB_NAME="angel4"
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700"
CONFIG_PREEMPT=y
# CONFIG_COMPACTION is not set
# CONFIG_CROSS_MEMORY_ATTACH is not set
--- /dev/null
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ISA_ARCV2=y
+CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_ARC=y
+CONFIG_SERIAL_ARC_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+# CONFIG_DEBUG_PREEMPT is not set
+CONFIG_XZ_DEC=y
--- /dev/null
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ARC_BOARD_ML509=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs_idu"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_ARC=y
+CONFIG_SERIAL_ARC_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_XZ_DEC=y
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_FPGA_LEGACY=y
-# CONFIG_ARC_HAS_RTSC is not set
+CONFIG_ARC_PLAT_SIM=y
CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci"
# CONFIG_COMPACTION is not set
CONFIG_NET=y
--- /dev/null
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ISA_ARCV2=y
+CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs"
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_IPV6 is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+CONFIG_NET_OSCI_LAN=y
+CONFIG_INPUT_EVDEV=y
+# CONFIG_MOUSE_PS2_ALPS is not set
+# CONFIG_MOUSE_PS2_LOGIPS2PP is not set
+# CONFIG_MOUSE_PS2_SYNAPTICS is not set
+# CONFIG_MOUSE_PS2_TRACKPOINT is not set
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_ARC_PS2=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
--- /dev/null
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+# CONFIG_COMPAT_BRK is not set
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ARC_BOARD_ML509=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+CONFIG_ARC_HAS_LL64=y
+# CONFIG_ARC_HAS_RTSC is not set
+CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs_idu"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_IPV6 is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NET_OSCI_LAN=y
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_LIBPS2=y
+CONFIG_SERIO_ARC_PS2=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_ARCPGU_RGB888=y
+CONFIG_ARCPGU_DISPTYPE=0
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FTRACE=y
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_DEFAULT_HOSTNAME="tb10x"
CONFIG_SYSVIPC=y
# CONFIG_BLOCK is not set
CONFIG_ARC_PLAT_TB10X=y
CONFIG_ARC_CACHE_LINE_SHIFT=5
-# CONFIG_ARC_HAS_RTSC is not set
CONFIG_ARC_STACK_NONEXEC=y
CONFIG_HZ=250
CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk"
--- /dev/null
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARC_PLAT_AXS10X=y
+CONFIG_AXS103=y
+CONFIG_ISA_ARCV2=y
+CONFIG_ARC_UBOOT_SUPPORT=y
+CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38"
+CONFIG_PREEMPT=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_SLRAM=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NATIONAL_PHY=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_SERIO_ARC_PS2=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_ARCPGU_RGB888=y
+CONFIG_ARCPGU_DISPTYPE=0
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_SERIAL=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
--- /dev/null
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ARC_PLAT_AXS10X=y
+CONFIG_AXS103=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+# CONFIG_ARC_HAS_GRTC is not set
+CONFIG_ARC_UBOOT_SUPPORT=y
+CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38_smp"
+CONFIG_PREEMPT=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_SLRAM=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_NATIONAL_PHY=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+CONFIG_SERIO_ARC_PS2=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_ARCPGU_RGB888=y
+CONFIG_ARCPGU_DISPTYPE=0
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_SERIAL=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
generic-y += auxvec.h
-generic-y += barrier.h
generic-y += bitsperlong.h
generic-y += bugs.h
generic-y += clkdev.h
#define ARC_REG_PERIBASE_BCR 0x69
#define ARC_REG_FP_BCR 0x6B /* ARCompact: Single-Precision FPU */
#define ARC_REG_DPFP_BCR 0x6C /* ARCompact: Dbl Precision FPU */
+#define ARC_REG_FP_V2_BCR 0xc8 /* ARCv2 FPU */
+#define ARC_REG_SLC_BCR 0xce
#define ARC_REG_DCCM_BCR 0x74 /* DCCM Present + SZ */
#define ARC_REG_TIMERS_BCR 0x75
#define ARC_REG_AP_BCR 0x76
#define ARC_REG_BPU_BCR 0xc0
#define ARC_REG_ISA_CFG_BCR 0xc1
#define ARC_REG_RTT_BCR 0xF2
+#define ARC_REG_IRQ_BCR 0xF3
#define ARC_REG_SMART_BCR 0xFF
/* status32 Bits Positions */
* [15: 8] = Exception Cause Code
* [ 7: 0] = Exception Parameters (for certain types only)
*/
+#ifdef CONFIG_ISA_ARCOMPACT
#define ECR_V_MEM_ERR 0x01
#define ECR_V_INSN_ERR 0x02
#define ECR_V_MACH_CHK 0x20
#define ECR_V_DTLB_MISS 0x22
#define ECR_V_PROTV 0x23
#define ECR_V_TRAP 0x25
+#else
+#define ECR_V_MEM_ERR 0x01
+#define ECR_V_INSN_ERR 0x02
+#define ECR_V_MACH_CHK 0x03
+#define ECR_V_ITLB_MISS 0x04
+#define ECR_V_DTLB_MISS 0x05
+#define ECR_V_PROTV 0x06
+#define ECR_V_TRAP 0x09
+#endif
/* DTLB Miss and Protection Violation Cause Codes */
#define ECR_C_BIT_DTLB_LD_MISS 8
#define ECR_C_BIT_DTLB_ST_MISS 9
-/* Dummy ECR values for Interrupts */
-#define event_IRQ1 0x0031abcd
-#define event_IRQ2 0x0032abcd
/* Auxiliary registers */
#define AUX_IDENTITY 4
struct bcr_isa {
#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad1:23, atomic1:1, ver:8;
+ unsigned int div_rem:4, pad2:4, ldd:1, unalign:1, atomic:1, be:1,
+ pad1:11, atomic1:1, ver:8;
#else
- unsigned int ver:8, atomic1:1, pad1:23;
+ unsigned int ver:8, atomic1:1, pad1:11, be:1, atomic:1, unalign:1,
+ ldd:1, pad2:4, div_rem:4;
#endif
};
#endif
};
+struct bcr_fp_arcv2 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad2:15, dp:1, pad1:7, sp:1, ver:8;
+#else
+ unsigned int ver:8, sp:1, pad1:7, dp:1, pad2:15;
+#endif
+};
+
struct bcr_timer {
#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad2:15, rtsc:1, pad1:6, t1:1, t0:1, ver:8;
+ unsigned int pad2:15, rtsc:1, pad1:5, rtc:1, t1:1, t0:1, ver:8;
#else
- unsigned int ver:8, t0:1, t1:1, pad1:6, rtsc:1, pad2:15;
+ unsigned int ver:8, t0:1, t1:1, rtc:1, pad1:5, rtsc:1, pad2:15;
#endif
};
#endif
};
+struct bcr_bpu_arcv2 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:6, fbe:2, tqe:2, ts:4, ft:1, rse:2, pte:3, bce:3, ver:8;
+#else
+ unsigned int ver:8, bce:3, pte:3, rse:2, ft:1, ts:4, tqe:2, fbe:2, pad:6;
+#endif
+};
+
struct bcr_generic {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:24, ver:8;
*/
struct cpuinfo_arc_mmu {
- unsigned int ver, pg_sz, sets, ways, u_dtlb, u_itlb, num_tlb;
+ unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, u_dtlb:6, u_itlb:6;
+ unsigned int num_tlb:16, sets:12, ways:4;
};
struct cpuinfo_arc_cache {
- unsigned int sz_k:8, line_len:8, assoc:4, ver:4, alias:1, vipt:1, pad:6;
+ unsigned int sz_k:14, line_len:8, assoc:4, ver:4, alias:1, vipt:1;
};
struct cpuinfo_arc_bpu {
};
struct cpuinfo_arc {
- struct cpuinfo_arc_cache icache, dcache;
+ struct cpuinfo_arc_cache icache, dcache, slc;
struct cpuinfo_arc_mmu mmu;
struct cpuinfo_arc_bpu bpu;
struct bcr_identity core;
struct bcr_isa isa;
struct bcr_timer timers;
unsigned int vec_base;
- unsigned int uncached_base;
struct cpuinfo_arc_ccm iccm, dccm;
struct {
unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3,
extern struct cpuinfo_arc cpuinfo_arc700[];
+static inline int is_isa_arcv2(void)
+{
+ return IS_ENABLED(CONFIG_ISA_ARCV2);
+}
+
+static inline int is_isa_arcompact(void)
+{
+ return IS_ENABLED(CONFIG_ISA_ARCOMPACT);
+}
+
+#if defined(CONFIG_ISA_ARCOMPACT) && !defined(_CPU_DEFAULT_A7)
+#error "Toolchain not configured for ARCompact builds"
+#elif defined(CONFIG_ISA_ARCV2) && !defined(_CPU_DEFAULT_HS)
+#error "Toolchain not configured for ARCv2 builds"
+#endif
+
#endif /* __ASEMBLY__ */
#endif /* _ASM_ARC_ARCREGS_H */
#define atomic_set(v, i) (((v)->counter) = (i))
+#ifdef CONFIG_ISA_ARCV2
+#define PREFETCHW " prefetchw [%1] \n"
+#else
+#define PREFETCHW
+#endif
+
#define ATOMIC_OP(op, c_op, asm_op) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
unsigned int temp; \
\
__asm__ __volatile__( \
- "1: llock %0, [%1] \n" \
+ "1: \n" \
+ PREFETCHW \
+ " llock %0, [%1] \n" \
" " #asm_op " %0, %0, %2 \n" \
" scond %0, [%1] \n" \
" bnz 1b \n" \
{ \
unsigned int temp; \
\
+ /* \
+ * Explicit full memory barrier needed before/after as \
+ * LLOCK/SCOND thmeselves don't provide any such semantics \
+ */ \
+ smp_mb(); \
+ \
__asm__ __volatile__( \
- "1: llock %0, [%1] \n" \
+ "1: \n" \
+ PREFETCHW \
+ " llock %0, [%1] \n" \
" " #asm_op " %0, %0, %2 \n" \
" scond %0, [%1] \n" \
" bnz 1b \n" \
: "r"(&v->counter), "ir"(i) \
: "cc"); \
\
+ smp_mb(); \
+ \
return temp; \
}
unsigned long flags; \
unsigned long temp; \
\
+ /* \
+ * spin lock/unlock provides the needed smp_mb() before/after \
+ */ \
atomic_ops_lock(flags); \
temp = v->counter; \
temp c_op i; \
#define __atomic_add_unless(v, a, u) \
({ \
int c, old; \
+ \
+ /* \
+ * Explicit full memory barrier needed before/after as \
+ * LLOCK/SCOND thmeselves don't provide any such semantics \
+ */ \
+ smp_mb(); \
+ \
c = atomic_read(v); \
while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\
c = old; \
+ \
+ smp_mb(); \
+ \
c; \
})
--- /dev/null
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+#ifdef CONFIG_ISA_ARCV2
+
+/*
+ * ARCv2 based HS38 cores are in-order issue, but still weakly ordered
+ * due to micro-arch buffering/queuing of load/store, cache hit vs. miss ...
+ *
+ * Explicit barrier provided by DMB instruction
+ * - Operand supports fine grained load/store/load+store semantics
+ * - Ensures that selected memory operation issued before it will complete
+ * before any subsequent memory operation of same type
+ * - DMB guarantees SMP as well as local barrier semantics
+ * (asm-generic/barrier.h ensures sane smp_*mb if not defined here, i.e.
+ * UP: barrier(), SMP: smp_*mb == *mb)
+ * - DSYNC provides DMB+completion_of_cache_bpu_maintenance_ops hence not needed
+ * in the general case. Plus it only provides full barrier.
+ */
+
+#define mb() asm volatile("dmb 3\n" : : : "memory")
+#define rmb() asm volatile("dmb 1\n" : : : "memory")
+#define wmb() asm volatile("dmb 2\n" : : : "memory")
+
+#endif
+
+#ifdef CONFIG_ISA_ARCOMPACT
+
+/*
+ * ARCompact based cores (ARC700) only have SYNC instruction which is super
+ * heavy weight as it flushes the pipeline as well.
+ * There are no real SMP implementations of such cores.
+ */
+
+#define mb() asm volatile("sync\n" : : : "memory")
+#endif
+
+#include <asm-generic/barrier.h>
+
+#endif
#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/barrier.h>
+#ifndef CONFIG_ARC_HAS_LLSC
+#include <asm/smp.h>
+#endif
-/*
- * Hardware assisted read-modify-write using ARC700 LLOCK/SCOND insns.
- * The Kconfig glue ensures that in SMP, this is only set if the container
- * SoC/platform has cross-core coherent LLOCK/SCOND
- */
#if defined(CONFIG_ARC_HAS_LLSC)
-static inline void set_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned int temp;
-
- m += nr >> 5;
-
- /*
- * ARC ISA micro-optimization:
- *
- * Instructions dealing with bitpos only consider lower 5 bits (0-31)
- * e.g (x << 33) is handled like (x << 1) by ASL instruction
- * (mem pointer still needs adjustment to point to next word)
- *
- * Hence the masking to clamp @nr arg can be elided in general.
- *
- * However if @nr is a constant (above assumed it in a register),
- * and greater than 31, gcc can optimize away (x << 33) to 0,
- * as overflow, given the 32-bit ISA. Thus masking needs to be done
- * for constant @nr, but no code is generated due to const prop.
- */
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- __asm__ __volatile__(
- "1: llock %0, [%1] \n"
- " bset %0, %0, %2 \n"
- " scond %0, [%1] \n"
- " bnz 1b \n"
- : "=&r"(temp)
- : "r"(m), "ir"(nr)
- : "cc");
-}
-
-static inline void clear_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned int temp;
-
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- __asm__ __volatile__(
- "1: llock %0, [%1] \n"
- " bclr %0, %0, %2 \n"
- " scond %0, [%1] \n"
- " bnz 1b \n"
- : "=&r"(temp)
- : "r"(m), "ir"(nr)
- : "cc");
-}
-
-static inline void change_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned int temp;
-
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
+/*
+ * Hardware assisted Atomic-R-M-W
+ */
- __asm__ __volatile__(
- "1: llock %0, [%1] \n"
- " bxor %0, %0, %2 \n"
- " scond %0, [%1] \n"
- " bnz 1b \n"
- : "=&r"(temp)
- : "r"(m), "ir"(nr)
- : "cc");
+#define BIT_OP(op, c_op, asm_op) \
+static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
+{ \
+ unsigned int temp; \
+ \
+ m += nr >> 5; \
+ \
+ /* \
+ * ARC ISA micro-optimization: \
+ * \
+ * Instructions dealing with bitpos only consider lower 5 bits \
+ * e.g (x << 33) is handled like (x << 1) by ASL instruction \
+ * (mem pointer still needs adjustment to point to next word) \
+ * \
+ * Hence the masking to clamp @nr arg can be elided in general. \
+ * \
+ * However if @nr is a constant (above assumed in a register), \
+ * and greater than 31, gcc can optimize away (x << 33) to 0, \
+ * as overflow, given the 32-bit ISA. Thus masking needs to be \
+ * done for const @nr, but no code is generated due to gcc \
+ * const prop. \
+ */ \
+ if (__builtin_constant_p(nr)) \
+ nr &= 0x1f; \
+ \
+ __asm__ __volatile__( \
+ "1: llock %0, [%1] \n" \
+ " " #asm_op " %0, %0, %2 \n" \
+ " scond %0, [%1] \n" \
+ " bnz 1b \n" \
+ : "=&r"(temp) /* Early clobber, to prevent reg reuse */ \
+ : "r"(m), /* Not "m": llock only supports reg direct addr mode */ \
+ "ir"(nr) \
+ : "cc"); \
}
/*
* Since ARC lacks a equivalent h/w primitive, the bit is set unconditionally
* and the old value of bit is returned
*/
-static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long old, temp;
-
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- __asm__ __volatile__(
- "1: llock %0, [%2] \n"
- " bset %1, %0, %3 \n"
- " scond %1, [%2] \n"
- " bnz 1b \n"
- : "=&r"(old), "=&r"(temp)
- : "r"(m), "ir"(nr)
- : "cc");
-
- return (old & (1 << nr)) != 0;
-}
-
-static inline int
-test_and_clear_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned int old, temp;
-
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- __asm__ __volatile__(
- "1: llock %0, [%2] \n"
- " bclr %1, %0, %3 \n"
- " scond %1, [%2] \n"
- " bnz 1b \n"
- : "=&r"(old), "=&r"(temp)
- : "r"(m), "ir"(nr)
- : "cc");
-
- return (old & (1 << nr)) != 0;
-}
-
-static inline int
-test_and_change_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned int old, temp;
-
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- __asm__ __volatile__(
- "1: llock %0, [%2] \n"
- " bxor %1, %0, %3 \n"
- " scond %1, [%2] \n"
- " bnz 1b \n"
- : "=&r"(old), "=&r"(temp)
- : "r"(m), "ir"(nr)
- : "cc");
-
- return (old & (1 << nr)) != 0;
+#define TEST_N_BIT_OP(op, c_op, asm_op) \
+static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
+{ \
+ unsigned long old, temp; \
+ \
+ m += nr >> 5; \
+ \
+ if (__builtin_constant_p(nr)) \
+ nr &= 0x1f; \
+ \
+ /* \
+ * Explicit full memory barrier needed before/after as \
+ * LLOCK/SCOND themselves don't provide any such smenatic \
+ */ \
+ smp_mb(); \
+ \
+ __asm__ __volatile__( \
+ "1: llock %0, [%2] \n" \
+ " " #asm_op " %1, %0, %3 \n" \
+ " scond %1, [%2] \n" \
+ " bnz 1b \n" \
+ : "=&r"(old), "=&r"(temp) \
+ : "r"(m), "ir"(nr) \
+ : "cc"); \
+ \
+ smp_mb(); \
+ \
+ return (old & (1 << nr)) != 0; \
}
#else /* !CONFIG_ARC_HAS_LLSC */
-#include <asm/smp.h>
-
/*
* Non hardware assisted Atomic-R-M-W
* Locking would change to irq-disabling only (UP) and spinlocks (SMP)
* at compile time)
*/
-static inline void set_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long temp, flags;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- bitops_lock(flags);
-
- temp = *m;
- *m = temp | (1UL << nr);
-
- bitops_unlock(flags);
-}
-
-static inline void clear_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long temp, flags;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- bitops_lock(flags);
-
- temp = *m;
- *m = temp & ~(1UL << nr);
-
- bitops_unlock(flags);
-}
-
-static inline void change_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long temp, flags;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- bitops_lock(flags);
-
- temp = *m;
- *m = temp ^ (1UL << nr);
-
- bitops_unlock(flags);
-}
-
-static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long old, flags;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- bitops_lock(flags);
-
- old = *m;
- *m = old | (1 << nr);
-
- bitops_unlock(flags);
-
- return (old & (1 << nr)) != 0;
-}
-
-static inline int
-test_and_clear_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long old, flags;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- bitops_lock(flags);
-
- old = *m;
- *m = old & ~(1 << nr);
-
- bitops_unlock(flags);
-
- return (old & (1 << nr)) != 0;
+#define BIT_OP(op, c_op, asm_op) \
+static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
+{ \
+ unsigned long temp, flags; \
+ m += nr >> 5; \
+ \
+ if (__builtin_constant_p(nr)) \
+ nr &= 0x1f; \
+ \
+ /* \
+ * spin lock/unlock provide the needed smp_mb() before/after \
+ */ \
+ bitops_lock(flags); \
+ \
+ temp = *m; \
+ *m = temp c_op (1UL << nr); \
+ \
+ bitops_unlock(flags); \
}
-static inline int
-test_and_change_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long old, flags;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- bitops_lock(flags);
-
- old = *m;
- *m = old ^ (1 << nr);
-
- bitops_unlock(flags);
-
- return (old & (1 << nr)) != 0;
+#define TEST_N_BIT_OP(op, c_op, asm_op) \
+static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
+{ \
+ unsigned long old, flags; \
+ m += nr >> 5; \
+ \
+ if (__builtin_constant_p(nr)) \
+ nr &= 0x1f; \
+ \
+ bitops_lock(flags); \
+ \
+ old = *m; \
+ *m = old c_op (1 << nr); \
+ \
+ bitops_unlock(flags); \
+ \
+ return (old & (1 << nr)) != 0; \
}
#endif /* CONFIG_ARC_HAS_LLSC */
* Non atomic variants
**************************************/
-static inline void __set_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long temp;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- temp = *m;
- *m = temp | (1UL << nr);
-}
-
-static inline void __clear_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long temp;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- temp = *m;
- *m = temp & ~(1UL << nr);
-}
-
-static inline void __change_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long temp;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- temp = *m;
- *m = temp ^ (1UL << nr);
-}
-
-static inline int
-__test_and_set_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long old;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- old = *m;
- *m = old | (1 << nr);
-
- return (old & (1 << nr)) != 0;
+#define __BIT_OP(op, c_op, asm_op) \
+static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \
+{ \
+ unsigned long temp; \
+ m += nr >> 5; \
+ \
+ if (__builtin_constant_p(nr)) \
+ nr &= 0x1f; \
+ \
+ temp = *m; \
+ *m = temp c_op (1UL << nr); \
}
-static inline int
-__test_and_clear_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long old;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- old = *m;
- *m = old & ~(1 << nr);
-
- return (old & (1 << nr)) != 0;
+#define __TEST_N_BIT_OP(op, c_op, asm_op) \
+static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
+{ \
+ unsigned long old; \
+ m += nr >> 5; \
+ \
+ if (__builtin_constant_p(nr)) \
+ nr &= 0x1f; \
+ \
+ old = *m; \
+ *m = old c_op (1 << nr); \
+ \
+ return (old & (1 << nr)) != 0; \
}
-static inline int
-__test_and_change_bit(unsigned long nr, volatile unsigned long *m)
-{
- unsigned long old;
- m += nr >> 5;
-
- if (__builtin_constant_p(nr))
- nr &= 0x1f;
-
- old = *m;
- *m = old ^ (1 << nr);
-
- return (old & (1 << nr)) != 0;
-}
+#define BIT_OPS(op, c_op, asm_op) \
+ \
+ /* set_bit(), clear_bit(), change_bit() */ \
+ BIT_OP(op, c_op, asm_op) \
+ \
+ /* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\
+ TEST_N_BIT_OP(op, c_op, asm_op) \
+ \
+ /* __set_bit(), __clear_bit(), __change_bit() */ \
+ __BIT_OP(op, c_op, asm_op) \
+ \
+ /* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\
+ __TEST_N_BIT_OP(op, c_op, asm_op)
+
+BIT_OPS(set, |, bset)
+BIT_OPS(clear, & ~, bclr)
+BIT_OPS(change, ^, bxor)
/*
* This routine doesn't need to be atomic.
return ((mask & *addr) != 0);
}
+#ifdef CONFIG_ISA_ARCOMPACT
+
/*
* Count the number of zeros, starting from MSB
* Helper for fls( ) friends
return ffs(word) - 1;
}
+#else /* CONFIG_ISA_ARCV2 */
+
+/*
+ * fls = Find Last Set in word
+ * @result: [1-32]
+ * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0
+ */
+static inline __attribute__ ((const)) int fls(unsigned long x)
+{
+ int n;
+
+ asm volatile(
+ " fls.f %0, %1 \n" /* 0:31; 0(Z) if src 0 */
+ " add.nz %0, %0, 1 \n" /* 0:31 -> 1:32 */
+ : "=r"(n) /* Early clobber not needed */
+ : "r"(x)
+ : "cc");
+
+ return n;
+}
+
+/*
+ * __fls: Similar to fls, but zero based (0-31). Also 0 if no bit set
+ */
+static inline __attribute__ ((const)) int __fls(unsigned long x)
+{
+ /* FLS insn has exactly same semantics as the API */
+ return __builtin_arc_fls(x);
+}
+
+/*
+ * ffs = Find First Set in word (LSB to MSB)
+ * @result: [1-32], 0 if all 0's
+ */
+static inline __attribute__ ((const)) int ffs(unsigned long x)
+{
+ int n;
+
+ asm volatile(
+ " ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */
+ " add.nz %0, %0, 1 \n" /* 0:31 -> 1:32 */
+ " mov.z %0, 0 \n" /* 31(Z)-> 0 */
+ : "=r"(n) /* Early clobber not needed */
+ : "r"(x)
+ : "cc");
+
+ return n;
+}
+
+/*
+ * __ffs: Similar to ffs, but zero based (0-31)
+ */
+static inline __attribute__ ((const)) int __ffs(unsigned long x)
+{
+ int n;
+
+ asm volatile(
+ " ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */
+ " mov.z %0, 0 \n" /* 31(Z)-> 0 */
+ : "=r"(n)
+ : "r"(x)
+ : "cc");
+
+ return n;
+
+}
+
+#endif /* CONFIG_ISA_ARCOMPACT */
+
/*
* ffz = Find First Zero in word.
* @return:[0-31], 32 if all 1's
#define ARC_REG_IC_IVIC 0x10
#define ARC_REG_IC_CTRL 0x11
#define ARC_REG_IC_IVIL 0x19
-#if defined(CONFIG_ARC_MMU_V3)
+#if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4)
#define ARC_REG_IC_PTAG 0x1E
#endif
#define ARC_REG_DC_IVDL 0x4A
#define ARC_REG_DC_FLSH 0x4B
#define ARC_REG_DC_FLDL 0x4C
-#if defined(CONFIG_ARC_MMU_V3)
#define ARC_REG_DC_PTAG 0x5C
-#endif
/* Bit val in DC_CTRL */
#define DC_CTRL_INV_MODE_FLUSH 0x40
#define DC_CTRL_FLUSH_STATUS 0x100
+/*System-level cache (L2 cache) related Auxiliary registers */
+#define ARC_REG_SLC_CFG 0x901
+#define ARC_REG_SLC_CTRL 0x903
+#define ARC_REG_SLC_FLUSH 0x904
+#define ARC_REG_SLC_INVALIDATE 0x905
+#define ARC_REG_SLC_RGN_START 0x914
+#define ARC_REG_SLC_RGN_END 0x916
+
+/* Bit val in SLC_CONTROL */
+#define SLC_CTRL_IM 0x040
+#define SLC_CTRL_DISABLE 0x001
+#define SLC_CTRL_BUSY 0x100
+#define SLC_CTRL_RGN_OP_INV 0x200
+
#endif /* _ASM_CACHE_H */
void flush_icache_range(unsigned long start, unsigned long end);
void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len);
void __inv_icache_page(unsigned long paddr, unsigned long vaddr);
-void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr);
-#define __flush_dcache_page(p, v) \
- ___flush_dcache_page((unsigned long)p, (unsigned long)v)
+void __flush_dcache_page(unsigned long paddr, unsigned long vaddr);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
#define __ASM_ARC_CMPXCHG_H
#include <linux/types.h>
+
+#include <asm/barrier.h>
#include <asm/smp.h>
#ifdef CONFIG_ARC_HAS_LLSC
{
unsigned long prev;
+ /*
+ * Explicit full memory barrier needed before/after as
+ * LLOCK/SCOND thmeselves don't provide any such semantics
+ */
+ smp_mb();
+
__asm__ __volatile__(
"1: llock %0, [%1] \n"
" brne %0, %2, 2f \n"
" scond %3, [%1] \n"
" bnz 1b \n"
"2: \n"
- : "=&r"(prev)
- : "r"(ptr), "ir"(expected),
- "r"(new) /* can't be "ir". scond can't take limm for "b" */
- : "cc");
+ : "=&r"(prev) /* Early clobber, to prevent reg reuse */
+ : "r"(ptr), /* Not "m": llock only supports reg direct addr mode */
+ "ir"(expected),
+ "r"(new) /* can't be "ir". scond can't take LIMM for "b" */
+ : "cc", "memory"); /* so that gcc knows memory is being written here */
+
+ smp_mb();
return prev;
}
int prev;
volatile unsigned long *p = ptr;
+ /*
+ * spin lock/unlock provide the needed smp_mb() before/after
+ */
atomic_ops_lock(flags);
prev = *p;
if (prev == expected)
switch (size) {
case 4:
+ smp_mb();
+
__asm__ __volatile__(
" ex %0, [%1] \n"
: "+r"(val)
: "r"(ptr)
: "memory");
+ smp_mb();
+
return val;
}
return __xchg_bad_pointer();
static inline void __delay(unsigned long loops)
{
__asm__ __volatile__(
- "1: sub.f %0, %0, 1 \n"
- " jpnz 1b \n"
- : "+r"(loops)
- :
- : "cc");
+ " lp 1f \n"
+ " nop \n"
+ "1: \n"
+ : "+l"(loops));
}
extern void __bad_udelay(void);
#include <asm-generic/dma-coherent.h>
#include <asm/cacheflush.h>
-#ifndef CONFIG_ARC_PLAT_NEEDS_CPU_TO_DMA
-/*
- * dma_map_* API take cpu addresses, which is kernel logical address in the
- * untranslated address space (0x8000_0000) based. The dma address (bus addr)
- * ideally needs to be 0x0000_0000 based hence these glue routines.
- * However given that intermediate bus bridges can ignore the high bit, we can
- * do with these routines being no-ops.
- * If a platform/device comes up which sriclty requires 0 based bus addr
- * (e.g. AHB-PCI bridge on Angel4 board), then it can provide it's own versions
- */
-#define plat_dma_addr_to_kernel(dev, addr) ((unsigned long)(addr))
-#define plat_kernel_addr_to_dma(dev, ptr) ((dma_addr_t)(ptr))
-
-#else
-#include <plat/dma_addr.h>
-#endif
-
void *dma_alloc_noncoherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp);
enum dma_data_direction dir)
{
_dma_cache_sync((unsigned long)cpu_addr, size, dir);
- return plat_kernel_addr_to_dma(dev, cpu_addr);
+ return (dma_addr_t)cpu_addr;
}
static inline void
dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction dir)
{
- _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle), size,
- DMA_FROM_DEVICE);
+ _dma_cache_sync(dma_handle, size, DMA_FROM_DEVICE);
}
static inline void
dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction dir)
{
- _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle), size,
- DMA_TO_DEVICE);
+ _dma_cache_sync(dma_handle, size, DMA_TO_DEVICE);
}
static inline void
unsigned long offset, size_t size,
enum dma_data_direction direction)
{
- _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle) + offset,
- size, DMA_FROM_DEVICE);
+ _dma_cache_sync(dma_handle + offset, size, DMA_FROM_DEVICE);
}
static inline void
unsigned long offset, size_t size,
enum dma_data_direction direction)
{
- _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle) + offset,
- size, DMA_TO_DEVICE);
+ _dma_cache_sync(dma_handle + offset, size, DMA_TO_DEVICE);
}
static inline void
/* These ELF defines belong to uapi but libc elf.h already defines them */
#define EM_ARCOMPACT 93
+#define EM_ARCV2 195 /* ARCv2 Cores */
+
+#define EM_ARC_INUSE (IS_ENABLED(CONFIG_ISA_ARCOMPACT) ? \
+ EM_ARCOMPACT : EM_ARCV2)
+
/* ARC Relocations (kernel Modules only) */
#define R_ARC_32 0x4
#define R_ARC_32_ME 0x1B
--- /dev/null
+
+#ifndef __ASM_ARC_ENTRY_ARCV2_H
+#define __ASM_ARC_ENTRY_ARCV2_H
+
+#include <asm/asm-offsets.h>
+#include <asm/irqflags-arcv2.h>
+#include <asm/thread_info.h> /* For THREAD_SIZE */
+
+/*------------------------------------------------------------------------*/
+.macro INTERRUPT_PROLOGUE called_from
+
+ ; Before jumping to Interrupt Vector, hardware micro-ops did following:
+ ; 1. SP auto-switched to kernel mode stack
+ ; 2. STATUS32.Z flag set to U mode at time of interrupt (U:1, K:0)
+ ; 3. Auto saved: r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI, PC, STAT32
+ ;
+ ; Now manually save: r12, sp, fp, gp, r25
+
+ PUSH r12
+
+ ; Saving pt_regs->sp correctly requires some extra work due to the way
+ ; Auto stack switch works
+ ; - U mode: retrieve it from AUX_USER_SP
+ ; - K mode: add the offset from current SP where H/w starts auto push
+ ;
+ ; Utilize the fact that Z bit is set if Intr taken in U mode
+ mov.nz r9, sp
+ add.nz r9, r9, SZ_PT_REGS - PT_sp - 4
+ bnz 1f
+
+ lr r9, [AUX_USER_SP]
+1:
+ PUSH r9 ; SP
+
+ PUSH fp
+ PUSH gp
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+ PUSH r25 ; user_r25
+ GET_CURR_TASK_ON_CPU r25
+#else
+ sub sp, sp, 4
+#endif
+
+.ifnc \called_from, exception
+ sub sp, sp, 12 ; BTA/ECR/orig_r0 placeholder per pt_regs
+.endif
+
+.endm
+
+/*------------------------------------------------------------------------*/
+.macro INTERRUPT_EPILOGUE called_from
+
+.ifnc \called_from, exception
+ add sp, sp, 12 ; skip BTA/ECR/orig_r0 placeholderss
+.endif
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+ POP r25
+#else
+ add sp, sp, 4
+#endif
+
+ POP gp
+ POP fp
+
+ ; Don't touch AUX_USER_SP if returning to K mode (Z bit set)
+ ; (Z bit set on K mode is inverse of INTERRUPT_PROLOGUE)
+ add.z sp, sp, 4
+ bz 1f
+
+ POPAX AUX_USER_SP
+1:
+ POP r12
+
+.endm
+
+/*------------------------------------------------------------------------*/
+.macro EXCEPTION_PROLOGUE
+
+ ; Before jumping to Exception Vector, hardware micro-ops did following:
+ ; 1. SP auto-switched to kernel mode stack
+ ; 2. STATUS32.Z flag set to U mode at time of interrupt (U:1,K:0)
+ ;
+ ; Now manually save the complete reg file
+
+ PUSH r9 ; freeup a register: slot of erstatus
+
+ PUSHAX eret
+ sub sp, sp, 12 ; skip JLI, LDI, EI
+ PUSH lp_count
+ PUSHAX lp_start
+ PUSHAX lp_end
+ PUSH blink
+
+ PUSH r11
+ PUSH r10
+
+ ld.as r9, [sp, 10] ; load stashed r9 (status32 stack slot)
+ lr r10, [erstatus]
+ st.as r10, [sp, 10] ; save status32 at it's right stack slot
+
+ PUSH r9
+ PUSH r8
+ PUSH r7
+ PUSH r6
+ PUSH r5
+ PUSH r4
+ PUSH r3
+ PUSH r2
+ PUSH r1
+ PUSH r0
+
+ ; -- for interrupts, regs above are auto-saved by h/w in that order --
+ ; Now do what ISR prologue does (manually save r12, sp, fp, gp, r25)
+ ;
+ ; Set Z flag if this was from U mode (expected by INTERRUPT_PROLOGUE)
+ ; Although H/w exception micro-ops do set Z flag for U mode (just like
+ ; for interrupts), it could get clobbered in case we soft land here from
+ ; a TLB Miss exception handler (tlbex.S)
+
+ and r10, r10, STATUS_U_MASK
+ xor.f 0, r10, STATUS_U_MASK
+
+ INTERRUPT_PROLOGUE exception
+
+ PUSHAX erbta
+ PUSHAX ecr ; r9 contains ECR, expected by EV_Trap
+
+ PUSH r0 ; orig_r0
+.endm
+
+/*------------------------------------------------------------------------*/
+.macro EXCEPTION_EPILOGUE
+
+ ; Assumes r0 has PT_status32
+ btst r0, STATUS_U_BIT ; Z flag set if K, used in INTERRUPT_EPILOGUE
+
+ add sp, sp, 8 ; orig_r0/ECR don't need restoring
+ POPAX erbta
+
+ INTERRUPT_EPILOGUE exception
+
+ POP r0
+ POP r1
+ POP r2
+ POP r3
+ POP r4
+ POP r5
+ POP r6
+ POP r7
+ POP r8
+ POP r9
+ POP r10
+ POP r11
+
+ POP blink
+ POPAX lp_end
+ POPAX lp_start
+
+ POP r9
+ mov lp_count, r9
+
+ add sp, sp, 12 ; skip JLI, LDI, EI
+ POPAX eret
+ POPAX erstatus
+
+ ld.as r9, [sp, -12] ; reload r9 which got clobbered
+.endm
+
+.macro FAKE_RET_FROM_EXCPN
+ lr r9, [status32]
+ bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK|STATUS_AE_MASK)
+ or r9, r9, (STATUS_L_MASK|STATUS_IE_MASK)
+ kflag r9
+.endm
+
+/* Get thread_info of "current" tsk */
+.macro GET_CURR_THR_INFO_FROM_SP reg
+ bmskn \reg, sp, THREAD_SHIFT - 1
+.endm
+
+/* Get CPU-ID of this core */
+.macro GET_CPU_ID reg
+ lr \reg, [identity]
+ xbfu \reg, \reg, 0xE8 /* 00111 01000 */
+ /* M = 8-1 N = 8 */
+.endm
+
+#endif
--- /dev/null
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Vineetg: March 2009 (Supporting 2 levels of Interrupts)
+ * Stack switching code can no longer reliably rely on the fact that
+ * if we are NOT in user mode, stack is switched to kernel mode.
+ * e.g. L2 IRQ interrupted a L1 ISR which had not yet completed
+ * it's prologue including stack switching from user mode
+ *
+ * Vineetg: Aug 28th 2008: Bug #94984
+ * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap
+ * Normally CPU does this automatically, however when doing FAKE rtie,
+ * we also need to explicitly do this. The problem in macros
+ * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit
+ * was being "CLEARED" rather then "SET". Actually "SET" clears ZOL context
+ *
+ * Vineetg: May 5th 2008
+ * -Modified CALLEE_REG save/restore macros to handle the fact that
+ * r25 contains the kernel current task ptr
+ * - Defined Stack Switching Macro to be reused in all intr/excp hdlrs
+ * - Shaved off 11 instructions from RESTORE_ALL_INT1 by using the
+ * address Write back load ld.ab instead of seperate ld/add instn
+ *
+ * Amit Bhor, Sameer Dhavale: Codito Technologies 2004
+ */
+
+#ifndef __ASM_ARC_ENTRY_COMPACT_H
+#define __ASM_ARC_ENTRY_COMPACT_H
+
+#include <asm/asm-offsets.h>
+#include <asm/irqflags-compact.h>
+#include <asm/thread_info.h> /* For THREAD_SIZE */
+
+/*--------------------------------------------------------------
+ * Switch to Kernel Mode stack if SP points to User Mode stack
+ *
+ * Entry : r9 contains pre-IRQ/exception/trap status32
+ * Exit : SP set to K mode stack
+ * SP at the time of entry (K/U) saved @ pt_regs->sp
+ * Clobbers: r9
+ *-------------------------------------------------------------*/
+
+.macro SWITCH_TO_KERNEL_STK
+
+ /* User Mode when this happened ? Yes: Proceed to switch stack */
+ bbit1 r9, STATUS_U_BIT, 88f
+
+ /* OK we were already in kernel mode when this event happened, thus can
+ * assume SP is kernel mode SP. _NO_ need to do any stack switching
+ */
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+ /* However....
+ * If Level 2 Interrupts enabled, we may end up with a corner case:
+ * 1. User Task executing
+ * 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode)
+ * 3. But before it could switch SP from USER to KERNEL stack
+ * a L2 IRQ "Interrupts" L1
+ * Thay way although L2 IRQ happened in Kernel mode, stack is still
+ * not switched.
+ * To handle this, we may need to switch stack even if in kernel mode
+ * provided SP has values in range of USER mode stack ( < 0x7000_0000 )
+ */
+ brlo sp, VMALLOC_START, 88f
+
+ /* TODO: vineetg:
+ * We need to be a bit more cautious here. What if a kernel bug in
+ * L1 ISR, caused SP to go whaco (some small value which looks like
+ * USER stk) and then we take L2 ISR.
+ * Above brlo alone would treat it as a valid L1-L2 sceanrio
+ * instead of shouting alound
+ * The only feasible way is to make sure this L2 happened in
+ * L1 prelogue ONLY i.e. ilink2 is less than a pre-set marker in
+ * L1 ISR before it switches stack
+ */
+
+#endif
+
+ /*------Intr/Ecxp happened in kernel mode, SP already setup ------ */
+ /* save it nevertheless @ pt_regs->sp for uniformity */
+
+ b.d 66f
+ st sp, [sp, PT_sp - SZ_PT_REGS]
+
+88: /*------Intr/Ecxp happened in user mode, "switch" stack ------ */
+
+ GET_CURR_TASK_ON_CPU r9
+
+ /* With current tsk in r9, get it's kernel mode stack base */
+ GET_TSK_STACK_BASE r9, r9
+
+ /* save U mode SP @ pt_regs->sp */
+ st sp, [r9, PT_sp - SZ_PT_REGS]
+
+ /* final SP switch */
+ mov sp, r9
+66:
+.endm
+
+/*------------------------------------------------------------
+ * "FAKE" a rtie to return from CPU Exception context
+ * This is to re-enable Exceptions within exception
+ * Look at EV_ProtV to see how this is actually used
+ *-------------------------------------------------------------*/
+
+.macro FAKE_RET_FROM_EXCPN
+
+ ld r9, [sp, PT_status32]
+ bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK)
+ bset r9, r9, STATUS_L_BIT
+ sr r9, [erstatus]
+ mov r9, 55f
+ sr r9, [eret]
+
+ rtie
+55:
+.endm
+
+/*--------------------------------------------------------------
+ * For early Exception/ISR Prologue, a core reg is temporarily needed to
+ * code the rest of prolog (stack switching). This is done by stashing
+ * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP).
+ *
+ * Before saving the full regfile - this reg is restored back, only
+ * to be saved again on kernel mode stack, as part of pt_regs.
+ *-------------------------------------------------------------*/
+.macro PROLOG_FREEUP_REG reg, mem
+#ifdef CONFIG_SMP
+ sr \reg, [ARC_REG_SCRATCH_DATA0]
+#else
+ st \reg, [\mem]
+#endif
+.endm
+
+.macro PROLOG_RESTORE_REG reg, mem
+#ifdef CONFIG_SMP
+ lr \reg, [ARC_REG_SCRATCH_DATA0]
+#else
+ ld \reg, [\mem]
+#endif
+.endm
+
+/*--------------------------------------------------------------
+ * Exception Entry prologue
+ * -Switches stack to K mode (if not already)
+ * -Saves the register file
+ *
+ * After this it is safe to call the "C" handlers
+ *-------------------------------------------------------------*/
+.macro EXCEPTION_PROLOGUE
+
+ /* Need at least 1 reg to code the early exception prologue */
+ PROLOG_FREEUP_REG r9, @ex_saved_reg1
+
+ /* U/K mode at time of exception (stack not switched if already K) */
+ lr r9, [erstatus]
+
+ /* ARC700 doesn't provide auto-stack switching */
+ SWITCH_TO_KERNEL_STK
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+ /* Treat r25 as scratch reg (save on stack) and load with "current" */
+ PUSH r25
+ GET_CURR_TASK_ON_CPU r25
+#else
+ sub sp, sp, 4
+#endif
+
+ st.a r0, [sp, -8] /* orig_r0 needed for syscall (skip ECR slot) */
+ sub sp, sp, 4 /* skip pt_regs->sp, already saved above */
+
+ /* Restore r9 used to code the early prologue */
+ PROLOG_RESTORE_REG r9, @ex_saved_reg1
+
+ /* now we are ready to save the regfile */
+ SAVE_R0_TO_R12
+ PUSH gp
+ PUSH fp
+ PUSH blink
+ PUSHAX eret
+ PUSHAX erstatus
+ PUSH lp_count
+ PUSHAX lp_end
+ PUSHAX lp_start
+ PUSHAX erbta
+
+ lr r9, [ecr]
+ st r9, [sp, PT_event] /* EV_Trap expects r9 to have ECR */
+.endm
+
+/*--------------------------------------------------------------
+ * Restore all registers used by system call or Exceptions
+ * SP should always be pointing to the next free stack element
+ * when entering this macro.
+ *
+ * NOTE:
+ *
+ * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
+ * for memory load operations. If used in that way interrupts are deffered
+ * by hardware and that is not good.
+ *-------------------------------------------------------------*/
+.macro EXCEPTION_EPILOGUE
+ POPAX erbta
+ POPAX lp_start
+ POPAX lp_end
+
+ POP r9
+ mov lp_count, r9 ;LD to lp_count is not allowed
+
+ POPAX erstatus
+ POPAX eret
+ POP blink
+ POP fp
+ POP gp
+ RESTORE_R12_TO_R0
+
+ ld sp, [sp] /* restore original sp */
+ /* orig_r0, ECR, user_r25 skipped automatically */
+.endm
+
+/* Dummy ECR values for Interrupts */
+#define event_IRQ1 0x0031abcd
+#define event_IRQ2 0x0032abcd
+
+.macro INTERRUPT_PROLOGUE LVL
+
+ /* free up r9 as scratchpad */
+ PROLOG_FREEUP_REG r9, @int\LVL\()_saved_reg
+
+ /* Which mode (user/kernel) was the system in when intr occured */
+ lr r9, [status32_l\LVL\()]
+
+ SWITCH_TO_KERNEL_STK
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+ /* Treat r25 as scratch reg (save on stack) and load with "current" */
+ PUSH r25
+ GET_CURR_TASK_ON_CPU r25
+#else
+ sub sp, sp, 4
+#endif
+
+ PUSH 0x003\LVL\()abcd /* Dummy ECR */
+ sub sp, sp, 8 /* skip orig_r0 (not needed)
+ skip pt_regs->sp, already saved above */
+
+ /* Restore r9 used to code the early prologue */
+ PROLOG_RESTORE_REG r9, @int\LVL\()_saved_reg
+
+ SAVE_R0_TO_R12
+ PUSH gp
+ PUSH fp
+ PUSH blink
+ PUSH ilink\LVL\()
+ PUSHAX status32_l\LVL\()
+ PUSH lp_count
+ PUSHAX lp_end
+ PUSHAX lp_start
+ PUSHAX bta_l\LVL\()
+.endm
+
+/*--------------------------------------------------------------
+ * Restore all registers used by interrupt handlers.
+ *
+ * NOTE:
+ *
+ * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
+ * for memory load operations. If used in that way interrupts are deffered
+ * by hardware and that is not good.
+ *-------------------------------------------------------------*/
+.macro INTERRUPT_EPILOGUE LVL
+ POPAX bta_l\LVL\()
+ POPAX lp_start
+ POPAX lp_end
+
+ POP r9
+ mov lp_count, r9 ;LD to lp_count is not allowed
+
+ POPAX status32_l\LVL\()
+ POP ilink\LVL\()
+ POP blink
+ POP fp
+ POP gp
+ RESTORE_R12_TO_R0
+
+ ld sp, [sp] /* restore original sp */
+ /* orig_r0, ECR, user_r25 skipped automatically */
+.endm
+
+/* Get thread_info of "current" tsk */
+.macro GET_CURR_THR_INFO_FROM_SP reg
+ bic \reg, sp, (THREAD_SIZE - 1)
+.endm
+
+/* Get CPU-ID of this core */
+.macro GET_CPU_ID reg
+ lr \reg, [identity]
+ lsr \reg, \reg, 8
+ bmsk \reg, \reg, 7
+.endm
+
+#endif /* __ASM_ARC_ENTRY_COMPACT_H */
/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * Vineetg: March 2009 (Supporting 2 levels of Interrupts)
- * Stack switching code can no longer reliably rely on the fact that
- * if we are NOT in user mode, stack is switched to kernel mode.
- * e.g. L2 IRQ interrupted a L1 ISR which had not yet completed
- * it's prologue including stack switching from user mode
- *
- * Vineetg: Aug 28th 2008: Bug #94984
- * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap
- * Normally CPU does this automatically, however when doing FAKE rtie,
- * we also need to explicitly do this. The problem in macros
- * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit
- * was being "CLEARED" rather then "SET". Actually "SET" clears ZOL context
- *
- * Vineetg: May 5th 2008
- * -Modified CALLEE_REG save/restore macros to handle the fact that
- * r25 contains the kernel current task ptr
- * - Defined Stack Switching Macro to be reused in all intr/excp hdlrs
- * - Shaved off 11 instructions from RESTORE_ALL_INT1 by using the
- * address Write back load ld.ab instead of seperate ld/add instn
- *
- * Amit Bhor, Sameer Dhavale: Codito Technologies 2004
*/
#ifndef __ASM_ARC_ENTRY_H
#define __ASM_ARC_ENTRY_H
-#ifdef __ASSEMBLY__
#include <asm/unistd.h> /* For NR_syscalls defination */
-#include <asm/asm-offsets.h>
#include <asm/arcregs.h>
#include <asm/ptrace.h>
#include <asm/processor.h> /* For VMALLOC_START */
-#include <asm/thread_info.h> /* For THREAD_SIZE */
#include <asm/mmu.h>
+#ifdef CONFIG_ISA_ARCOMPACT
+#include <asm/entry-compact.h> /* ISA specific bits */
+#else
+#include <asm/entry-arcv2.h>
+#endif
+
/* Note on the LD/ST addr modes with addr reg wback
*
* LD.a same as LD.aw
POP r13
.endm
-#define OFF_USER_R25_FROM_R24 (SZ_CALLEE_REGS + SZ_PT_REGS - 8)/4
-
/*--------------------------------------------------------------
* Collect User Mode callee regs as struct callee_regs - needed by
* fork/do_signal/unaligned-access-emulation.
*-------------------------------------------------------------*/
.macro SAVE_CALLEE_SAVED_USER
+ mov r12, sp ; save SP as ref to pt_regs
SAVE_R13_TO_R24
#ifdef CONFIG_ARC_CURR_IN_REG
- ; Retrieve orig r25 and save it on stack
- ld.as r12, [sp, OFF_USER_R25_FROM_R24]
- st.a r12, [sp, -4]
+ ; Retrieve orig r25 and save it with rest of callee_regs
+ ld.as r12, [r12, PT_user_r25]
+ PUSH r12
#else
PUSH r25
#endif
.macro RESTORE_CALLEE_SAVED_USER
#ifdef CONFIG_ARC_CURR_IN_REG
- ld.ab r12, [sp, 4]
- st.as r12, [sp, OFF_USER_R25_FROM_R24]
+ POP r12
#else
POP r25
#endif
RESTORE_R24_TO_R13
+
+ ; SP is back to start of pt_regs
+#ifdef CONFIG_ARC_CURR_IN_REG
+ st.as r12, [sp, PT_user_r25]
+#endif
.endm
/*--------------------------------------------------------------
.endm
-/*--------------------------------------------------------------
- * Switch to Kernel Mode stack if SP points to User Mode stack
- *
- * Entry : r9 contains pre-IRQ/exception/trap status32
- * Exit : SP is set to kernel mode stack pointer
- * If CURR_IN_REG, r25 set to "current" task pointer
- * Clobbers: r9
- *-------------------------------------------------------------*/
-
-.macro SWITCH_TO_KERNEL_STK
-
- /* User Mode when this happened ? Yes: Proceed to switch stack */
- bbit1 r9, STATUS_U_BIT, 88f
-
- /* OK we were already in kernel mode when this event happened, thus can
- * assume SP is kernel mode SP. _NO_ need to do any stack switching
- */
-
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
- /* However....
- * If Level 2 Interrupts enabled, we may end up with a corner case:
- * 1. User Task executing
- * 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode)
- * 3. But before it could switch SP from USER to KERNEL stack
- * a L2 IRQ "Interrupts" L1
- * Thay way although L2 IRQ happened in Kernel mode, stack is still
- * not switched.
- * To handle this, we may need to switch stack even if in kernel mode
- * provided SP has values in range of USER mode stack ( < 0x7000_0000 )
- */
- brlo sp, VMALLOC_START, 88f
-
- /* TODO: vineetg:
- * We need to be a bit more cautious here. What if a kernel bug in
- * L1 ISR, caused SP to go whaco (some small value which looks like
- * USER stk) and then we take L2 ISR.
- * Above brlo alone would treat it as a valid L1-L2 sceanrio
- * instead of shouting alound
- * The only feasible way is to make sure this L2 happened in
- * L1 prelogue ONLY i.e. ilink2 is less than a pre-set marker in
- * L1 ISR before it switches stack
- */
-
-#endif
-
- /* Save Pre Intr/Exception KERNEL MODE SP on kernel stack
- * safe-keeping not really needed, but it keeps the epilogue code
- * (SP restore) simpler/uniform.
- */
- b.d 66f
- mov r9, sp
-
-88: /*------Intr/Ecxp happened in user mode, "switch" stack ------ */
-
- GET_CURR_TASK_ON_CPU r9
-
- /* With current tsk in r9, get it's kernel mode stack base */
- GET_TSK_STACK_BASE r9, r9
-
-66:
-#ifdef CONFIG_ARC_CURR_IN_REG
- /*
- * Treat r25 as scratch reg, save it on stack first
- * Load it with current task pointer
- */
- st r25, [r9, -4]
- GET_CURR_TASK_ON_CPU r25
-#endif
-
- /* Save Pre Intr/Exception User SP on kernel stack */
- st.a sp, [r9, -16] ; Make room for orig_r0, ECR, user_r25
-
- /* CAUTION:
- * SP should be set at the very end when we are done with everything
- * In case of 2 levels of interrupt we depend on value of SP to assume
- * that everything else is done (loading r25 etc)
- */
-
- /* set SP to point to kernel mode stack */
- mov sp, r9
-
- /* ----- Stack Switched to kernel Mode, Now save REG FILE ----- */
-
-.endm
-
-/*------------------------------------------------------------
- * "FAKE" a rtie to return from CPU Exception context
- * This is to re-enable Exceptions within exception
- * Look at EV_ProtV to see how this is actually used
- *-------------------------------------------------------------*/
-
-.macro FAKE_RET_FROM_EXCPN reg
-
- ld \reg, [sp, PT_status32]
- bic \reg, \reg, (STATUS_U_MASK|STATUS_DE_MASK)
- bset \reg, \reg, STATUS_L_BIT
- sr \reg, [erstatus]
- mov \reg, 55f
- sr \reg, [eret]
-
- rtie
-55:
-.endm
-
-/*
- * @reg [OUT] &thread_info of "current"
- */
-.macro GET_CURR_THR_INFO_FROM_SP reg
- bic \reg, sp, (THREAD_SIZE - 1)
-.endm
-
/*
* @reg [OUT] thread_info->flags of "current"
*/
ld \reg, [\reg, THREAD_INFO_FLAGS]
.endm
-/*--------------------------------------------------------------
- * For early Exception Prologue, a core reg is temporarily needed to
- * code the rest of prolog (stack switching). This is done by stashing
- * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP).
- *
- * Before saving the full regfile - this reg is restored back, only
- * to be saved again on kernel mode stack, as part of pt_regs.
- *-------------------------------------------------------------*/
-.macro EXCPN_PROLOG_FREEUP_REG reg
-#ifdef CONFIG_SMP
- sr \reg, [ARC_REG_SCRATCH_DATA0]
-#else
- st \reg, [@ex_saved_reg1]
-#endif
-.endm
-
-.macro EXCPN_PROLOG_RESTORE_REG reg
-#ifdef CONFIG_SMP
- lr \reg, [ARC_REG_SCRATCH_DATA0]
-#else
- ld \reg, [@ex_saved_reg1]
-#endif
-.endm
-
-/*--------------------------------------------------------------
- * Exception Entry prologue
- * -Switches stack to K mode (if not already)
- * -Saves the register file
- *
- * After this it is safe to call the "C" handlers
- *-------------------------------------------------------------*/
-.macro EXCEPTION_PROLOGUE
-
- /* Need at least 1 reg to code the early exception prologue */
- EXCPN_PROLOG_FREEUP_REG r9
-
- /* U/K mode at time of exception (stack not switched if already K) */
- lr r9, [erstatus]
-
- /* ARC700 doesn't provide auto-stack switching */
- SWITCH_TO_KERNEL_STK
-
- /* save the regfile */
- SAVE_ALL_SYS
-.endm
-
-/*--------------------------------------------------------------
- * Save all registers used by Exceptions (TLB Miss, Prot-V, Mem err etc)
- * Requires SP to be already switched to kernel mode Stack
- * sp points to the next free element on the stack at exit of this macro.
- * Registers are pushed / popped in the order defined in struct ptregs
- * in asm/ptrace.h
- * Note that syscalls are implemented via TRAP which is also a exception
- * from CPU's point of view
- *-------------------------------------------------------------*/
-.macro SAVE_ALL_SYS
-
- lr r9, [ecr]
- st r9, [sp, 8] /* ECR */
- st r0, [sp, 4] /* orig_r0, needed only for sys calls */
-
- /* Restore r9 used to code the early prologue */
- EXCPN_PROLOG_RESTORE_REG r9
-
- SAVE_R0_TO_R12
- PUSH gp
- PUSH fp
- PUSH blink
- PUSHAX eret
- PUSHAX erstatus
- PUSH lp_count
- PUSHAX lp_end
- PUSHAX lp_start
- PUSHAX erbta
-.endm
-
-/*--------------------------------------------------------------
- * Restore all registers used by system call or Exceptions
- * SP should always be pointing to the next free stack element
- * when entering this macro.
- *
- * NOTE:
- *
- * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
- * for memory load operations. If used in that way interrupts are deffered
- * by hardware and that is not good.
- *-------------------------------------------------------------*/
-.macro RESTORE_ALL_SYS
- POPAX erbta
- POPAX lp_start
- POPAX lp_end
-
- POP r9
- mov lp_count, r9 ;LD to lp_count is not allowed
-
- POPAX erstatus
- POPAX eret
- POP blink
- POP fp
- POP gp
- RESTORE_R12_TO_R0
-
- ld sp, [sp] /* restore original sp */
- /* orig_r0, ECR, user_r25 skipped automatically */
-.endm
-
-
-/*--------------------------------------------------------------
- * Save all registers used by interrupt handlers.
- *-------------------------------------------------------------*/
-.macro SAVE_ALL_INT1
-
- /* restore original r9 to be saved as part of reg-file */
-#ifdef CONFIG_SMP
- lr r9, [ARC_REG_SCRATCH_DATA0]
-#else
- ld r9, [@int1_saved_reg]
-#endif
-
- /* now we are ready to save the remaining context :) */
- st event_IRQ1, [sp, 8] /* Dummy ECR */
- st 0, [sp, 4] /* orig_r0 , N/A for IRQ */
-
- SAVE_R0_TO_R12
- PUSH gp
- PUSH fp
- PUSH blink
- PUSH ilink1
- PUSHAX status32_l1
- PUSH lp_count
- PUSHAX lp_end
- PUSHAX lp_start
- PUSHAX bta_l1
-.endm
-
-.macro SAVE_ALL_INT2
-
- /* TODO-vineetg: SMP we can't use global nor can we use
- * SCRATCH0 as we do for int1 because while int1 is using
- * it, int2 can come
- */
- /* retsore original r9 , saved in sys_saved_r9 */
- ld r9, [@int2_saved_reg]
-
- /* now we are ready to save the remaining context :) */
- st event_IRQ2, [sp, 8] /* Dummy ECR */
- st 0, [sp, 4] /* orig_r0 , N/A for IRQ */
-
- SAVE_R0_TO_R12
- PUSH gp
- PUSH fp
- PUSH blink
- PUSH ilink2
- PUSHAX status32_l2
- PUSH lp_count
- PUSHAX lp_end
- PUSHAX lp_start
- PUSHAX bta_l2
-.endm
-
-/*--------------------------------------------------------------
- * Restore all registers used by interrupt handlers.
- *
- * NOTE:
- *
- * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
- * for memory load operations. If used in that way interrupts are deffered
- * by hardware and that is not good.
- *-------------------------------------------------------------*/
-
-.macro RESTORE_ALL_INT1
- POPAX bta_l1
- POPAX lp_start
- POPAX lp_end
-
- POP r9
- mov lp_count, r9 ;LD to lp_count is not allowed
-
- POPAX status32_l1
- POP ilink1
- POP blink
- POP fp
- POP gp
- RESTORE_R12_TO_R0
-
- ld sp, [sp] /* restore original sp */
- /* orig_r0, ECR, user_r25 skipped automatically */
-.endm
-
-.macro RESTORE_ALL_INT2
- POPAX bta_l2
- POPAX lp_start
- POPAX lp_end
-
- POP r9
- mov lp_count, r9 ;LD to lp_count is not allowed
-
- POPAX status32_l2
- POP ilink2
- POP blink
- POP fp
- POP gp
- RESTORE_R12_TO_R0
-
- ld sp, [sp] /* restore original sp */
- /* orig_r0, ECR, user_r25 skipped automatically */
-.endm
-
-
-/* Get CPU-ID of this core */
-.macro GET_CPU_ID reg
- lr \reg, [identity]
- lsr \reg, \reg, 8
- bmsk \reg, \reg, 7
-.endm
-
#ifdef CONFIG_SMP
/*-------------------------------------------------
#endif /* CONFIG_ARC_CURR_IN_REG */
-#endif /* __ASSEMBLY__ */
-
#endif /* __ASM_ARC_ENTRY_H */
}
-#define readb_relaxed readb
-#define readw_relaxed readw
-#define readl_relaxed readl
+#ifdef CONFIG_ISA_ARCV2
+#include <asm/barrier.h>
+#define __iormb() rmb()
+#define __iowmb() wmb()
+#else
+#define __iormb() do { } while (0)
+#define __iowmb() do { } while (0)
+#endif
+
+/*
+ * MMIO can also get buffered/optimized in micro-arch, so barriers needed
+ * Based on ARM model for the typical use case
+ *
+ * <ST [DMA buffer]>
+ * <writel MMIO "go" reg>
+ * or:
+ * <readl MMIO "status" reg>
+ * <LD [DMA buffer]>
+ *
+ * http://lkml.kernel.org/r/20150622133656.GG1583@arm.com
+ */
+#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; })
+#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; })
+#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; })
+
+#define writeb(v,c) ({ __iowmb(); writeb_relaxed(v,c); })
+#define writew(v,c) ({ __iowmb(); writew_relaxed(v,c); })
+#define writel(v,c) ({ __iowmb(); writel_relaxed(v,c); })
+
+/*
+ * Relaxed API for drivers which can handle any ordering themselves
+ */
+#define readb_relaxed(c) __raw_readb(c)
+#define readw_relaxed(c) __raw_readw(c)
+#define readl_relaxed(c) __raw_readl(c)
+
+#define writeb_relaxed(v,c) __raw_writeb(v,c)
+#define writew_relaxed(v,c) __raw_writew(v,c)
+#define writel_relaxed(v,c) __raw_writel(v,c)
#include <asm-generic/io.h>
#define NR_IRQS 128 /* allow some CPU external IRQ handling */
/* Platform Independent IRQs */
+#ifdef CONFIG_ISA_ARCOMPACT
#define TIMER0_IRQ 3
#define TIMER1_IRQ 4
+#else
+#define TIMER0_IRQ 16
+#define TIMER1_IRQ 17
+#define IPI_IRQ 19
+#endif
#include <linux/interrupt.h>
#include <asm-generic/irq.h>
--- /dev/null
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_IRQFLAGS_ARCV2_H
+#define __ASM_IRQFLAGS_ARCV2_H
+
+#include <asm/arcregs.h>
+
+/* status32 Bits */
+#define STATUS_AD_BIT 19 /* Disable Align chk: core supports non-aligned */
+#define STATUS_IE_BIT 31
+
+#define STATUS_AD_MASK (1<<STATUS_AD_BIT)
+#define STATUS_IE_MASK (1<<STATUS_IE_BIT)
+
+#define AUX_USER_SP 0x00D
+#define AUX_IRQ_CTRL 0x00E
+#define AUX_IRQ_ACT 0x043 /* Active Intr across all levels */
+#define AUX_IRQ_LVL_PEND 0x200 /* Pending Intr across all levels */
+#define AUX_IRQ_PRIORITY 0x206
+#define ICAUSE 0x40a
+#define AUX_IRQ_SELECT 0x40b
+#define AUX_IRQ_ENABLE 0x40c
+
+/* Was Intr taken in User Mode */
+#define AUX_IRQ_ACT_BIT_U 31
+
+/* 0 is highest level, but taken by FIRQs, if present in design */
+#define ARCV2_IRQ_DEF_PRIO 0
+
+/* seed value for status register */
+#define ISA_INIT_STATUS_BITS (STATUS_IE_MASK | STATUS_AD_MASK | \
+ (ARCV2_IRQ_DEF_PRIO << 1))
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Save IRQ state and disable IRQs
+ */
+static inline long arch_local_irq_save(void)
+{
+ unsigned long flags;
+
+ __asm__ __volatile__(" clri %0 \n" : "=r" (flags) : : "memory");
+
+ return flags;
+}
+
+/*
+ * restore saved IRQ state
+ */
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+ __asm__ __volatile__(" seti %0 \n" : : "r" (flags) : "memory");
+}
+
+/*
+ * Unconditionally Enable IRQs
+ */
+static inline void arch_local_irq_enable(void)
+{
+ unsigned int irqact = read_aux_reg(AUX_IRQ_ACT);
+
+ if (irqact & 0xffff)
+ write_aux_reg(AUX_IRQ_ACT, irqact & ~0xffff);
+
+ __asm__ __volatile__(" seti \n" : : : "memory");
+}
+
+/*
+ * Unconditionally Disable IRQs
+ */
+static inline void arch_local_irq_disable(void)
+{
+ __asm__ __volatile__(" clri \n" : : : "memory");
+}
+
+/*
+ * save IRQ state
+ */
+static inline long arch_local_save_flags(void)
+{
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " lr %0, [status32] \n"
+ : "=&r"(temp)
+ :
+ : "memory");
+
+ return temp;
+}
+
+/*
+ * Query IRQ state
+ */
+static inline int arch_irqs_disabled_flags(unsigned long flags)
+{
+ return !(flags & (STATUS_IE_MASK));
+}
+
+static inline int arch_irqs_disabled(void)
+{
+ return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+#else
+
+.macro IRQ_DISABLE scratch
+ clri
+.endm
+
+.macro IRQ_ENABLE scratch
+ seti
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif
--- /dev/null
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_IRQFLAGS_ARCOMPACT_H
+#define __ASM_IRQFLAGS_ARCOMPACT_H
+
+/* vineetg: March 2010 : local_irq_save( ) optimisation
+ * -Remove explicit mov of current status32 into reg, that is not needed
+ * -Use BIC insn instead of INVERTED + AND
+ * -Conditionally disable interrupts (if they are not enabled, don't disable)
+*/
+
+#include <asm/arcregs.h>
+
+/* status32 Reg bits related to Interrupt Handling */
+#define STATUS_E1_BIT 1 /* Int 1 enable */
+#define STATUS_E2_BIT 2 /* Int 2 enable */
+#define STATUS_A1_BIT 3 /* Int 1 active */
+#define STATUS_A2_BIT 4 /* Int 2 active */
+
+#define STATUS_E1_MASK (1<<STATUS_E1_BIT)
+#define STATUS_E2_MASK (1<<STATUS_E2_BIT)
+#define STATUS_A1_MASK (1<<STATUS_A1_BIT)
+#define STATUS_A2_MASK (1<<STATUS_A2_BIT)
+#define STATUS_IE_MASK (STATUS_E1_MASK | STATUS_E2_MASK)
+
+/* Other Interrupt Handling related Aux regs */
+#define AUX_IRQ_LEV 0x200 /* IRQ Priority: L1 or L2 */
+#define AUX_IRQ_HINT 0x201 /* For generating Soft Interrupts */
+#define AUX_IRQ_LV12 0x43 /* interrupt level register */
+
+#define AUX_IENABLE 0x40c
+#define AUX_ITRIGGER 0x40d
+#define AUX_IPULSE 0x415
+
+#define ISA_INIT_STATUS_BITS STATUS_IE_MASK
+
+#ifndef __ASSEMBLY__
+
+/******************************************************************
+ * IRQ Control Macros
+ *
+ * All of them have "memory" clobber (compiler barrier) which is needed to
+ * ensure that LD/ST requiring irq safetly (R-M-W when LLSC is not available)
+ * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
+ *
+ * Noted at the time of Abilis Timer List corruption
+ * Orig Bug + Rejected solution : https://lkml.org/lkml/2013/3/29/67
+ * Reasoning : https://lkml.org/lkml/2013/4/8/15
+ *
+ ******************************************************************/
+
+/*
+ * Save IRQ state and disable IRQs
+ */
+static inline long arch_local_irq_save(void)
+{
+ unsigned long temp, flags;
+
+ __asm__ __volatile__(
+ " lr %1, [status32] \n"
+ " bic %0, %1, %2 \n"
+ " and.f 0, %1, %2 \n"
+ " flag.nz %0 \n"
+ : "=r"(temp), "=r"(flags)
+ : "n"((STATUS_E1_MASK | STATUS_E2_MASK))
+ : "memory", "cc");
+
+ return flags;
+}
+
+/*
+ * restore saved IRQ state
+ */
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+
+ __asm__ __volatile__(
+ " flag %0 \n"
+ :
+ : "r"(flags)
+ : "memory");
+}
+
+/*
+ * Unconditionally Enable IRQs
+ */
+extern void arch_local_irq_enable(void);
+
+/*
+ * Unconditionally Disable IRQs
+ */
+static inline void arch_local_irq_disable(void)
+{
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " lr %0, [status32] \n"
+ " and %0, %0, %1 \n"
+ " flag %0 \n"
+ : "=&r"(temp)
+ : "n"(~(STATUS_E1_MASK | STATUS_E2_MASK))
+ : "memory");
+}
+
+/*
+ * save IRQ state
+ */
+static inline long arch_local_save_flags(void)
+{
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " lr %0, [status32] \n"
+ : "=&r"(temp)
+ :
+ : "memory");
+
+ return temp;
+}
+
+/*
+ * Query IRQ state
+ */
+static inline int arch_irqs_disabled_flags(unsigned long flags)
+{
+ return !(flags & (STATUS_E1_MASK
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+ | STATUS_E2_MASK
+#endif
+ ));
+}
+
+static inline int arch_irqs_disabled(void)
+{
+ return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+#else
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+.macro TRACE_ASM_IRQ_DISABLE
+ bl trace_hardirqs_off
+.endm
+
+.macro TRACE_ASM_IRQ_ENABLE
+ bl trace_hardirqs_on
+.endm
+
+#else
+
+.macro TRACE_ASM_IRQ_DISABLE
+.endm
+
+.macro TRACE_ASM_IRQ_ENABLE
+.endm
+
+#endif
+
+.macro IRQ_DISABLE scratch
+ lr \scratch, [status32]
+ bic \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
+ flag \scratch
+ TRACE_ASM_IRQ_DISABLE
+.endm
+
+.macro IRQ_ENABLE scratch
+ lr \scratch, [status32]
+ or \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
+ flag \scratch
+ TRACE_ASM_IRQ_ENABLE
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif
/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
#ifndef __ASM_ARC_IRQFLAGS_H
#define __ASM_ARC_IRQFLAGS_H
-/* vineetg: March 2010 : local_irq_save( ) optimisation
- * -Remove explicit mov of current status32 into reg, that is not needed
- * -Use BIC insn instead of INVERTED + AND
- * -Conditionally disable interrupts (if they are not enabled, don't disable)
-*/
-
-#include <asm/arcregs.h>
-
-/* status32 Reg bits related to Interrupt Handling */
-#define STATUS_E1_BIT 1 /* Int 1 enable */
-#define STATUS_E2_BIT 2 /* Int 2 enable */
-#define STATUS_A1_BIT 3 /* Int 1 active */
-#define STATUS_A2_BIT 4 /* Int 2 active */
-
-#define STATUS_E1_MASK (1<<STATUS_E1_BIT)
-#define STATUS_E2_MASK (1<<STATUS_E2_BIT)
-#define STATUS_A1_MASK (1<<STATUS_A1_BIT)
-#define STATUS_A2_MASK (1<<STATUS_A2_BIT)
-
-/* Other Interrupt Handling related Aux regs */
-#define AUX_IRQ_LEV 0x200 /* IRQ Priority: L1 or L2 */
-#define AUX_IRQ_HINT 0x201 /* For generating Soft Interrupts */
-#define AUX_IRQ_LV12 0x43 /* interrupt level register */
-
-#define AUX_IENABLE 0x40c
-#define AUX_ITRIGGER 0x40d
-#define AUX_IPULSE 0x415
-
-#ifndef __ASSEMBLY__
-
-/******************************************************************
- * IRQ Control Macros
- *
- * All of them have "memory" clobber (compiler barrier) which is needed to
- * ensure that LD/ST requiring irq safetly (R-M-W when LLSC is not available)
- * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
- *
- * Noted at the time of Abilis Timer List corruption
- * Orig Bug + Rejected solution : https://lkml.org/lkml/2013/3/29/67
- * Reasoning : https://lkml.org/lkml/2013/4/8/15
- *
- ******************************************************************/
-
-/*
- * Save IRQ state and disable IRQs
- */
-static inline long arch_local_irq_save(void)
-{
- unsigned long temp, flags;
-
- __asm__ __volatile__(
- " lr %1, [status32] \n"
- " bic %0, %1, %2 \n"
- " and.f 0, %1, %2 \n"
- " flag.nz %0 \n"
- : "=r"(temp), "=r"(flags)
- : "n"((STATUS_E1_MASK | STATUS_E2_MASK))
- : "memory", "cc");
-
- return flags;
-}
-
-/*
- * restore saved IRQ state
- */
-static inline void arch_local_irq_restore(unsigned long flags)
-{
-
- __asm__ __volatile__(
- " flag %0 \n"
- :
- : "r"(flags)
- : "memory");
-}
-
-/*
- * Unconditionally Enable IRQs
- */
-extern void arch_local_irq_enable(void);
-
-/*
- * Unconditionally Disable IRQs
- */
-static inline void arch_local_irq_disable(void)
-{
- unsigned long temp;
-
- __asm__ __volatile__(
- " lr %0, [status32] \n"
- " and %0, %0, %1 \n"
- " flag %0 \n"
- : "=&r"(temp)
- : "n"(~(STATUS_E1_MASK | STATUS_E2_MASK))
- : "memory");
-}
-
-/*
- * save IRQ state
- */
-static inline long arch_local_save_flags(void)
-{
- unsigned long temp;
-
- __asm__ __volatile__(
- " lr %0, [status32] \n"
- : "=&r"(temp)
- :
- : "memory");
-
- return temp;
-}
-
-/*
- * Query IRQ state
- */
-static inline int arch_irqs_disabled_flags(unsigned long flags)
-{
- return !(flags & (STATUS_E1_MASK
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
- | STATUS_E2_MASK
-#endif
- ));
-}
-
-static inline int arch_irqs_disabled(void)
-{
- return arch_irqs_disabled_flags(arch_local_save_flags());
-}
-
-#else
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-
-.macro TRACE_ASM_IRQ_DISABLE
- bl trace_hardirqs_off
-.endm
-
-.macro TRACE_ASM_IRQ_ENABLE
- bl trace_hardirqs_on
-.endm
-
+#ifdef CONFIG_ISA_ARCOMPACT
+#include <asm/irqflags-compact.h>
#else
-
-.macro TRACE_ASM_IRQ_DISABLE
-.endm
-
-.macro TRACE_ASM_IRQ_ENABLE
-.endm
-
+#include <asm/irqflags-arcv2.h>
#endif
-.macro IRQ_DISABLE scratch
- lr \scratch, [status32]
- bic \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
- flag \scratch
- TRACE_ASM_IRQ_DISABLE
-.endm
-
-.macro IRQ_ENABLE scratch
- lr \scratch, [status32]
- or \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
- flag \scratch
- TRACE_ASM_IRQ_ENABLE
-.endm
-
-#endif /* __ASSEMBLY__ */
-
#endif
--- /dev/null
+/*
+ * ARConnect IP Support (Multi core enabler: Cross core IPI, RTC ...)
+ *
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_MCIP_H
+#define __ASM_MCIP_H
+
+#ifdef CONFIG_ISA_ARCV2
+
+#include <asm/arcregs.h>
+
+#define ARC_REG_MCIP_BCR 0x0d0
+#define ARC_REG_MCIP_CMD 0x600
+#define ARC_REG_MCIP_WDATA 0x601
+#define ARC_REG_MCIP_READBACK 0x602
+
+struct mcip_cmd {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:8, param:16, cmd:8;
+#else
+ unsigned int cmd:8, param:16, pad:8;
+#endif
+
+#define CMD_INTRPT_GENERATE_IRQ 0x01
+#define CMD_INTRPT_GENERATE_ACK 0x02
+#define CMD_INTRPT_READ_STATUS 0x03
+#define CMD_INTRPT_CHECK_SOURCE 0x04
+
+/* Semaphore Commands */
+#define CMD_SEMA_CLAIM_AND_READ 0x11
+#define CMD_SEMA_RELEASE 0x12
+
+#define CMD_DEBUG_SET_MASK 0x34
+#define CMD_DEBUG_SET_SELECT 0x36
+
+#define CMD_GRTC_READ_LO 0x42
+#define CMD_GRTC_READ_HI 0x43
+
+#define CMD_IDU_ENABLE 0x71
+#define CMD_IDU_DISABLE 0x72
+#define CMD_IDU_SET_MODE 0x74
+#define CMD_IDU_SET_DEST 0x76
+#define CMD_IDU_SET_MASK 0x7C
+
+#define IDU_M_TRIG_LEVEL 0x0
+#define IDU_M_TRIG_EDGE 0x1
+
+#define IDU_M_DISTRI_RR 0x0
+#define IDU_M_DISTRI_DEST 0x2
+};
+
+/*
+ * MCIP programming model
+ *
+ * - Simple commands write {cmd:8,param:16} to MCIP_CMD aux reg
+ * (param could be irq, common_irq, core_id ...)
+ * - More involved commands setup MCIP_WDATA with cmd specific data
+ * before invoking the simple command
+ */
+static inline void __mcip_cmd(unsigned int cmd, unsigned int param)
+{
+ struct mcip_cmd buf;
+
+ buf.pad = 0;
+ buf.cmd = cmd;
+ buf.param = param;
+
+ WRITE_AUX(ARC_REG_MCIP_CMD, buf);
+}
+
+/*
+ * Setup additional data for a cmd
+ * Callers need to lock to ensure atomicity
+ */
+static inline void __mcip_cmd_data(unsigned int cmd, unsigned int param,
+ unsigned int data)
+{
+ write_aux_reg(ARC_REG_MCIP_WDATA, data);
+
+ __mcip_cmd(cmd, param);
+}
+
+extern void mcip_init_early_smp(void);
+extern void mcip_init_smp(unsigned int cpu);
+
+#endif
+
+#endif
#define CONFIG_ARC_MMU_VER 2
#elif defined(CONFIG_ARC_MMU_V3)
#define CONFIG_ARC_MMU_VER 3
+#elif defined(CONFIG_ARC_MMU_V4)
+#define CONFIG_ARC_MMU_VER 4
#endif
/* MMU Management regs */
#define ARC_REG_MMU_BCR 0x06f
+#if (CONFIG_ARC_MMU_VER < 4)
#define ARC_REG_TLBPD0 0x405
#define ARC_REG_TLBPD1 0x406
#define ARC_REG_TLBINDEX 0x407
#define ARC_REG_TLBCOMMAND 0x408
#define ARC_REG_PID 0x409
#define ARC_REG_SCRATCH_DATA0 0x418
+#else
+#define ARC_REG_TLBPD0 0x460
+#define ARC_REG_TLBPD1 0x461
+#define ARC_REG_TLBINDEX 0x464
+#define ARC_REG_TLBCOMMAND 0x465
+#define ARC_REG_PID 0x468
+#define ARC_REG_SCRATCH_DATA0 0x46c
+#endif
/* Bits in MMU PID register */
-#define MMU_ENABLE (1 << 31) /* Enable MMU for process */
+#define __TLB_ENABLE (1 << 31)
+#define __PROG_ENABLE (1 << 30)
+#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE)
/* Error code if probe fails */
#define TLB_LKUP_ERR 0x80000000
+#if (CONFIG_ARC_MMU_VER < 4)
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001)
+#else
+#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000)
+#endif
/* TLB Commands */
#define TLBWrite 0x1
#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */
#endif
+#if (CONFIG_ARC_MMU_VER >= 4)
+#define TLBInsertEntry 0x7
+#define TLBDeleteEntry 0x8
+#endif
+
#ifndef __ASSEMBLY__
typedef struct {
#define _PAGE_READ (1<<3) /* Page has user read perm (H) */
#define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */
#define _PAGE_MODIFIED (1<<5) /* Page modified (dirty) (S) */
+
+#if (CONFIG_ARC_MMU_VER >= 4)
+#define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */
+#endif
+
#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */
#define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */
+
+#if (CONFIG_ARC_MMU_VER >= 4)
+#define _PAGE_SZ (1<<10) /* Page Size indicator (H) */
+#endif
+
#define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr
usable for shared TLB entries (H) */
#endif
*/
#define TSK_K_ESP(tsk) (tsk->thread.ksp)
-#define TSK_K_REG(tsk, off) (*((unsigned int *)(TSK_K_ESP(tsk) + \
+#define TSK_K_REG(tsk, off) (*((unsigned long *)(TSK_K_ESP(tsk) + \
sizeof(struct callee_regs) + off)))
#define TSK_K_BLINK(tsk) TSK_K_REG(tsk, 4)
#endif /* !__ASSEMBLY__ */
-/* Kernels Virtual memory area.
- * Unlike other architectures(MIPS, sh, cris ) ARC 700 does not have a
- * "kernel translated" region (like KSEG2 in MIPS). So we use a upper part
- * of the translated bottom 2GB for kernel virtual memory and protect
- * these pages from user accesses by disabling Ru, Eu and Wu.
+/*
+ * System Memory Map on ARC
+ *
+ * ---------------------------- (lower 2G, Translated) -------------------------
+ * 0x0000_0000 0x5FFF_FFFF (user vaddr: TASK_SIZE)
+ * 0x6000_0000 0x6FFF_FFFF (reserved gutter between U/K)
+ * 0x7000_0000 0x7FFF_FFFF (kvaddr: vmalloc/modules/pkmap..)
+ *
+ * PAGE_OFFSET ---------------- (Upper 2G, Untranslated) -----------------------
+ * 0x8000_0000 0xBFFF_FFFF (kernel direct mapped)
+ * 0xC000_0000 0xFFFF_FFFF (peripheral uncached space)
+ * -----------------------------------------------------------------------------
*/
-#define VMALLOC_SIZE (0x10000000) /* 256M */
-#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
-#define VMALLOC_END (PAGE_OFFSET)
+#define VMALLOC_START 0x70000000
+#define VMALLOC_SIZE (PAGE_OFFSET - VMALLOC_START)
+#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
-/* Most of the architectures seem to be keeping some kind of padding between
- * userspace TASK_SIZE and PAGE_OFFSET. i.e TASK_SIZE != PAGE_OFFSET.
- */
#define USER_KERNEL_GUTTER 0x10000000
-/* User address space:
- * On ARC700, CPU allows the entire lower half of 32 bit address space to be
- * translated. Thus potentially 2G (0:0x7FFF_FFFF) could be User vaddr space.
- * However we steal 256M for kernel addr (0x7000_0000:0x7FFF_FFFF) and another
- * 256M (0x6000_0000:0x6FFF_FFFF) is gutter between user/kernel spaces
- * Thus total User vaddr space is (0:0x5FFF_FFFF)
- */
-#define TASK_SIZE (PAGE_OFFSET - VMALLOC_SIZE - USER_KERNEL_GUTTER)
+#define TASK_SIZE (VMALLOC_START - USER_KERNEL_GUTTER)
#define STACK_TOP TASK_SIZE
#define STACK_TOP_MAX STACK_TOP
/* THE pt_regs: Defines how regs are saved during entry into kernel */
+#ifdef CONFIG_ISA_ARCOMPACT
struct pt_regs {
/* Real registers */
long user_r25;
};
+#else
+
+struct pt_regs {
+
+ long orig_r0;
+
+ union {
+ struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned long state:8, ecr_vec:8,
+ ecr_cause:8, ecr_param:8;
+#else
+ unsigned long ecr_param:8, ecr_cause:8,
+ ecr_vec:8, state:8;
+#endif
+ };
+ unsigned long event;
+ };
+
+ long bta; /* bta_l1, bta_l2, erbta */
+
+ long user_r25;
+
+ long r26; /* gp */
+ long fp;
+ long sp; /* user/kernel sp depending on where we came from */
+
+ long r12;
+
+ /*------- Below list auto saved by h/w -----------*/
+ long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
+
+ long blink;
+ long lp_end, lp_start, lp_count;
+
+ long ei, ldi, jli;
+
+ long ret;
+ long status32;
+};
+
+#endif
/* Callee saved registers - need to be saved only when you are scheduled out */
{
unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
+ /*
+ * This smp_mb() is technically superfluous, we only need the one
+ * after the lock for providing the ACQUIRE semantics.
+ * However doing the "right" thing was regressing hackbench
+ * so keeping this, pending further investigation
+ */
+ smp_mb();
+
__asm__ __volatile__(
"1: ex %0, [%1] \n"
" breq %0, %2, 1b \n"
: "+&r" (tmp)
: "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
: "memory");
+
+ /*
+ * ACQUIRE barrier to ensure load/store after taking the lock
+ * don't "bleed-up" out of the critical section (leak-in is allowed)
+ * http://www.spinics.net/lists/kernel/msg2010409.html
+ *
+ * ARCv2 only has load-load, store-store and all-all barrier
+ * thus need the full all-all barrier
+ */
+ smp_mb();
}
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
+ smp_mb();
+
__asm__ __volatile__(
"1: ex %0, [%1] \n"
: "+r" (tmp)
: "r"(&(lock->slock))
: "memory");
+ smp_mb();
+
return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__);
}
{
unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__;
+ /*
+ * RELEASE barrier: given the instructions avail on ARCv2, full barrier
+ * is the only option
+ */
+ smp_mb();
+
__asm__ __volatile__(
" ex %0, [%1] \n"
: "+r" (tmp)
: "r"(&(lock->slock))
: "memory");
+ /*
+ * superfluous, but keeping for now - see pairing version in
+ * arch_spin_lock above
+ */
smp_mb();
}
#endif
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define THREAD_SHIFT (PAGE_SHIFT << THREAD_SIZE_ORDER)
#ifndef __ASSEMBLY__
static inline long
__arc_strncpy_from_user(char *dst, const char __user *src, long count)
{
- long res = count;
+ long res = 0;
char val;
- unsigned int hw_count;
if (count == 0)
return 0;
__asm__ __volatile__(
- " lp 2f \n"
+ " lp 3f \n"
"1: ldb.ab %3, [%2, 1] \n"
- " breq.d %3, 0, 2f \n"
+ " breq.d %3, 0, 3f \n"
" stb.ab %3, [%1, 1] \n"
- "2: sub %0, %6, %4 \n"
- "3: ;nop \n"
+ " add %0, %0, 1 # Num of NON NULL bytes copied \n"
+ "3: \n"
" .section .fixup, \"ax\" \n"
" .align 4 \n"
- "4: mov %0, %5 \n"
+ "4: mov %0, %4 # sets @res as -EFAULT \n"
" j 3b \n"
" .previous \n"
" .section __ex_table, \"a\" \n"
" .align 4 \n"
" .word 1b, 4b \n"
" .previous \n"
- : "=r"(res), "+r"(dst), "+r"(src), "=&r"(val), "=l"(hw_count)
- : "g"(-EFAULT), "ir"(count), "4"(count) /* this "4" seeds lp_count */
+ : "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
+ : "g"(-EFAULT), "l"(count)
: "memory");
return res;
#define PAGE_OFFSET (0x80000000)
#else
#define PAGE_SIZE (1UL << PAGE_SHIFT) /* Default 8K */
-#define PAGE_OFFSET (0x80000000UL) /* Kernel starts at 2G onwards */
+#define PAGE_OFFSET (0x80000000UL) /* Kernel starts at 2G onwards */
#endif
#define PAGE_MASK (~(PAGE_SIZE-1))
# Pass UTS_MACHINE for user_regset definition
CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-obj-y := arcksyms.o setup.o irq.o time.o reset.o ptrace.o entry.o process.o
+obj-y := arcksyms.o setup.o irq.o time.o reset.o ptrace.o process.o devtree.o
obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o clk.o
-obj-y += devtree.o
+obj-$(CONFIG_ISA_ARCOMPACT) += entry-compact.o intc-compact.o
+obj-$(CONFIG_ISA_ARCV2) += entry-arcv2.o intc-arcv2.o
obj-$(CONFIG_MODULES) += arcksyms.o module.o
obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_ARC_MCIP) += mcip.o
obj-$(CONFIG_ARC_DW2_UNWIND) += unwind.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_ARC_EMUL_UNALIGNED) += unaligned.o
DEFINE(TASK_ACT_MM, offsetof(struct task_struct, active_mm));
DEFINE(TASK_TGID, offsetof(struct task_struct, tgid));
+ DEFINE(TASK_PID, offsetof(struct task_struct, pid));
+ DEFINE(TASK_COMM, offsetof(struct task_struct, comm));
DEFINE(MM_CTXT, offsetof(struct mm_struct, context));
DEFINE(MM_PGD, offsetof(struct mm_struct, pgd));
DEFINE(PT_r5, offsetof(struct pt_regs, r5));
DEFINE(PT_r6, offsetof(struct pt_regs, r6));
DEFINE(PT_r7, offsetof(struct pt_regs, r7));
+ DEFINE(PT_ret, offsetof(struct pt_regs, ret));
DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
+ DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25));
+
return 0;
}
if (of_flat_dt_is_compatible(dt_root, "abilis,arc-tb10x"))
arc_base_baud = core_clk/3;
+ else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp"))
+ arc_base_baud = 33333333; /* Fixed 33MHz clk (AXS10x) */
else
arc_base_baud = core_clk;
}
--- /dev/null
+/*
+ * ARCv2 ISA based core Low Level Intr/Traps/Exceptions(non-TLB) Handling
+ *
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h> /* ARC_{EXTRY,EXIT} */
+#include <asm/entry.h> /* SAVE_ALL_{INT1,INT2,TRAP...} */
+#include <asm/errno.h>
+#include <asm/arcregs.h>
+#include <asm/irqflags.h>
+
+ .cpu HS
+
+#define VECTOR .word
+
+;############################ Vector Table #################################
+
+ .section .vector,"a",@progbits
+ .align 4
+
+# Initial 16 slots are Exception Vectors
+VECTOR stext ; Restart Vector (jump to entry point)
+VECTOR mem_service ; Mem exception
+VECTOR instr_service ; Instrn Error
+VECTOR EV_MachineCheck ; Fatal Machine check
+VECTOR EV_TLBMissI ; Intruction TLB miss
+VECTOR EV_TLBMissD ; Data TLB miss
+VECTOR EV_TLBProtV ; Protection Violation
+VECTOR EV_PrivilegeV ; Privilege Violation
+VECTOR EV_SWI ; Software Breakpoint
+VECTOR EV_Trap ; Trap exception
+VECTOR EV_Extension ; Extn Instruction Exception
+VECTOR EV_DivZero ; Divide by Zero
+VECTOR EV_DCError ; Data Cache Error
+VECTOR EV_Misaligned ; Misaligned Data Access
+VECTOR reserved ; Reserved slots
+VECTOR reserved ; Reserved slots
+
+# Begin Interrupt Vectors
+VECTOR handle_interrupt ; (16) Timer0
+VECTOR handle_interrupt ; unused (Timer1)
+VECTOR handle_interrupt ; unused (WDT)
+VECTOR handle_interrupt ; (19) ICI (inter core interrupt)
+VECTOR handle_interrupt
+VECTOR handle_interrupt
+VECTOR handle_interrupt
+VECTOR handle_interrupt ; (23) End of fixed IRQs
+
+.rept CONFIG_ARC_NUMBER_OF_INTERRUPTS - 8
+ VECTOR handle_interrupt
+.endr
+
+ .section .text, "ax",@progbits
+
+res_service: ; processor restart
+ flag 0x1 ; not implemented
+ nop
+ nop
+
+reserved: ; processor restart
+ rtie ; jump to processor initializations
+
+;##################### Interrupt Handling ##############################
+
+ENTRY(handle_interrupt)
+
+ INTERRUPT_PROLOGUE irq
+
+ clri ; To make status32.IE agree with CPU internal state
+
+ lr r0, [ICAUSE]
+
+ mov blink, ret_from_exception
+
+ b.d arch_do_IRQ
+ mov r1, sp
+
+END(handle_interrupt)
+
+;################### Non TLB Exception Handling #############################
+
+ENTRY(EV_SWI)
+ flag 1
+END(EV_SWI)
+
+ENTRY(EV_DivZero)
+ flag 1
+END(EV_DivZero)
+
+ENTRY(EV_DCError)
+ flag 1
+END(EV_DCError)
+
+ENTRY(EV_Misaligned)
+
+ EXCEPTION_PROLOGUE
+
+ lr r0, [efa] ; Faulting Data address
+ mov r1, sp
+
+ FAKE_RET_FROM_EXCPN
+
+ SAVE_CALLEE_SAVED_USER
+ mov r2, sp ; callee_regs
+
+ bl do_misaligned_access
+
+ ; TBD: optimize - do this only if a callee reg was involved
+ ; either a dst of emulated LD/ST or src with address-writeback
+ RESTORE_CALLEE_SAVED_USER
+
+ b ret_from_exception
+END(EV_Misaligned)
+
+; ---------------------------------------------
+; Protection Violation Exception Handler
+; ---------------------------------------------
+
+ENTRY(EV_TLBProtV)
+
+ EXCEPTION_PROLOGUE
+
+ lr r0, [efa] ; Faulting Data address
+ mov r1, sp ; pt_regs
+
+ FAKE_RET_FROM_EXCPN
+
+ mov blink, ret_from_exception
+ b do_page_fault
+
+END(EV_TLBProtV)
+
+; From Linux standpoint Slow Path I/D TLB Miss is same a ProtV as they
+; need to call do_page_fault().
+; ECR in pt_regs provides whether access was R/W/X
+
+.global call_do_page_fault
+.set call_do_page_fault, EV_TLBProtV
+
+;############# Common Handlers for ARCompact and ARCv2 ##############
+
+#include "entry.S"
+
+;############# Return from Intr/Excp/Trap (ARCv2 ISA Specifics) ##############
+;
+; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap)
+; IRQ shd definitely not happen between now and rtie
+; All 2 entry points to here already disable interrupts
+
+.Lrestore_regs:
+
+ ld r0, [sp, PT_status32] ; U/K mode at time of entry
+ lr r10, [AUX_IRQ_ACT]
+
+ bmsk r11, r10, 15 ; AUX_IRQ_ACT.ACTIVE
+ breq r11, 0, .Lexcept_ret ; No intr active, ret from Exception
+
+;####### Return from Intr #######
+
+debug_marker_l1:
+ bbit1.nt r0, STATUS_DE_BIT, .Lintr_ret_to_delay_slot
+
+.Lisr_ret_fast_path:
+ ; Handle special case #1: (Entry via Exception, Return via IRQ)
+ ;
+ ; Exception in U mode, preempted in kernel, Intr taken (K mode), orig
+ ; task now returning to U mode (riding the Intr)
+ ; AUX_IRQ_ACTIVE won't have U bit set (since intr in K mode), hence SP
+ ; won't be switched to correct U mode value (from AUX_SP)
+ ; So force AUX_IRQ_ACT.U for such a case
+
+ btst r0, STATUS_U_BIT ; Z flag set if K (Z clear for U)
+ bset.nz r11, r11, AUX_IRQ_ACT_BIT_U ; NZ means U
+ sr r11, [AUX_IRQ_ACT]
+
+ INTERRUPT_EPILOGUE irq
+ rtie
+
+;####### Return from Exception / pure kernel mode #######
+
+.Lexcept_ret: ; Expects r0 has PT_status32
+
+debug_marker_syscall:
+ EXCEPTION_EPILOGUE
+ rtie
+
+;####### Return from Intr to insn in delay slot #######
+
+; Handle special case #2: (Entry via Exception in Delay Slot, Return via IRQ)
+;
+; Intr returning to a Delay Slot (DS) insn
+; (since IRQ NOT allowed in DS in ARCv2, this can only happen if orig
+; entry was via Exception in DS which got preempted in kernel).
+;
+; IRQ RTIE won't reliably restore DE bit and/or BTA, needs handling
+.Lintr_ret_to_delay_slot:
+debug_marker_ds:
+
+ ld r2, [@intr_to_DE_cnt]
+ add r2, r2, 1
+ st r2, [@intr_to_DE_cnt]
+
+ ld r2, [sp, PT_ret]
+ ld r3, [sp, PT_status32]
+
+ bic r0, r3, STATUS_U_MASK|STATUS_DE_MASK|STATUS_IE_MASK|STATUS_L_MASK
+ st r0, [sp, PT_status32]
+
+ mov r1, .Lintr_ret_to_delay_slot_2
+ st r1, [sp, PT_ret]
+
+ st r2, [sp, 0]
+ st r3, [sp, 4]
+
+ b .Lisr_ret_fast_path
+
+.Lintr_ret_to_delay_slot_2:
+ sub sp, sp, SZ_PT_REGS
+ st r9, [sp, -4]
+
+ ld r9, [sp, 0]
+ sr r9, [eret]
+
+ ld r9, [sp, 4]
+ sr r9, [erstatus]
+
+ ld r9, [sp, 8]
+ sr r9, [erbta]
+
+ ld r9, [sp, -4]
+ add sp, sp, SZ_PT_REGS
+ rtie
+
+END(ret_from_exception)
--- /dev/null
+/*
+ * Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARCompact ISA
+ *
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * vineetg: May 2011
+ * -Userspace unaligned access emulation
+ *
+ * vineetg: Feb 2011 (ptrace low level code fixes)
+ * -traced syscall return code (r0) was not saved into pt_regs for restoring
+ * into user reg-file when traded task rets to user space.
+ * -syscalls needing arch-wrappers (mainly for passing sp as pt_regs)
+ * were not invoking post-syscall trace hook (jumping directly into
+ * ret_from_system_call)
+ *
+ * vineetg: Nov 2010:
+ * -Vector table jumps (@8 bytes) converted into branches (@4 bytes)
+ * -To maintain the slot size of 8 bytes/vector, added nop, which is
+ * not executed at runtime.
+ *
+ * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK)
+ * -do_signal()invoked upon TIF_RESTORE_SIGMASK as well
+ * -Wrappers for sys_{,rt_}sigsuspend() nolonger needed as they don't
+ * need ptregs anymore
+ *
+ * Vineetg: Oct 2009
+ * -In a rare scenario, Process gets a Priv-V exception and gets scheduled
+ * out. Since we don't do FAKE RTIE for Priv-V, CPU excpetion state remains
+ * active (AE bit enabled). This causes a double fault for a subseq valid
+ * exception. Thus FAKE RTIE needed in low level Priv-Violation handler.
+ * Instr Error could also cause similar scenario, so same there as well.
+ *
+ * Vineetg: March 2009 (Supporting 2 levels of Interrupts)
+ *
+ * Vineetg: Aug 28th 2008: Bug #94984
+ * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap
+ * Normally CPU does this automatically, however when doing FAKE rtie,
+ * we need to explicitly do this. The problem in macros
+ * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit
+ * was being "CLEARED" rather then "SET". Since it is Loop INHIBIT Bit,
+ * setting it and not clearing it clears ZOL context
+ *
+ * Vineetg: May 16th, 2008
+ * - r25 now contains the Current Task when in kernel
+ *
+ * Vineetg: Dec 22, 2007
+ * Minor Surgery of Low Level ISR to make it SMP safe
+ * - MMU_SCRATCH0 Reg used for freeing up r9 in Level 1 ISR
+ * - _current_task is made an array of NR_CPUS
+ * - Access of _current_task wrapped inside a macro so that if hardware
+ * team agrees for a dedicated reg, no other code is touched
+ *
+ * Amit Bhor, Rahul Trivedi, Kanika Nema, Sameer Dhavale : Codito Tech 2004
+ */
+
+#include <linux/errno.h>
+#include <linux/linkage.h> /* {EXTRY,EXIT} */
+#include <asm/entry.h>
+#include <asm/irqflags.h>
+
+ .cpu A7
+
+;############################ Vector Table #################################
+
+.macro VECTOR lbl
+#if 1 /* Just in case, build breaks */
+ j \lbl
+#else
+ b \lbl
+ nop
+#endif
+.endm
+
+ .section .vector, "ax",@progbits
+ .align 4
+
+/* Each entry in the vector table must occupy 2 words. Since it is a jump
+ * across sections (.vector to .text) we are gauranteed that 'j somewhere'
+ * will use the 'j limm' form of the intrsuction as long as somewhere is in
+ * a section other than .vector.
+ */
+
+; ********* Critical System Events **********************
+VECTOR res_service ; 0x0, Restart Vector (0x0)
+VECTOR mem_service ; 0x8, Mem exception (0x1)
+VECTOR instr_service ; 0x10, Instrn Error (0x2)
+
+; ******************** Device ISRs **********************
+#ifdef CONFIG_ARC_IRQ3_LV2
+VECTOR handle_interrupt_level2
+#else
+VECTOR handle_interrupt_level1
+#endif
+
+VECTOR handle_interrupt_level1
+
+#ifdef CONFIG_ARC_IRQ5_LV2
+VECTOR handle_interrupt_level2
+#else
+VECTOR handle_interrupt_level1
+#endif
+
+#ifdef CONFIG_ARC_IRQ6_LV2
+VECTOR handle_interrupt_level2
+#else
+VECTOR handle_interrupt_level1
+#endif
+
+.rept 25
+VECTOR handle_interrupt_level1 ; Other devices
+.endr
+
+/* FOR ARC600: timer = 0x3, uart = 0x8, emac = 0x10 */
+
+; ******************** Exceptions **********************
+VECTOR EV_MachineCheck ; 0x100, Fatal Machine check (0x20)
+VECTOR EV_TLBMissI ; 0x108, Intruction TLB miss (0x21)
+VECTOR EV_TLBMissD ; 0x110, Data TLB miss (0x22)
+VECTOR EV_TLBProtV ; 0x118, Protection Violation (0x23)
+ ; or Misaligned Access
+VECTOR EV_PrivilegeV ; 0x120, Privilege Violation (0x24)
+VECTOR EV_Trap ; 0x128, Trap exception (0x25)
+VECTOR EV_Extension ; 0x130, Extn Intruction Excp (0x26)
+
+.rept 24
+VECTOR reserved ; Reserved Exceptions
+.endr
+
+
+;##################### Scratch Mem for IRQ stack switching #############
+
+ARCFP_DATA int1_saved_reg
+ .align 32
+ .type int1_saved_reg, @object
+ .size int1_saved_reg, 4
+int1_saved_reg:
+ .zero 4
+
+/* Each Interrupt level needs its own scratch */
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+
+ARCFP_DATA int2_saved_reg
+ .type int2_saved_reg, @object
+ .size int2_saved_reg, 4
+int2_saved_reg:
+ .zero 4
+
+#endif
+
+; ---------------------------------------------
+ .section .text, "ax",@progbits
+
+res_service: ; processor restart
+ flag 0x1 ; not implemented
+ nop
+ nop
+
+reserved: ; processor restart
+ rtie ; jump to processor initializations
+
+;##################### Interrupt Handling ##############################
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+; ---------------------------------------------
+; Level 2 ISR: Can interrupt a Level 1 ISR
+; ---------------------------------------------
+ENTRY(handle_interrupt_level2)
+
+ INTERRUPT_PROLOGUE 2
+
+ ;------------------------------------------------------
+ ; if L2 IRQ interrupted a L1 ISR, disable preemption
+ ;------------------------------------------------------
+
+ ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs)
+ bbit0 r9, STATUS_A1_BIT, 1f ; L1 not active when L2 IRQ, so normal
+
+ ; A1 is set in status32_l2
+ ; bump thread_info->preempt_count (Disable preemption)
+ GET_CURR_THR_INFO_FROM_SP r10
+ ld r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+ add r9, r9, 1
+ st r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+
+1:
+ ;------------------------------------------------------
+ ; setup params for Linux common ISR and invoke it
+ ;------------------------------------------------------
+ lr r0, [icause2]
+ and r0, r0, 0x1f
+
+ bl.d @arch_do_IRQ
+ mov r1, sp
+
+ mov r8,0x2
+ sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg
+
+ b ret_from_exception
+
+END(handle_interrupt_level2)
+
+#endif
+
+; ---------------------------------------------
+; Level 1 ISR
+; ---------------------------------------------
+ENTRY(handle_interrupt_level1)
+
+ INTERRUPT_PROLOGUE 1
+
+ lr r0, [icause1]
+ and r0, r0, 0x1f
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+ ; icause1 needs to be read early, before calling tracing, which
+ ; can clobber scratch regs, hence use of stack to stash it
+ push r0
+ TRACE_ASM_IRQ_DISABLE
+ pop r0
+#endif
+
+ bl.d @arch_do_IRQ
+ mov r1, sp
+
+ mov r8,0x1
+ sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg
+
+ b ret_from_exception
+END(handle_interrupt_level1)
+
+;################### Non TLB Exception Handling #############################
+
+; ---------------------------------------------
+; Protection Violation Exception Handler
+; ---------------------------------------------
+
+ENTRY(EV_TLBProtV)
+
+ EXCEPTION_PROLOGUE
+
+ lr r2, [ecr]
+ lr r0, [efa] ; Faulting Data address (not part of pt_regs saved above)
+
+ ; Exception auto-disables further Intr/exceptions.
+ ; Re-enable them by pretending to return from exception
+ ; (so rest of handler executes in pure K mode)
+
+ FAKE_RET_FROM_EXCPN
+
+ mov r1, sp ; Handle to pt_regs
+
+ ;------ (5) Type of Protection Violation? ----------
+ ;
+ ; ProtV Hardware Exception is triggered for Access Faults of 2 types
+ ; -Access Violaton : 00_23_(00|01|02|03)_00
+ ; x r w r+w
+ ; -Unaligned Access : 00_23_04_00
+ ;
+ bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
+
+ ;========= (6a) Access Violation Processing ========
+ bl do_page_fault
+ b ret_from_exception
+
+ ;========== (6b) Non aligned access ============
+4:
+
+ SAVE_CALLEE_SAVED_USER
+ mov r2, sp ; callee_regs
+
+ bl do_misaligned_access
+
+ ; TBD: optimize - do this only if a callee reg was involved
+ ; either a dst of emulated LD/ST or src with address-writeback
+ RESTORE_CALLEE_SAVED_USER
+
+ b ret_from_exception
+
+END(EV_TLBProtV)
+
+; Wrapper for Linux page fault handler called from EV_TLBMiss*
+; Very similar to ProtV handler case (6a) above, but avoids the extra checks
+; for Misaligned access
+;
+ENTRY(call_do_page_fault)
+
+ EXCEPTION_PROLOGUE
+ lr r0, [efa] ; Faulting Data address
+ mov r1, sp
+ FAKE_RET_FROM_EXCPN
+
+ mov blink, ret_from_exception
+ b do_page_fault
+
+END(call_do_page_fault)
+
+;############# Common Handlers for ARCompact and ARCv2 ##############
+
+#include "entry.S"
+
+;############# Return from Intr/Excp/Trap (ARC Specifics) ##############
+;
+; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap)
+; IRQ shd definitely not happen between now and rtie
+; All 2 entry points to here already disable interrupts
+
+.Lrestore_regs:
+
+ TRACE_ASM_IRQ_ENABLE
+
+ lr r10, [status32]
+
+ ; Restore REG File. In case multiple Events outstanding,
+ ; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None
+ ; Note that we use realtime STATUS32 (not pt_regs->status32) to
+ ; decide that.
+
+ ; if Returning from Exception
+ btst r10, STATUS_AE_BIT
+ bnz .Lexcep_ret
+
+ ; Not Exception so maybe Interrupts (Level 1 or 2)
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
+
+ ; Level 2 interrupt return Path - from hardware standpoint
+ bbit0 r10, STATUS_A2_BIT, not_level2_interrupt
+
+ ;------------------------------------------------------------------
+ ; However the context returning might not have taken L2 intr itself
+ ; e.g. Task'A' user-code -> L2 intr -> schedule -> 'B' user-code ret
+ ; Special considerations needed for the context which took L2 intr
+
+ ld r9, [sp, PT_event] ; Ensure this is L2 intr context
+ brne r9, event_IRQ2, 149f
+
+ ;------------------------------------------------------------------
+ ; if L2 IRQ interrupted an L1 ISR, we'd disabled preemption earlier
+ ; so that sched doesn't move to new task, causing L1 to be delayed
+ ; undeterministically. Now that we've achieved that, let's reset
+ ; things to what they were, before returning from L2 context
+ ;----------------------------------------------------------------
+
+ ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs)
+ bbit0 r9, STATUS_A1_BIT, 149f ; L1 not active when L2 IRQ, so normal
+
+ ; decrement thread_info->preempt_count (re-enable preemption)
+ GET_CURR_THR_INFO_FROM_SP r10
+ ld r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+
+ ; paranoid check, given A1 was active when A2 happened, preempt count
+ ; must not be 0 because we would have incremented it.
+ ; If this does happen we simply HALT as it means a BUG !!!
+ cmp r9, 0
+ bnz 2f
+ flag 1
+
+2:
+ sub r9, r9, 1
+ st r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+
+149:
+ ;return from level 2
+ INTERRUPT_EPILOGUE 2
+debug_marker_l2:
+ rtie
+
+not_level2_interrupt:
+
+#endif
+
+ bbit0 r10, STATUS_A1_BIT, .Lpure_k_mode_ret
+
+ ;return from level 1
+ INTERRUPT_EPILOGUE 1
+debug_marker_l1:
+ rtie
+
+.Lexcep_ret:
+.Lpure_k_mode_ret:
+
+ ;this case is for syscalls or Exceptions or pure kernel mode
+
+ EXCEPTION_EPILOGUE
+debug_marker_syscall:
+ rtie
+
+END(ret_from_exception)
/*
- * Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARC
+ * Common Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARC
+ * (included from entry-<isa>.S
*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * vineetg: May 2011
- * -Userspace unaligned access emulation
- *
- * vineetg: Feb 2011 (ptrace low level code fixes)
- * -traced syscall return code (r0) was not saved into pt_regs for restoring
- * into user reg-file when traded task rets to user space.
- * -syscalls needing arch-wrappers (mainly for passing sp as pt_regs)
- * were not invoking post-syscall trace hook (jumping directly into
- * ret_from_system_call)
- *
- * vineetg: Nov 2010:
- * -Vector table jumps (@8 bytes) converted into branches (@4 bytes)
- * -To maintain the slot size of 8 bytes/vector, added nop, which is
- * not executed at runtime.
- *
- * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK)
- * -do_signal()invoked upon TIF_RESTORE_SIGMASK as well
- * -Wrappers for sys_{,rt_}sigsuspend() nolonger needed as they don't
- * need ptregs anymore
- *
- * Vineetg: Oct 2009
- * -In a rare scenario, Process gets a Priv-V exception and gets scheduled
- * out. Since we don't do FAKE RTIE for Priv-V, CPU excpetion state remains
- * active (AE bit enabled). This causes a double fault for a subseq valid
- * exception. Thus FAKE RTIE needed in low level Priv-Violation handler.
- * Instr Error could also cause similar scenario, so same there as well.
- *
- * Vineetg: March 2009 (Supporting 2 levels of Interrupts)
- *
- * Vineetg: Aug 28th 2008: Bug #94984
- * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap
- * Normally CPU does this automatically, however when doing FAKE rtie,
- * we need to explicitly do this. The problem in macros
- * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit
- * was being "CLEARED" rather then "SET". Since it is Loop INHIBIT Bit,
- * setting it and not clearing it clears ZOL context
- *
- * Vineetg: May 16th, 2008
- * - r25 now contains the Current Task when in kernel
- *
- * Vineetg: Dec 22, 2007
- * Minor Surgery of Low Level ISR to make it SMP safe
- * - MMU_SCRATCH0 Reg used for freeing up r9 in Level 1 ISR
- * - _current_task is made an array of NR_CPUS
- * - Access of _current_task wrapped inside a macro so that if hardware
- * team agrees for a dedicated reg, no other code is touched
- *
- * Amit Bhor, Rahul Trivedi, Kanika Nema, Sameer Dhavale : Codito Tech 2004
*/
/*------------------------------------------------------------------
* Global Pointer (gp) r26
* Frame Pointer (fp) r27
* Stack Pointer (sp) r28
- * Interrupt link register (ilink1) r29
- * Interrupt link register (ilink2) r30
* Branch link register (blink) r31
*------------------------------------------------------------------
*/
- .cpu A7
-
-;############################ Vector Table #################################
-
-.macro VECTOR lbl
-#if 1 /* Just in case, build breaks */
- j \lbl
-#else
- b \lbl
- nop
-#endif
-.endm
-
- .section .vector, "ax",@progbits
- .align 4
-
-/* Each entry in the vector table must occupy 2 words. Since it is a jump
- * across sections (.vector to .text) we are gauranteed that 'j somewhere'
- * will use the 'j limm' form of the intrsuction as long as somewhere is in
- * a section other than .vector.
- */
-
-; ********* Critical System Events **********************
-VECTOR res_service ; 0x0, Restart Vector (0x0)
-VECTOR mem_service ; 0x8, Mem exception (0x1)
-VECTOR instr_service ; 0x10, Instrn Error (0x2)
-
-; ******************** Device ISRs **********************
-#ifdef CONFIG_ARC_IRQ3_LV2
-VECTOR handle_interrupt_level2
-#else
-VECTOR handle_interrupt_level1
-#endif
-
-VECTOR handle_interrupt_level1
-
-#ifdef CONFIG_ARC_IRQ5_LV2
-VECTOR handle_interrupt_level2
-#else
-VECTOR handle_interrupt_level1
-#endif
-
-#ifdef CONFIG_ARC_IRQ6_LV2
-VECTOR handle_interrupt_level2
-#else
-VECTOR handle_interrupt_level1
-#endif
-
-.rept 25
-VECTOR handle_interrupt_level1 ; Other devices
-.endr
-
-/* FOR ARC600: timer = 0x3, uart = 0x8, emac = 0x10 */
-
-; ******************** Exceptions **********************
-VECTOR EV_MachineCheck ; 0x100, Fatal Machine check (0x20)
-VECTOR EV_TLBMissI ; 0x108, Intruction TLB miss (0x21)
-VECTOR EV_TLBMissD ; 0x110, Data TLB miss (0x22)
-VECTOR EV_TLBProtV ; 0x118, Protection Violation (0x23)
- ; or Misaligned Access
-VECTOR EV_PrivilegeV ; 0x120, Privilege Violation (0x24)
-VECTOR EV_Trap ; 0x128, Trap exception (0x25)
-VECTOR EV_Extension ; 0x130, Extn Intruction Excp (0x26)
-
-.rept 24
-VECTOR reserved ; Reserved Exceptions
-.endr
-
-#include <linux/linkage.h> /* {EXTRY,EXIT} */
-#include <asm/entry.h> /* SAVE_ALL_{INT1,INT2,SYS...} */
-#include <asm/errno.h>
-#include <asm/arcregs.h>
-#include <asm/irqflags.h>
-
-;##################### Scratch Mem for IRQ stack switching #############
-
-ARCFP_DATA int1_saved_reg
- .align 32
- .type int1_saved_reg, @object
- .size int1_saved_reg, 4
-int1_saved_reg:
- .zero 4
-
-/* Each Interrupt level needs its own scratch */
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
-
-ARCFP_DATA int2_saved_reg
- .type int2_saved_reg, @object
- .size int2_saved_reg, 4
-int2_saved_reg:
- .zero 4
-
-#endif
-
-; ---------------------------------------------
- .section .text, "ax",@progbits
-
-res_service: ; processor restart
- flag 0x1 ; not implemented
- nop
- nop
-
-reserved: ; processor restart
- rtie ; jump to processor initializations
-
-;##################### Interrupt Handling ##############################
-
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
-; ---------------------------------------------
-; Level 2 ISR: Can interrupt a Level 1 ISR
-; ---------------------------------------------
-ENTRY(handle_interrupt_level2)
+;################### Special Sys Call Wrappers ##########################
- ; TODO-vineetg for SMP this wont work
- ; free up r9 as scratchpad
- st r9, [@int2_saved_reg]
+ENTRY(sys_clone_wrapper)
+ SAVE_CALLEE_SAVED_USER
+ bl @sys_clone
+ DISCARD_CALLEE_SAVED_USER
- ;Which mode (user/kernel) was the system in when intr occured
- lr r9, [status32_l2]
+ GET_CURR_THR_INFO_FLAGS r10
+ btst r10, TIF_SYSCALL_TRACE
+ bnz tracesys_exit
- SWITCH_TO_KERNEL_STK
- SAVE_ALL_INT2
+ b ret_from_system_call
+END(sys_clone_wrapper)
- ;------------------------------------------------------
- ; if L2 IRQ interrupted a L1 ISR, disable preemption
- ;------------------------------------------------------
+ENTRY(ret_from_fork)
+ ; when the forked child comes here from the __switch_to function
+ ; r0 has the last task pointer.
+ ; put last task in scheduler queue
+ bl @schedule_tail
- ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs)
- bbit0 r9, STATUS_A1_BIT, 1f ; L1 not active when L2 IRQ, so normal
+ ld r9, [sp, PT_status32]
+ brne r9, 0, 1f
- ; A1 is set in status32_l2
- ; bump thread_info->preempt_count (Disable preemption)
- GET_CURR_THR_INFO_FROM_SP r10
- ld r9, [r10, THREAD_INFO_PREEMPT_COUNT]
- add r9, r9, 1
- st r9, [r10, THREAD_INFO_PREEMPT_COUNT]
+ jl.d [r14] ; kernel thread entry point
+ mov r0, r13 ; (see PF_KTHREAD block in copy_thread)
1:
- ;------------------------------------------------------
- ; setup params for Linux common ISR and invoke it
- ;------------------------------------------------------
- lr r0, [icause2]
- and r0, r0, 0x1f
-
- bl.d @arch_do_IRQ
- mov r1, sp
-
- mov r8,0x2
- sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg
-
- b ret_from_exception
-
-END(handle_interrupt_level2)
-
-#endif
-
-; ---------------------------------------------
-; Level 1 ISR
-; ---------------------------------------------
-ENTRY(handle_interrupt_level1)
-
- /* free up r9 as scratchpad */
-#ifdef CONFIG_SMP
- sr r9, [ARC_REG_SCRATCH_DATA0]
-#else
- st r9, [@int1_saved_reg]
-#endif
-
- ;Which mode (user/kernel) was the system in when intr occured
- lr r9, [status32_l1]
-
- SWITCH_TO_KERNEL_STK
- SAVE_ALL_INT1
+ ; Return to user space
+ ; 1. Any forked task (Reach here via BRne above)
+ ; 2. First ever init task (Reach here via return from JL above)
+ ; This is the historic "kernel_execve" use-case, to return to init
+ ; user mode, in a round about way since that is always done from
+ ; a kernel thread which is executed via JL above but always returns
+ ; out whenever kernel_execve (now inline do_fork()) is involved
+ b ret_from_exception
+END(ret_from_fork)
- lr r0, [icause1]
- and r0, r0, 0x1f
+#ifdef CONFIG_ARC_DW2_UNWIND
+; Workaround for bug 94179 (STAR ):
+; Despite -fasynchronous-unwind-tables, linker is not making dwarf2 unwinder
+; section (.debug_frame) as loadable. So we force it here.
+; This also fixes STAR 9000487933 where the prev-workaround (objcopy --setflag)
+; would not work after a clean build due to kernel build system dependencies.
+.section .debug_frame, "wa",@progbits
-#ifdef CONFIG_TRACE_IRQFLAGS
- ; icause1 needs to be read early, before calling tracing, which
- ; can clobber scratch regs, hence use of stack to stash it
- push r0
- TRACE_ASM_IRQ_DISABLE
- pop r0
+; Reset to .text as this file is included in entry-<isa>.S
+.section .text, "ax",@progbits
#endif
- bl.d @arch_do_IRQ
- mov r1, sp
-
- mov r8,0x1
- sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg
-
- b ret_from_exception
-END(handle_interrupt_level1)
-
;################### Non TLB Exception Handling #############################
; ---------------------------------------------
lr r0, [efa]
mov r1, sp
- FAKE_RET_FROM_EXCPN r9
+ FAKE_RET_FROM_EXCPN
bl do_insterror_or_kprobe
b ret_from_exception
lr r0, [efa]
mov r1, sp
- FAKE_RET_FROM_EXCPN r9
+ FAKE_RET_FROM_EXCPN
bl do_memory_error
b ret_from_exception
END(EV_MachineCheck)
-; ---------------------------------------------
-; Protection Violation Exception Handler
-; ---------------------------------------------
-
-ENTRY(EV_TLBProtV)
-
- EXCEPTION_PROLOGUE
-
- ;---------(3) Save some more regs-----------------
- ; vineetg: Mar 6th: Random Seg Fault issue #1
- ; ecr and efa were not saved in case an Intr sneaks in
- ; after fake rtie
-
- lr r2, [ecr]
- lr r0, [efa] ; Faulting Data address
-
- ; --------(4) Return from CPU Exception Mode ---------
- ; Fake a rtie, but rtie to next label
- ; That way, subsequently, do_page_fault ( ) executes in pure kernel
- ; mode with further Exceptions enabled
-
- FAKE_RET_FROM_EXCPN r9
-
- mov r1, sp
-
- ;------ (5) Type of Protection Violation? ----------
- ;
- ; ProtV Hardware Exception is triggered for Access Faults of 2 types
- ; -Access Violaton : 00_23_(00|01|02|03)_00
- ; x r w r+w
- ; -Unaligned Access : 00_23_04_00
- ;
- bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
-
- ;========= (6a) Access Violation Processing ========
- bl do_page_fault
- b ret_from_exception
-
- ;========== (6b) Non aligned access ============
-4:
-
- SAVE_CALLEE_SAVED_USER
- mov r2, sp ; callee_regs
-
- bl do_misaligned_access
-
- ; TBD: optimize - do this only if a callee reg was involved
- ; either a dst of emulated LD/ST or src with address-writeback
- RESTORE_CALLEE_SAVED_USER
-
- b ret_from_exception
-
-END(EV_TLBProtV)
-
; ---------------------------------------------
; Privilege Violation Exception Handler
; ---------------------------------------------
lr r0, [efa]
mov r1, sp
- FAKE_RET_FROM_EXCPN r9
+ FAKE_RET_FROM_EXCPN
bl do_privilege_fault
b ret_from_exception
lr r0, [efa]
mov r1, sp
- FAKE_RET_FROM_EXCPN r9
+ FAKE_RET_FROM_EXCPN
bl do_extension_fault
b ret_from_exception
END(EV_Extension)
-;######################### System Call Tracing #########################
+;################ Trap Handling (Syscall, Breakpoint) ##################
+; ---------------------------------------------
+; syscall Tracing
+; ---------------------------------------------
tracesys:
; save EFA in case tracer wants the PC of traced task
; using ERET won't work since next-PC has already committed
b ret_from_exception ; NOT ret_from_system_call at is saves r0 which
; we'd done before calling post hook above
-;################### Break Point TRAP ##########################
-
- ; ======= (5b) Trap is due to Break-Point =========
-
+; ---------------------------------------------
+; Breakpoint TRAP
+; ---------------------------------------------
trap_with_param:
; stop_pc info by gdb needs this info
; Now that we have read EFA, it is safe to do "fake" rtie
; and get out of CPU exception mode
- FAKE_RET_FROM_EXCPN r11
+ FAKE_RET_FROM_EXCPN
; Save callee regs in case gdb wants to have a look
; SP will grow up by size of CALLEE Reg-File
b ret_from_exception
-;##################### Trap Handling ##############################
-;
-; EV_Trap caused by TRAP_S and TRAP0 instructions.
-;------------------------------------------------------------------
-; (1) System Calls
-; :parameters in r0-r7.
-; :r8 has the system call number
-; (2) Break Points
-;------------------------------------------------------------------
+; ---------------------------------------------
+; syscall TRAP
+; ABI: (r0-r7) upto 8 args, (r8) syscall number
+; ---------------------------------------------
ENTRY(EV_Trap)
EXCEPTION_PROLOGUE
- ;------- (4) What caused the Trap --------------
- lr r12, [ecr]
- bmsk.f 0, r12, 7
+ ;============ TRAP 1 :breakpoints
+ ; Check ECR for trap with arg (PROLOGUE ensures r9 has ECR)
+ bmsk.f 0, r9, 7
bnz trap_with_param
- ; ======= (5a) Trap is due to System Call ========
+ ;============ TRAP (no param): syscall top level
- ; Before doing anything, return from CPU Exception Mode
- FAKE_RET_FROM_EXCPN r11
+ ; First return from Exception to pure K mode (Exception/IRQs renabled)
+ FAKE_RET_FROM_EXCPN
- ; If syscall tracing ongoing, invoke pre-pos-hooks
+ ; If syscall tracing ongoing, invoke pre-post-hooks
GET_CURR_THR_INFO_FLAGS r10
btst r10, TIF_SYSCALL_TRACE
bnz tracesys ; this never comes back
- ;============ This is normal System Call case ==========
- ; Sys-call num shd not exceed the total system calls avail
+ ;============ Normal syscall case
+
+ ; syscall num shd not exceed the total system calls avail
cmp r8, NR_syscalls
mov.hi r0, -ENOSYS
bhi ret_from_system_call
; Fast Path return to user mode if no pending work
GET_CURR_THR_INFO_FLAGS r9
and.f 0, r9, _TIF_WORK_MASK
- bz restore_regs
+ bz .Lrestore_regs
; --- (Slow Path #1) task preemption ---
bbit0 r9, TIF_NEED_RESCHED, .Lchk_pend_signals
; Can't preempt if preemption disabled
GET_CURR_THR_INFO_FROM_SP r10
ld r8, [r10, THREAD_INFO_PREEMPT_COUNT]
- brne r8, 0, restore_regs
+ brne r8, 0, .Lrestore_regs
; check if this task's NEED_RESCHED flag set
ld r9, [r10, THREAD_INFO_FLAGS]
- bbit0 r9, TIF_NEED_RESCHED, restore_regs
+ bbit0 r9, TIF_NEED_RESCHED, .Lrestore_regs
; Invoke PREEMPTION
bl preempt_schedule_irq
; preempt_schedule_irq() always returns with IRQ disabled
#endif
- ; fall through
-
-;############# Return from Intr/Excp/Trap (ARC Specifics) ##############
-;
-; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap)
-; IRQ shd definitely not happen between now and rtie
-; All 2 entry points to here already disable interrupts
-
-restore_regs :
-
- TRACE_ASM_IRQ_ENABLE
-
- lr r10, [status32]
-
- ; Restore REG File. In case multiple Events outstanding,
- ; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None
- ; Note that we use realtime STATUS32 (not pt_regs->status32) to
- ; decide that.
-
- ; if Returning from Exception
- bbit0 r10, STATUS_AE_BIT, not_exception
- RESTORE_ALL_SYS
- rtie
-
- ; Not Exception so maybe Interrupts (Level 1 or 2)
-
-not_exception:
-
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
-
- ; Level 2 interrupt return Path - from hardware standpoint
- bbit0 r10, STATUS_A2_BIT, not_level2_interrupt
-
- ;------------------------------------------------------------------
- ; However the context returning might not have taken L2 intr itself
- ; e.g. Task'A' user-code -> L2 intr -> schedule -> 'B' user-code ret
- ; Special considerations needed for the context which took L2 intr
-
- ld r9, [sp, PT_event] ; Ensure this is L2 intr context
- brne r9, event_IRQ2, 149f
-
- ;------------------------------------------------------------------
- ; if L2 IRQ interrupted an L1 ISR, we'd disabled preemption earlier
- ; so that sched doesn't move to new task, causing L1 to be delayed
- ; undeterministically. Now that we've achieved that, let's reset
- ; things to what they were, before returning from L2 context
- ;----------------------------------------------------------------
-
- ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs)
- bbit0 r9, STATUS_A1_BIT, 149f ; L1 not active when L2 IRQ, so normal
-
- ; decrement thread_info->preempt_count (re-enable preemption)
- GET_CURR_THR_INFO_FROM_SP r10
- ld r9, [r10, THREAD_INFO_PREEMPT_COUNT]
-
- ; paranoid check, given A1 was active when A2 happened, preempt count
- ; must not be 0 because we would have incremented it.
- ; If this does happen we simply HALT as it means a BUG !!!
- cmp r9, 0
- bnz 2f
- flag 1
-
-2:
- sub r9, r9, 1
- st r9, [r10, THREAD_INFO_PREEMPT_COUNT]
-
-149:
- ;return from level 2
- RESTORE_ALL_INT2
-debug_marker_l2:
- rtie
-
-not_level2_interrupt:
-
-#endif
-
- bbit0 r10, STATUS_A1_BIT, not_level1_interrupt
+ b .Lrestore_regs
- ;return from level 1
+##### DONT ADD CODE HERE - .Lrestore_regs actually follows in entry-<isa>.S
- RESTORE_ALL_INT1
-debug_marker_l1:
- rtie
-
-not_level1_interrupt:
-
- ;this case is for syscalls or Exceptions (with fake rtie)
-
- RESTORE_ALL_SYS
-debug_marker_syscall:
- rtie
-
-END(ret_from_exception)
-
-ENTRY(ret_from_fork)
- ; when the forked child comes here from the __switch_to function
- ; r0 has the last task pointer.
- ; put last task in scheduler queue
- bl @schedule_tail
-
- ld r9, [sp, PT_status32]
- brne r9, 0, 1f
-
- jl.d [r14] ; kernel thread entry point
- mov r0, r13 ; (see PF_KTHREAD block in copy_thread)
-
-1:
- ; Return to user space
- ; 1. Any forked task (Reach here via BRne above)
- ; 2. First ever init task (Reach here via return from JL above)
- ; This is the historic "kernel_execve" use-case, to return to init
- ; user mode, in a round about way since that is always done from
- ; a kernel thread which is executed via JL above but always returns
- ; out whenever kernel_execve (now inline do_fork()) is involved
- b ret_from_exception
-END(ret_from_fork)
-
-;################### Special Sys Call Wrappers ##########################
-
-ENTRY(sys_clone_wrapper)
- SAVE_CALLEE_SAVED_USER
- bl @sys_clone
- DISCARD_CALLEE_SAVED_USER
-
- GET_CURR_THR_INFO_FLAGS r10
- btst r10, TIF_SYSCALL_TRACE
- bnz tracesys_exit
-
- b ret_from_system_call
-END(sys_clone_wrapper)
-
-#ifdef CONFIG_ARC_DW2_UNWIND
-; Workaround for bug 94179 (STAR ):
-; Despite -fasynchronous-unwind-tables, linker is not making dwarf2 unwinder
-; section (.debug_frame) as loadable. So we force it here.
-; This also fixes STAR 9000487933 where the prev-workaround (objcopy --setflag)
-; would not work after a clean build due to kernel build system dependencies.
-.section .debug_frame, "wa",@progbits
-#endif
1:
.endm
- .cpu A7
-
.section .init.text, "ax",@progbits
.type stext, @function
.globl stext
st.ab 0, [r5, 4]
1:
+#ifdef CONFIG_ARC_UBOOT_SUPPORT
; Uboot - kernel ABI
; r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2
; r1 = magic number (board identity, unused as of now
; These are handled later in setup_arch()
st r0, [@uboot_tag]
st r2, [@uboot_arg]
+#endif
; setup "current" tsk and optionally cache it in dedicated r25
mov r9, @init_task
--- /dev/null
+/*
+ * Copyright (C) 2014 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip.h>
+#include "../../drivers/irqchip/irqchip.h"
+#include <asm/irq.h>
+
+/*
+ * Early Hardware specific Interrupt setup
+ * -Called very early (start_kernel -> setup_arch -> setup_processor)
+ * -Platform Independent (must for any ARC Core)
+ * -Needed for each CPU (hence not foldable into init_IRQ)
+ */
+void arc_init_IRQ(void)
+{
+ unsigned int tmp;
+
+ struct aux_irq_ctrl {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int res3:18, save_idx_regs:1, res2:1,
+ save_u_to_u:1, save_lp_regs:1, save_blink:1,
+ res:4, save_nr_gpr_pairs:5;
+#else
+ unsigned int save_nr_gpr_pairs:5, res:4,
+ save_blink:1, save_lp_regs:1, save_u_to_u:1,
+ res2:1, save_idx_regs:1, res3:18;
+#endif
+ } ictrl;
+
+ *(unsigned int *)&ictrl = 0;
+
+ ictrl.save_nr_gpr_pairs = 6; /* r0 to r11 (r12 saved manually) */
+ ictrl.save_blink = 1;
+ ictrl.save_lp_regs = 1; /* LP_COUNT, LP_START, LP_END */
+ ictrl.save_u_to_u = 0; /* user ctxt saved on kernel stack */
+ ictrl.save_idx_regs = 1; /* JLI, LDI, EI */
+
+ WRITE_AUX(AUX_IRQ_CTRL, ictrl);
+
+ /* setup status32, don't enable intr yet as kernel doesn't want */
+ tmp = read_aux_reg(0xa);
+ tmp |= ISA_INIT_STATUS_BITS;
+ tmp &= ~STATUS_IE_MASK;
+ asm volatile("flag %0 \n"::"r"(tmp));
+
+ /*
+ * ARCv2 core intc provides multiple interrupt priorities (upto 16).
+ * Typical builds though have only two levels (0-high, 1-low)
+ * Linux by default uses lower prio 1 for most irqs, reserving 0 for
+ * NMI style interrupts in future (say perf)
+ *
+ * Read the intc BCR to confirm that Linux default priority is avail
+ * in h/w
+ *
+ * Note:
+ * IRQ_BCR[27..24] contains N-1 (for N priority levels) and prio level
+ * is 0 based.
+ */
+ tmp = (read_aux_reg(ARC_REG_IRQ_BCR) >> 24 ) & 0xF;
+ if (ARCV2_IRQ_DEF_PRIO > tmp)
+ panic("Linux default irq prio incorrect\n");
+}
+
+static void arcv2_irq_mask(struct irq_data *data)
+{
+ write_aux_reg(AUX_IRQ_SELECT, data->irq);
+ write_aux_reg(AUX_IRQ_ENABLE, 0);
+}
+
+static void arcv2_irq_unmask(struct irq_data *data)
+{
+ write_aux_reg(AUX_IRQ_SELECT, data->irq);
+ write_aux_reg(AUX_IRQ_ENABLE, 1);
+}
+
+void arcv2_irq_enable(struct irq_data *data)
+{
+ /* set default priority */
+ write_aux_reg(AUX_IRQ_SELECT, data->irq);
+ write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO);
+
+ /*
+ * hw auto enables (linux unmask) all by default
+ * So no need to do IRQ_ENABLE here
+ * XXX: However OSCI LAN need it
+ */
+ write_aux_reg(AUX_IRQ_ENABLE, 1);
+}
+
+static struct irq_chip arcv2_irq_chip = {
+ .name = "ARCv2 core Intc",
+ .irq_mask = arcv2_irq_mask,
+ .irq_unmask = arcv2_irq_unmask,
+ .irq_enable = arcv2_irq_enable
+};
+
+static int arcv2_irq_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hw)
+{
+ if (irq == TIMER0_IRQ || irq == IPI_IRQ)
+ irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_percpu_irq);
+ else
+ irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_level_irq);
+
+ return 0;
+}
+
+static const struct irq_domain_ops arcv2_irq_ops = {
+ .xlate = irq_domain_xlate_onecell,
+ .map = arcv2_irq_map,
+};
+
+static struct irq_domain *root_domain;
+
+static int __init
+init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
+{
+ if (parent)
+ panic("DeviceTree incore intc not a root irq controller\n");
+
+ root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0,
+ &arcv2_irq_ops, NULL);
+
+ if (!root_domain)
+ panic("root irq domain not avail\n");
+
+ /* with this we don't need to export root_domain */
+ irq_set_default_host(root_domain);
+
+ return 0;
+}
+
+IRQCHIP_DECLARE(arc_intc, "snps,archs-intc", init_onchip_IRQ);
--- /dev/null
+/*
+ * Copyright (C) 2011-12 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip.h>
+#include "../../drivers/irqchip/irqchip.h"
+#include <asm/irq.h>
+
+/*
+ * Early Hardware specific Interrupt setup
+ * -Platform independent, needed for each CPU (not foldable into init_IRQ)
+ * -Called very early (start_kernel -> setup_arch -> setup_processor)
+ *
+ * what it does ?
+ * -Optionally, setup the High priority Interrupts as Level 2 IRQs
+ */
+void arc_init_IRQ(void)
+{
+ int level_mask = 0;
+
+ /* setup any high priority Interrupts (Level2 in ARCompact jargon) */
+ level_mask |= IS_ENABLED(CONFIG_ARC_IRQ3_LV2) << 3;
+ level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5;
+ level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6;
+
+ /*
+ * Write to register, even if no LV2 IRQs configured to reset it
+ * in case bootloader had mucked with it
+ */
+ write_aux_reg(AUX_IRQ_LEV, level_mask);
+
+ if (level_mask)
+ pr_info("Level-2 interrupts bitset %x\n", level_mask);
+}
+
+/*
+ * ARC700 core includes a simple on-chip intc supporting
+ * -per IRQ enable/disable
+ * -2 levels of interrupts (high/low)
+ * -all interrupts being level triggered
+ *
+ * To reduce platform code, we assume all IRQs directly hooked-up into intc.
+ * Platforms with external intc, hence cascaded IRQs, are free to over-ride
+ * below, per IRQ.
+ */
+
+static void arc_irq_mask(struct irq_data *data)
+{
+ unsigned int ienb;
+
+ ienb = read_aux_reg(AUX_IENABLE);
+ ienb &= ~(1 << data->irq);
+ write_aux_reg(AUX_IENABLE, ienb);
+}
+
+static void arc_irq_unmask(struct irq_data *data)
+{
+ unsigned int ienb;
+
+ ienb = read_aux_reg(AUX_IENABLE);
+ ienb |= (1 << data->irq);
+ write_aux_reg(AUX_IENABLE, ienb);
+}
+
+static struct irq_chip onchip_intc = {
+ .name = "ARC In-core Intc",
+ .irq_mask = arc_irq_mask,
+ .irq_unmask = arc_irq_unmask,
+};
+
+static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hw)
+{
+ /*
+ * XXX: the IPI IRQ needs to be handled like TIMER too. However ARC core
+ * code doesn't own it (like TIMER0). ISS IDU / ezchip define it
+ * in platform header which can't be included here as it goes
+ * against multi-platform image philisophy
+ */
+ if (irq == TIMER0_IRQ)
+ irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq);
+ else
+ irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq);
+
+ return 0;
+}
+
+static const struct irq_domain_ops arc_intc_domain_ops = {
+ .xlate = irq_domain_xlate_onecell,
+ .map = arc_intc_domain_map,
+};
+
+static struct irq_domain *root_domain;
+
+static int __init
+init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
+{
+ if (parent)
+ panic("DeviceTree incore intc not a root irq controller\n");
+
+ root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0,
+ &arc_intc_domain_ops, NULL);
+
+ if (!root_domain)
+ panic("root irq domain not avail\n");
+
+ /* with this we don't need to export root_domain */
+ irq_set_default_host(root_domain);
+
+ return 0;
+}
+
+IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
+
+/*
+ * arch_local_irq_enable - Enable interrupts.
+ *
+ * 1. Explicitly called to re-enable interrupts
+ * 2. Implicitly called from spin_unlock_irq, write_unlock_irq etc
+ * which maybe in hard ISR itself
+ *
+ * Semantics of this function change depending on where it is called from:
+ *
+ * -If called from hard-ISR, it must not invert interrupt priorities
+ * e.g. suppose TIMER is high priority (Level 2) IRQ
+ * Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times.
+ * Here local_irq_enable( ) shd not re-enable lower priority interrupts
+ * -If called from soft-ISR, it must re-enable all interrupts
+ * soft ISR are low prioity jobs which can be very slow, thus all IRQs
+ * must be enabled while they run.
+ * Now hardware context wise we may still be in L2 ISR (not done rtie)
+ * still we must re-enable both L1 and L2 IRQs
+ * Another twist is prev scenario with flow being
+ * L1 ISR ==> interrupted by L2 ISR ==> L2 soft ISR
+ * here we must not re-enable Ll as prev Ll Interrupt's h/w context will get
+ * over-written (this is deficiency in ARC700 Interrupt mechanism)
+ */
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS /* Complex version for 2 IRQ levels */
+
+void arch_local_irq_enable(void)
+{
+
+ unsigned long flags = arch_local_save_flags();
+
+ /* Allow both L1 and L2 at the onset */
+ flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
+
+ /* Called from hard ISR (between irq_enter and irq_exit) */
+ if (in_irq()) {
+
+ /* If in L2 ISR, don't re-enable any further IRQs as this can
+ * cause IRQ priorities to get upside down. e.g. it could allow
+ * L1 be taken while in L2 hard ISR which is wrong not only in
+ * theory, it can also cause the dreaded L1-L2-L1 scenario
+ */
+ if (flags & STATUS_A2_MASK)
+ flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK);
+
+ /* Even if in L1 ISR, allowe Higher prio L2 IRQs */
+ else if (flags & STATUS_A1_MASK)
+ flags &= ~(STATUS_E1_MASK);
+ }
+
+ /* called from soft IRQ, ideally we want to re-enable all levels */
+
+ else if (in_softirq()) {
+
+ /* However if this is case of L1 interrupted by L2,
+ * re-enabling both may cause whaco L1-L2-L1 scenario
+ * because ARC700 allows level 1 to interrupt an active L2 ISR
+ * Thus we disable both
+ * However some code, executing in soft ISR wants some IRQs
+ * to be enabled so we re-enable L2 only
+ *
+ * How do we determine L1 intr by L2
+ * -A2 is set (means in L2 ISR)
+ * -E1 is set in this ISR's pt_regs->status32 which is
+ * saved copy of status32_l2 when l2 ISR happened
+ */
+ struct pt_regs *pt = get_irq_regs();
+
+ if ((flags & STATUS_A2_MASK) && pt &&
+ (pt->status32 & STATUS_A1_MASK)) {
+ /*flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); */
+ flags &= ~(STATUS_E1_MASK);
+ }
+ }
+
+ arch_local_irq_restore(flags);
+}
+
+#else /* ! CONFIG_ARC_COMPACT_IRQ_LEVELS */
+
+/*
+ * Simpler version for only 1 level of interrupt
+ * Here we only Worry about Level 1 Bits
+ */
+void arch_local_irq_enable(void)
+{
+ unsigned long flags;
+
+ /*
+ * ARC IDE Drivers tries to re-enable interrupts from hard-isr
+ * context which is simply wrong
+ */
+ if (in_irq()) {
+ WARN_ONCE(1, "IRQ enabled from hard-isr");
+ return;
+ }
+
+ flags = arch_local_save_flags();
+ flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
+ arch_local_irq_restore(flags);
+}
+#endif
+EXPORT_SYMBOL(arch_local_irq_enable);
*/
#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/irqdomain.h>
#include <linux/irqchip.h>
-#include "../../drivers/irqchip/irqchip.h"
-#include <asm/sections.h>
-#include <asm/irq.h>
#include <asm/mach_desc.h>
-/*
- * Early Hardware specific Interrupt setup
- * -Platform independent, needed for each CPU (not foldable into init_IRQ)
- * -Called very early (start_kernel -> setup_arch -> setup_processor)
- *
- * what it does ?
- * -Optionally, setup the High priority Interrupts as Level 2 IRQs
- */
-void arc_init_IRQ(void)
-{
- int level_mask = 0;
-
- /* setup any high priority Interrupts (Level2 in ARCompact jargon) */
- level_mask |= IS_ENABLED(CONFIG_ARC_IRQ3_LV2) << 3;
- level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5;
- level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6;
-
- /*
- * Write to register, even if no LV2 IRQs configured to reset it
- * in case bootloader had mucked with it
- */
- write_aux_reg(AUX_IRQ_LEV, level_mask);
-
- if (level_mask)
- pr_info("Level-2 interrupts bitset %x\n", level_mask);
-}
-
-/*
- * ARC700 core includes a simple on-chip intc supporting
- * -per IRQ enable/disable
- * -2 levels of interrupts (high/low)
- * -all interrupts being level triggered
- *
- * To reduce platform code, we assume all IRQs directly hooked-up into intc.
- * Platforms with external intc, hence cascaded IRQs, are free to over-ride
- * below, per IRQ.
- */
-
-static void arc_irq_mask(struct irq_data *data)
-{
- unsigned int ienb;
-
- ienb = read_aux_reg(AUX_IENABLE);
- ienb &= ~(1 << data->irq);
- write_aux_reg(AUX_IENABLE, ienb);
-}
-
-static void arc_irq_unmask(struct irq_data *data)
-{
- unsigned int ienb;
-
- ienb = read_aux_reg(AUX_IENABLE);
- ienb |= (1 << data->irq);
- write_aux_reg(AUX_IENABLE, ienb);
-}
-
-static struct irq_chip onchip_intc = {
- .name = "ARC In-core Intc",
- .irq_mask = arc_irq_mask,
- .irq_unmask = arc_irq_unmask,
-};
-
-static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq,
- irq_hw_number_t hw)
-{
- if (irq == TIMER0_IRQ)
- irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq);
- else
- irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq);
-
- return 0;
-}
-
-static const struct irq_domain_ops arc_intc_domain_ops = {
- .xlate = irq_domain_xlate_onecell,
- .map = arc_intc_domain_map,
-};
-
-static struct irq_domain *root_domain;
-
-static int __init
-init_onchip_IRQ(struct device_node *intc, struct device_node *parent)
-{
- if (parent)
- panic("DeviceTree incore intc not a root irq controller\n");
-
- root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0,
- &arc_intc_domain_ops, NULL);
-
- if (!root_domain)
- panic("root irq domain not avail\n");
-
- /* with this we don't need to export root_domain */
- irq_set_default_host(root_domain);
-
- return 0;
-}
-
-IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
-
/*
* Late Interrupt system init called from start_kernel for Boot CPU only
*
enable_percpu_irq(irq, 0);
}
-
-/*
- * arch_local_irq_enable - Enable interrupts.
- *
- * 1. Explicitly called to re-enable interrupts
- * 2. Implicitly called from spin_unlock_irq, write_unlock_irq etc
- * which maybe in hard ISR itself
- *
- * Semantics of this function change depending on where it is called from:
- *
- * -If called from hard-ISR, it must not invert interrupt priorities
- * e.g. suppose TIMER is high priority (Level 2) IRQ
- * Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times.
- * Here local_irq_enable( ) shd not re-enable lower priority interrupts
- * -If called from soft-ISR, it must re-enable all interrupts
- * soft ISR are low prioity jobs which can be very slow, thus all IRQs
- * must be enabled while they run.
- * Now hardware context wise we may still be in L2 ISR (not done rtie)
- * still we must re-enable both L1 and L2 IRQs
- * Another twist is prev scenario with flow being
- * L1 ISR ==> interrupted by L2 ISR ==> L2 soft ISR
- * here we must not re-enable Ll as prev Ll Interrupt's h/w context will get
- * over-written (this is deficiency in ARC700 Interrupt mechanism)
- */
-
-#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS /* Complex version for 2 IRQ levels */
-
-void arch_local_irq_enable(void)
-{
-
- unsigned long flags;
- flags = arch_local_save_flags();
-
- /* Allow both L1 and L2 at the onset */
- flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
-
- /* Called from hard ISR (between irq_enter and irq_exit) */
- if (in_irq()) {
-
- /* If in L2 ISR, don't re-enable any further IRQs as this can
- * cause IRQ priorities to get upside down. e.g. it could allow
- * L1 be taken while in L2 hard ISR which is wrong not only in
- * theory, it can also cause the dreaded L1-L2-L1 scenario
- */
- if (flags & STATUS_A2_MASK)
- flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK);
-
- /* Even if in L1 ISR, allowe Higher prio L2 IRQs */
- else if (flags & STATUS_A1_MASK)
- flags &= ~(STATUS_E1_MASK);
- }
-
- /* called from soft IRQ, ideally we want to re-enable all levels */
-
- else if (in_softirq()) {
-
- /* However if this is case of L1 interrupted by L2,
- * re-enabling both may cause whaco L1-L2-L1 scenario
- * because ARC700 allows level 1 to interrupt an active L2 ISR
- * Thus we disable both
- * However some code, executing in soft ISR wants some IRQs
- * to be enabled so we re-enable L2 only
- *
- * How do we determine L1 intr by L2
- * -A2 is set (means in L2 ISR)
- * -E1 is set in this ISR's pt_regs->status32 which is
- * saved copy of status32_l2 when l2 ISR happened
- */
- struct pt_regs *pt = get_irq_regs();
- if ((flags & STATUS_A2_MASK) && pt &&
- (pt->status32 & STATUS_A1_MASK)) {
- /*flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); */
- flags &= ~(STATUS_E1_MASK);
- }
- }
-
- arch_local_irq_restore(flags);
-}
-
-#else /* ! CONFIG_ARC_COMPACT_IRQ_LEVELS */
-
-/*
- * Simpler version for only 1 level of interrupt
- * Here we only Worry about Level 1 Bits
- */
-void arch_local_irq_enable(void)
-{
- unsigned long flags;
-
- /*
- * ARC IDE Drivers tries to re-enable interrupts from hard-isr
- * context which is simply wrong
- */
- if (in_irq()) {
- WARN_ONCE(1, "IRQ enabled from hard-isr");
- return;
- }
-
- flags = arch_local_save_flags();
- flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
- arch_local_irq_restore(flags);
-}
-#endif
-EXPORT_SYMBOL(arch_local_irq_enable);
--- /dev/null
+/*
+ * ARC ARConnect (MultiCore IP) support (formerly known as MCIP)
+ *
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/smp.h>
+#include <linux/irq.h>
+#include <linux/spinlock.h>
+#include <asm/mcip.h>
+
+static char smp_cpuinfo_buf[128];
+static int idu_detected;
+
+static DEFINE_RAW_SPINLOCK(mcip_lock);
+
+/*
+ * Any SMP specific init any CPU does when it comes up.
+ * Here we setup the CPU to enable Inter-Processor-Interrupts
+ * Called for each CPU
+ * -Master : init_IRQ()
+ * -Other(s) : start_kernel_secondary()
+ */
+void mcip_init_smp(unsigned int cpu)
+{
+ smp_ipi_irq_setup(cpu, IPI_IRQ);
+}
+
+static void mcip_ipi_send(int cpu)
+{
+ unsigned long flags;
+ int ipi_was_pending;
+
+ /*
+ * NOTE: We must spin here if the other cpu hasn't yet
+ * serviced a previous message. This can burn lots
+ * of time, but we MUST follows this protocol or
+ * ipi messages can be lost!!!
+ * Also, we must release the lock in this loop because
+ * the other side may get to this same loop and not
+ * be able to ack -- thus causing deadlock.
+ */
+
+ do {
+ raw_spin_lock_irqsave(&mcip_lock, flags);
+ __mcip_cmd(CMD_INTRPT_READ_STATUS, cpu);
+ ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK);
+ if (ipi_was_pending == 0)
+ break; /* break out but keep lock */
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+ } while (1);
+
+ __mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu);
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+
+#ifdef CONFIG_ARC_IPI_DBG
+ if (ipi_was_pending)
+ pr_info("IPI ACK delayed from cpu %d\n", cpu);
+#endif
+}
+
+static void mcip_ipi_clear(int irq)
+{
+ unsigned int cpu, c;
+ unsigned long flags;
+ unsigned int __maybe_unused copy;
+
+ raw_spin_lock_irqsave(&mcip_lock, flags);
+
+ /* Who sent the IPI */
+ __mcip_cmd(CMD_INTRPT_CHECK_SOURCE, 0);
+
+ copy = cpu = read_aux_reg(ARC_REG_MCIP_READBACK); /* 1,2,4,8... */
+
+ /*
+ * In rare case, multiple concurrent IPIs sent to same target can
+ * possibly be coalesced by MCIP into 1 asserted IRQ, so @cpus can be
+ * "vectored" (multiple bits sets) as opposed to typical single bit
+ */
+ do {
+ c = __ffs(cpu); /* 0,1,2,3 */
+ __mcip_cmd(CMD_INTRPT_GENERATE_ACK, c);
+ cpu &= ~(1U << c);
+ } while (cpu);
+
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+
+#ifdef CONFIG_ARC_IPI_DBG
+ if (c != __ffs(copy))
+ pr_info("IPIs from %x coalesced to %x\n",
+ copy, raw_smp_processor_id());
+#endif
+}
+
+volatile int wake_flag;
+
+static void mcip_wakeup_cpu(int cpu, unsigned long pc)
+{
+ BUG_ON(cpu == 0);
+ wake_flag = cpu;
+}
+
+void arc_platform_smp_wait_to_boot(int cpu)
+{
+ while (wake_flag != cpu)
+ ;
+
+ wake_flag = 0;
+ __asm__ __volatile__("j @first_lines_of_secondary \n");
+}
+
+struct plat_smp_ops plat_smp_ops = {
+ .info = smp_cpuinfo_buf,
+ .cpu_kick = mcip_wakeup_cpu,
+ .ipi_send = mcip_ipi_send,
+ .ipi_clear = mcip_ipi_clear,
+};
+
+void mcip_init_early_smp(void)
+{
+#define IS_AVAIL1(var, str) ((var) ? str : "")
+
+ struct mcip_bcr {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad3:8,
+ idu:1, llm:1, num_cores:6,
+ iocoh:1, grtc:1, dbg:1, pad2:1,
+ msg:1, sem:1, ipi:1, pad:1,
+ ver:8;
+#else
+ unsigned int ver:8,
+ pad:1, ipi:1, sem:1, msg:1,
+ pad2:1, dbg:1, grtc:1, iocoh:1,
+ num_cores:6, llm:1, idu:1,
+ pad3:8;
+#endif
+ } mp;
+
+ READ_BCR(ARC_REG_MCIP_BCR, mp);
+
+ sprintf(smp_cpuinfo_buf,
+ "Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s\n",
+ mp.ver, mp.num_cores,
+ IS_AVAIL1(mp.ipi, "IPI "),
+ IS_AVAIL1(mp.idu, "IDU "),
+ IS_AVAIL1(mp.dbg, "DEBUG "),
+ IS_AVAIL1(mp.grtc, "GRTC"));
+
+ idu_detected = mp.idu;
+
+ if (mp.dbg) {
+ __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf);
+ __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf);
+ }
+
+ if (IS_ENABLED(CONFIG_ARC_HAS_GRTC) && !mp.grtc)
+ panic("kernel trying to use non-existent GRTC\n");
+}
+
+/***************************************************************************
+ * ARCv2 Interrupt Distribution Unit (IDU)
+ *
+ * Connects external "COMMON" IRQs to core intc, providing:
+ * -dynamic routing (IRQ affinity)
+ * -load balancing (Round Robin interrupt distribution)
+ * -1:N distribution
+ *
+ * It physically resides in the MCIP hw block
+ */
+
+#include <linux/irqchip.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include "../../drivers/irqchip/irqchip.h"
+
+/*
+ * Set the DEST for @cmn_irq to @cpu_mask (1 bit per core)
+ */
+static void idu_set_dest(unsigned int cmn_irq, unsigned int cpu_mask)
+{
+ __mcip_cmd_data(CMD_IDU_SET_DEST, cmn_irq, cpu_mask);
+}
+
+static void idu_set_mode(unsigned int cmn_irq, unsigned int lvl,
+ unsigned int distr)
+{
+ union {
+ unsigned int word;
+ struct {
+ unsigned int distr:2, pad:2, lvl:1, pad2:27;
+ };
+ } data;
+
+ data.distr = distr;
+ data.lvl = lvl;
+ __mcip_cmd_data(CMD_IDU_SET_MODE, cmn_irq, data.word);
+}
+
+static void idu_irq_mask(struct irq_data *data)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&mcip_lock, flags);
+ __mcip_cmd_data(CMD_IDU_SET_MASK, data->hwirq, 1);
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static void idu_irq_unmask(struct irq_data *data)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&mcip_lock, flags);
+ __mcip_cmd_data(CMD_IDU_SET_MASK, data->hwirq, 0);
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static int
+idu_irq_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool f)
+{
+ return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip idu_irq_chip = {
+ .name = "MCIP IDU Intc",
+ .irq_mask = idu_irq_mask,
+ .irq_unmask = idu_irq_unmask,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = idu_irq_set_affinity,
+#endif
+
+};
+
+static int idu_first_irq;
+
+static void idu_cascade_isr(unsigned int core_irq, struct irq_desc *desc)
+{
+ struct irq_domain *domain = irq_desc_get_handler_data(desc);
+ unsigned int idu_irq;
+
+ idu_irq = core_irq - idu_first_irq;
+ generic_handle_irq(irq_find_mapping(domain, idu_irq));
+}
+
+static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq)
+{
+ irq_set_chip_and_handler(virq, &idu_irq_chip, handle_level_irq);
+ irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
+
+ return 0;
+}
+
+static int idu_irq_xlate(struct irq_domain *d, struct device_node *n,
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq, unsigned int *out_type)
+{
+ irq_hw_number_t hwirq = *out_hwirq = intspec[0];
+ int distri = intspec[1];
+ unsigned long flags;
+
+ *out_type = IRQ_TYPE_NONE;
+
+ /* XXX: validate distribution scheme again online cpu mask */
+ if (distri == 0) {
+ /* 0 - Round Robin to all cpus, otherwise 1 bit per core */
+ raw_spin_lock_irqsave(&mcip_lock, flags);
+ idu_set_dest(hwirq, BIT(num_online_cpus()) - 1);
+ idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR);
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+ } else {
+ /*
+ * DEST based distribution for Level Triggered intr can only
+ * have 1 CPU, so generalize it to always contain 1 cpu
+ */
+ int cpu = ffs(distri);
+
+ if (cpu != fls(distri))
+ pr_warn("IDU irq %lx distri mode set to cpu %x\n",
+ hwirq, cpu);
+
+ raw_spin_lock_irqsave(&mcip_lock, flags);
+ idu_set_dest(hwirq, cpu);
+ idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_DEST);
+ raw_spin_unlock_irqrestore(&mcip_lock, flags);
+ }
+
+ return 0;
+}
+
+static const struct irq_domain_ops idu_irq_ops = {
+ .xlate = idu_irq_xlate,
+ .map = idu_irq_map,
+};
+
+/*
+ * [16, 23]: Statically assigned always private-per-core (Timers, WDT, IPI)
+ * [24, 23+C]: If C > 0 then "C" common IRQs
+ * [24+C, N]: Not statically assigned, private-per-core
+ */
+
+
+static int __init
+idu_of_init(struct device_node *intc, struct device_node *parent)
+{
+ struct irq_domain *domain;
+ /* Read IDU BCR to confirm nr_irqs */
+ int nr_irqs = of_irq_count(intc);
+ int i, irq;
+
+ if (!idu_detected)
+ panic("IDU not detected, but DeviceTree using it");
+
+ pr_info("MCIP: IDU referenced from Devicetree %d irqs\n", nr_irqs);
+
+ domain = irq_domain_add_linear(intc, nr_irqs, &idu_irq_ops, NULL);
+
+ /* Parent interrupts (core-intc) are already mapped */
+
+ for (i = 0; i < nr_irqs; i++) {
+ /*
+ * Return parent uplink IRQs (towards core intc) 24,25,.....
+ * this step has been done before already
+ * however we need it to get the parent virq and set IDU handler
+ * as first level isr
+ */
+ irq = irq_of_parse_and_map(intc, i);
+ if (!i)
+ idu_first_irq = irq;
+
+ irq_set_handler_data(irq, domain);
+ irq_set_chained_handler(irq, idu_cascade_isr);
+ }
+
+ __mcip_cmd(CMD_IDU_ENABLE, 0);
+
+ return 0;
+}
+IRQCHIP_DECLARE(arcv2_idu_intc, "snps,archs-idu-intc", idu_of_init);
static int arc_pmu_device_probe(struct platform_device *pdev)
{
- struct arc_pmu *arc_pmu;
struct arc_reg_pct_build pct_bcr;
struct arc_reg_cc_build cc_bcr;
- int i, j, ret;
+ int i, j;
union cc_name {
struct {
/* ARC 700 PMU does not support sampling events */
arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
- ret = perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW);
-
- return ret;
+ return perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW);
}
#ifdef CONFIG_OF
void arch_cpu_idle(void)
{
/* sleep, but enable all interrupts before committing */
- __asm__("sleep 0x3");
+ if (is_isa_arcompact()) {
+ __asm__("sleep 0x3");
+ } else {
+ __asm__("sleep 0x10");
+ }
}
asmlinkage void ret_from_fork(void);
* [L] ZOL loop inhibited to begin with - cleared by a LP insn
* Interrupts enabled
*/
- regs->status32 = STATUS_U_MASK | STATUS_L_MASK |
- STATUS_E1_MASK | STATUS_E2_MASK;
+ regs->status32 = STATUS_U_MASK | STATUS_L_MASK | ISA_INIT_STATUS_BITS;
/* bogus seed values for debugging */
regs->lp_start = 0x10;
{
unsigned int eflags;
- if (x->e_machine != EM_ARCOMPACT)
+ if (x->e_machine != EM_ARC_INUSE) {
+ pr_err("ELF not built for %s ISA\n",
+ is_isa_arcompact() ? "ARCompact":"ARCv2");
return 0;
+ }
eflags = x->e_flags;
if ((eflags & EF_ARC_OSABI_MSK) < EF_ARC_OSABI_CURRENT) {
offsetof(struct user_regs_struct, LOC) + 4);
REG_O_ZERO(pad);
- REG_O_CHUNK(scratch, callee, ptregs);
+ REG_O_ONE(scratch.bta, &ptregs->bta);
+ REG_O_ONE(scratch.lp_start, &ptregs->lp_start);
+ REG_O_ONE(scratch.lp_end, &ptregs->lp_end);
+ REG_O_ONE(scratch.lp_count, &ptregs->lp_count);
+ REG_O_ONE(scratch.status32, &ptregs->status32);
+ REG_O_ONE(scratch.ret, &ptregs->ret);
+ REG_O_ONE(scratch.blink, &ptregs->blink);
+ REG_O_ONE(scratch.fp, &ptregs->fp);
+ REG_O_ONE(scratch.gp, &ptregs->r26);
+ REG_O_ONE(scratch.r12, &ptregs->r12);
+ REG_O_ONE(scratch.r11, &ptregs->r11);
+ REG_O_ONE(scratch.r10, &ptregs->r10);
+ REG_O_ONE(scratch.r9, &ptregs->r9);
+ REG_O_ONE(scratch.r8, &ptregs->r8);
+ REG_O_ONE(scratch.r7, &ptregs->r7);
+ REG_O_ONE(scratch.r6, &ptregs->r6);
+ REG_O_ONE(scratch.r5, &ptregs->r5);
+ REG_O_ONE(scratch.r4, &ptregs->r4);
+ REG_O_ONE(scratch.r3, &ptregs->r3);
+ REG_O_ONE(scratch.r2, &ptregs->r2);
+ REG_O_ONE(scratch.r1, &ptregs->r1);
+ REG_O_ONE(scratch.r0, &ptregs->r0);
+ REG_O_ONE(scratch.sp, &ptregs->sp);
+
REG_O_ZERO(pad2);
- REG_O_CHUNK(callee, efa, cregs);
- REG_O_CHUNK(efa, stop_pc, &target->thread.fault_address);
+
+ REG_O_ONE(callee.r25, &cregs->r25);
+ REG_O_ONE(callee.r24, &cregs->r24);
+ REG_O_ONE(callee.r23, &cregs->r23);
+ REG_O_ONE(callee.r22, &cregs->r22);
+ REG_O_ONE(callee.r21, &cregs->r21);
+ REG_O_ONE(callee.r20, &cregs->r20);
+ REG_O_ONE(callee.r19, &cregs->r19);
+ REG_O_ONE(callee.r18, &cregs->r18);
+ REG_O_ONE(callee.r17, &cregs->r17);
+ REG_O_ONE(callee.r16, &cregs->r16);
+ REG_O_ONE(callee.r15, &cregs->r15);
+ REG_O_ONE(callee.r14, &cregs->r14);
+ REG_O_ONE(callee.r13, &cregs->r13);
+
+ REG_O_ONE(efa, &target->thread.fault_address);
if (!ret) {
if (in_brkpt_trap(ptregs)) {
offsetof(struct user_regs_struct, LOC) + 4);
REG_IGNORE_ONE(pad);
- /* TBD: disallow updates to STATUS32 etc*/
- REG_IN_CHUNK(scratch, pad2, ptregs); /* pt_regs[bta..sp] */
+
+ REG_IN_ONE(scratch.bta, &ptregs->bta);
+ REG_IN_ONE(scratch.lp_start, &ptregs->lp_start);
+ REG_IN_ONE(scratch.lp_end, &ptregs->lp_end);
+ REG_IN_ONE(scratch.lp_count, &ptregs->lp_count);
+
+ REG_IGNORE_ONE(scratch.status32);
+
+ REG_IN_ONE(scratch.ret, &ptregs->ret);
+ REG_IN_ONE(scratch.blink, &ptregs->blink);
+ REG_IN_ONE(scratch.fp, &ptregs->fp);
+ REG_IN_ONE(scratch.gp, &ptregs->r26);
+ REG_IN_ONE(scratch.r12, &ptregs->r12);
+ REG_IN_ONE(scratch.r11, &ptregs->r11);
+ REG_IN_ONE(scratch.r10, &ptregs->r10);
+ REG_IN_ONE(scratch.r9, &ptregs->r9);
+ REG_IN_ONE(scratch.r8, &ptregs->r8);
+ REG_IN_ONE(scratch.r7, &ptregs->r7);
+ REG_IN_ONE(scratch.r6, &ptregs->r6);
+ REG_IN_ONE(scratch.r5, &ptregs->r5);
+ REG_IN_ONE(scratch.r4, &ptregs->r4);
+ REG_IN_ONE(scratch.r3, &ptregs->r3);
+ REG_IN_ONE(scratch.r2, &ptregs->r2);
+ REG_IN_ONE(scratch.r1, &ptregs->r1);
+ REG_IN_ONE(scratch.r0, &ptregs->r0);
+ REG_IN_ONE(scratch.sp, &ptregs->sp);
+
REG_IGNORE_ONE(pad2);
- REG_IN_CHUNK(callee, efa, cregs); /* callee_regs[r25..r13] */
+
+ REG_IN_ONE(callee.r25, &cregs->r25);
+ REG_IN_ONE(callee.r24, &cregs->r24);
+ REG_IN_ONE(callee.r23, &cregs->r23);
+ REG_IN_ONE(callee.r22, &cregs->r22);
+ REG_IN_ONE(callee.r21, &cregs->r21);
+ REG_IN_ONE(callee.r20, &cregs->r20);
+ REG_IN_ONE(callee.r19, &cregs->r19);
+ REG_IN_ONE(callee.r18, &cregs->r18);
+ REG_IN_ONE(callee.r17, &cregs->r17);
+ REG_IN_ONE(callee.r16, &cregs->r16);
+ REG_IN_ONE(callee.r15, &cregs->r15);
+ REG_IN_ONE(callee.r14, &cregs->r14);
+ REG_IN_ONE(callee.r13, &cregs->r13);
+
REG_IGNORE_ONE(efa); /* efa update invalid */
- REG_IGNORE_ONE(stop_pc); /* PC updated via @ret */
+ REG_IGNORE_ONE(stop_pc); /* PC updated via @ret */
return ret;
}
static const struct user_regset_view user_arc_view = {
.name = UTS_MACHINE,
- .e_machine = EM_ARCOMPACT,
+ .e_machine = EM_ARC_INUSE,
.regsets = arc_regsets,
.n = ARRAY_SIZE(arc_regsets)
};
#define FIX_PTR(x) __asm__ __volatile__(";" : "+r"(x))
+unsigned int intr_to_DE_cnt;
+
/* Part of U-boot ABI: see head.S */
int __initdata uboot_tag;
char __initdata *uboot_arg;
cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
- cpu->uncached_base = uncached_space.start << 24;
+ BUG_ON((uncached_space.start << 24) != ARC_UNCACHED_ADDR_SPACE);
READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
read_decode_mmu_bcr();
read_decode_cache_bcr();
- {
+ if (is_isa_arcompact()) {
struct bcr_fp_arcompact sp, dp;
struct bcr_bpu_arcompact bpu;
cpu->bpu.num_cache = 256 << (bpu.ent - 1);
cpu->bpu.num_pred = 256 << (bpu.ent - 1);
}
+ } else {
+ struct bcr_fp_arcv2 spdp;
+ struct bcr_bpu_arcv2 bpu;
+
+ READ_BCR(ARC_REG_FP_V2_BCR, spdp);
+ cpu->extn.fpu_sp = spdp.sp ? 1 : 0;
+ cpu->extn.fpu_dp = spdp.dp ? 1 : 0;
+
+ READ_BCR(ARC_REG_BPU_BCR, bpu);
+ cpu->bpu.ver = bpu.ver;
+ cpu->bpu.full = bpu.ft;
+ cpu->bpu.num_cache = 256 << bpu.bce;
+ cpu->bpu.num_pred = 2048 << bpu.pte;
}
READ_BCR(ARC_REG_AP_BCR, bcr);
{ {0x30, "ARC 700" }, 0x33},
{ {0x34, "ARC 700 R4.10"}, 0x34},
{ {0x35, "ARC 700 R4.11"}, 0x35},
+ { {0x50, "ARC HS38" }, 0x51},
{ {0x00, NULL } }
};
FIX_PTR(cpu);
- {
+ if (is_isa_arcompact()) {
isa_nm = "ARCompact";
be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
atomic = cpu->isa.atomic1;
if (!cpu->isa.ver) /* ISA BCR absent, use Kconfig info */
atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
+ } else {
+ isa_nm = "ARCv2";
+ be = cpu->isa.be;
+ atomic = cpu->isa.atomic;
}
n += scnprintf(buf + n, len - n,
n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ",
IS_AVAIL1(cpu->timers.t0, "Timer0 "),
IS_AVAIL1(cpu->timers.t1, "Timer1 "),
- IS_AVAIL2(cpu->timers.rtsc, "64-bit RTSC ", CONFIG_ARC_HAS_RTSC));
+ IS_AVAIL2(cpu->timers.rtc, "64-bit RTC ",
+ CONFIG_ARC_HAS_RTC));
- n += i = scnprintf(buf + n, len - n, "%s%s",
- IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC));
+ n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s",
+ IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+ IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
+ IS_AVAIL1(cpu->isa.unalign, "unalign (not used)"));
if (i)
n += scnprintf(buf + n, len - n, "\n\t\t: ");
+ if (cpu->extn_mpy.ver) {
+ if (cpu->extn_mpy.ver <= 0x2) { /* ARCompact */
+ n += scnprintf(buf + n, len - n, "mpy ");
+ } else {
+ int opt = 2; /* stock MPY/MPYH */
+
+ if (cpu->extn_mpy.dsp) /* OPT 7-9 */
+ opt = cpu->extn_mpy.dsp + 6;
+
+ n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt);
+ }
+ n += scnprintf(buf + n, len - n, "%s",
+ IS_USED(CONFIG_ARC_HAS_HW_MPY));
+ }
+
n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
- IS_AVAIL1(cpu->extn_mpy.ver, "mpy "),
+ IS_AVAIL1(cpu->isa.div_rem, "div_rem "),
IS_AVAIL1(cpu->extn.norm, "norm "),
IS_AVAIL1(cpu->extn.barrel, "barrel-shift "),
IS_AVAIL1(cpu->extn.swap, "swap "),
n += scnprintf(buf + n, len - n,
"Vector Table\t: %#x\nUncached Base\t: %#x\n",
- cpu->vec_base, cpu->uncached_base);
+ cpu->vec_base, ARC_UNCACHED_ADDR_SPACE);
if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
if (!cpu->timers.t1)
panic("Timer1 is not present!\n");
- if (IS_ENABLED(CONFIG_ARC_HAS_RTSC) && !cpu->timers.rtsc)
- panic("RTSC is not present\n");
+ if (IS_ENABLED(CONFIG_ARC_HAS_RTC) && !cpu->timers.rtc)
+ panic("RTC is not present\n");
#ifdef CONFIG_ARC_HAS_DCCM
/*
void __init setup_arch(char **cmdline_p)
{
+#ifdef CONFIG_ARC_UBOOT_SUPPORT
/* make sure that uboot passed pointer to cmdline/dtb is valid */
if (uboot_tag && is_kernel((unsigned long)uboot_arg))
panic("Invalid uboot arg\n");
/* See if u-boot passed an external Device Tree blob */
machine_desc = setup_machine_fdt(uboot_arg); /* uboot_tag == 2 */
- if (!machine_desc) {
+ if (!machine_desc)
+#endif
+ {
/* No, so try the embedded one */
machine_desc = setup_machine_fdt(__dtb_start);
if (!machine_desc)
sigset_t *set)
{
int err;
- err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), regs,
+ struct user_regs_struct uregs;
+
+ uregs.scratch.bta = regs->bta;
+ uregs.scratch.lp_start = regs->lp_start;
+ uregs.scratch.lp_end = regs->lp_end;
+ uregs.scratch.lp_count = regs->lp_count;
+ uregs.scratch.status32 = regs->status32;
+ uregs.scratch.ret = regs->ret;
+ uregs.scratch.blink = regs->blink;
+ uregs.scratch.fp = regs->fp;
+ uregs.scratch.gp = regs->r26;
+ uregs.scratch.r12 = regs->r12;
+ uregs.scratch.r11 = regs->r11;
+ uregs.scratch.r10 = regs->r10;
+ uregs.scratch.r9 = regs->r9;
+ uregs.scratch.r8 = regs->r8;
+ uregs.scratch.r7 = regs->r7;
+ uregs.scratch.r6 = regs->r6;
+ uregs.scratch.r5 = regs->r5;
+ uregs.scratch.r4 = regs->r4;
+ uregs.scratch.r3 = regs->r3;
+ uregs.scratch.r2 = regs->r2;
+ uregs.scratch.r1 = regs->r1;
+ uregs.scratch.r0 = regs->r0;
+ uregs.scratch.sp = regs->sp;
+
+ err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch,
sizeof(sf->uc.uc_mcontext.regs.scratch));
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t));
{
sigset_t set;
int err;
+ struct user_regs_struct uregs;
err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
if (!err)
set_current_blocked(&set);
- err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs.scratch),
+ err |= __copy_from_user(&uregs.scratch,
+ &(sf->uc.uc_mcontext.regs.scratch),
sizeof(sf->uc.uc_mcontext.regs.scratch));
+ regs->bta = uregs.scratch.bta;
+ regs->lp_start = uregs.scratch.lp_start;
+ regs->lp_end = uregs.scratch.lp_end;
+ regs->lp_count = uregs.scratch.lp_count;
+ regs->status32 = uregs.scratch.status32;
+ regs->ret = uregs.scratch.ret;
+ regs->blink = uregs.scratch.blink;
+ regs->fp = uregs.scratch.fp;
+ regs->r26 = uregs.scratch.gp;
+ regs->r12 = uregs.scratch.r12;
+ regs->r11 = uregs.scratch.r11;
+ regs->r10 = uregs.scratch.r10;
+ regs->r9 = uregs.scratch.r9;
+ regs->r8 = uregs.scratch.r8;
+ regs->r7 = uregs.scratch.r7;
+ regs->r6 = uregs.scratch.r6;
+ regs->r5 = uregs.scratch.r5;
+ regs->r4 = uregs.scratch.r4;
+ regs->r3 = uregs.scratch.r3;
+ regs->r2 = uregs.scratch.r2;
+ regs->r1 = uregs.scratch.r1;
+ regs->r0 = uregs.scratch.r0;
+ regs->sp = uregs.scratch.sp;
+
return err;
}
* their orig user space value when we ret from kernel
*/
regs->r0 = regs->orig_r0;
- regs->ret -= 4;
+ regs->ret -= is_isa_arcv2() ? 2 : 4;
break;
}
}
if (regs->r0 == -ERESTARTNOHAND ||
regs->r0 == -ERESTARTSYS || regs->r0 == -ERESTARTNOINTR) {
regs->r0 = regs->orig_r0;
- regs->ret -= 4;
+ regs->ret -= is_isa_arcv2() ? 2 : 4;
} else if (regs->r0 == -ERESTART_RESTARTBLOCK) {
regs->r8 = __NR_restart_syscall;
- regs->ret -= 4;
+ regs->ret -= is_isa_arcv2() ? 2 : 4;
}
syscall_wont_restart(regs); /* No more restarts */
}
arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
#endif
-struct plat_smp_ops plat_smp_ops;
+struct plat_smp_ops __weak plat_smp_ops;
/* XXX: per cpu ? Only needed once in early seconday boot */
struct task_struct *secondary_idle_tsk;
/*
* not supported here
*/
-int __init setup_profiling_timer(unsigned int multiplier)
+int setup_profiling_timer(unsigned int multiplier)
{
return -EINVAL;
}
machine_halt();
}
-static inline void __do_IPI(unsigned long msg)
+static inline int __do_IPI(unsigned long msg)
{
+ int rc = 0;
+
switch (msg) {
case IPI_RESCHEDULE:
scheduler_ipi();
break;
default:
- pr_warn("IPI with unexpected msg %ld\n", msg);
+ rc = 1;
}
+
+ return rc;
}
/*
irqreturn_t do_IPI(int irq, void *dev_id)
{
unsigned long pending;
+ unsigned long __maybe_unused copy;
pr_debug("IPI [%ld] received on cpu %d\n",
*this_cpu_ptr(&ipi_data), smp_processor_id());
* "dequeue" the msg corresponding to this IPI (and possibly other
* piggybacked msg from elided IPIs: see ipi_send_msg_one() above)
*/
- pending = xchg(this_cpu_ptr(&ipi_data), 0);
+ copy = pending = xchg(this_cpu_ptr(&ipi_data), 0);
do {
unsigned long msg = __ffs(pending);
- __do_IPI(msg);
+ int rc;
+
+ rc = __do_IPI(msg);
+#ifdef CONFIG_ARC_IPI_DBG
+ /* IPI received but no valid @msg */
+ if (rc)
+ pr_info("IPI with bogus msg %ld in %ld\n", msg, copy);
+#endif
pending &= ~(1U << msg);
} while (pending);
while (1) {
address = UNW_PC(&frame_info);
- if (address && __kernel_text_address(address)) {
- if (consumer_fn(address, arg) == -1)
- break;
- }
+ if (!address || !__kernel_text_address(address))
+ break;
- ret = arc_unwind(&frame_info);
+ if (consumer_fn(address, arg) == -1)
+ break;
- if (ret == 0) {
- frame_info.regs.r63 = frame_info.regs.r31;
- continue;
- } else {
+ ret = arc_unwind(&frame_info);
+ if (ret)
break;
- }
+
+ frame_info.regs.r63 = frame_info.regs.r31;
}
return address; /* return the last address it saw */
* while TIMER1 for free running (clocksource)
*
* Newer ARC700 cores have 64bit clk fetching RTSC insn, preferred over TIMER1
+ * which however is currently broken
*/
#include <linux/spinlock.h>
#include <asm/clk.h>
#include <asm/mach_desc.h>
+#include <asm/mcip.h>
+
/* Timer related Aux registers */
#define ARC_REG_TIMER0_LIMIT 0x23 /* timer 0 limit */
#define ARC_REG_TIMER0_CTRL 0x22 /* timer 0 control */
/********** Clock Source Device *********/
-#ifdef CONFIG_ARC_HAS_RTSC
+#ifdef CONFIG_ARC_HAS_GRTC
-int arc_counter_setup(void)
+static int arc_counter_setup(void)
{
- /*
- * For SMP this needs to be 0. However Kconfig glue doesn't
- * enable this option for SMP configs
- */
return 1;
}
unsigned long flags;
union {
#ifdef CONFIG_CPU_BIG_ENDIAN
- struct { u32 high, low; };
+ struct { u32 h, l; };
#else
- struct { u32 low, high; };
+ struct { u32 l, h; };
#endif
cycle_t full;
} stamp;
- flags = arch_local_irq_save();
+ local_irq_save(flags);
- __asm__ __volatile(
- " .extCoreRegister tsch, 58, r, cannot_shortcut \n"
- " rtsc %0, 0 \n"
- " mov %1, 0 \n"
- : "=r" (stamp.low), "=r" (stamp.high));
+ __mcip_cmd(CMD_GRTC_READ_LO, 0);
+ stamp.l = read_aux_reg(ARC_REG_MCIP_READBACK);
+
+ __mcip_cmd(CMD_GRTC_READ_HI, 0);
+ stamp.h = read_aux_reg(ARC_REG_MCIP_READBACK);
- arch_local_irq_restore(flags);
+ local_irq_restore(flags);
return stamp.full;
}
static struct clocksource arc_counter = {
- .name = "ARC RTSC",
- .rating = 300,
+ .name = "ARConnect GRTC",
+ .rating = 400,
.read = arc_counter_read,
- .mask = CLOCKSOURCE_MASK(32),
+ .mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
-#else /* !CONFIG_ARC_HAS_RTSC */
+#else
+
+#ifdef CONFIG_ARC_HAS_RTC
+
+#define AUX_RTC_CTRL 0x103
+#define AUX_RTC_LOW 0x104
+#define AUX_RTC_HIGH 0x105
-static bool is_usable_as_clocksource(void)
+int arc_counter_setup(void)
{
-#ifdef CONFIG_SMP
- return 0;
+ write_aux_reg(AUX_RTC_CTRL, 1);
+
+ /* Not usable in SMP */
+ return !IS_ENABLED(CONFIG_SMP);
+}
+
+static cycle_t arc_counter_read(struct clocksource *cs)
+{
+ unsigned long status;
+ union {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ struct { u32 high, low; };
#else
- return 1;
+ struct { u32 low, high; };
#endif
+ cycle_t full;
+ } stamp;
+
+
+ __asm__ __volatile(
+ "1: \n"
+ " lr %0, [AUX_RTC_LOW] \n"
+ " lr %1, [AUX_RTC_HIGH] \n"
+ " lr %2, [AUX_RTC_CTRL] \n"
+ " bbit0.nt %2, 31, 1b \n"
+ : "=r" (stamp.low), "=r" (stamp.high), "=r" (status));
+
+ return stamp.full;
}
+static struct clocksource arc_counter = {
+ .name = "ARCv2 RTC",
+ .rating = 350,
+ .read = arc_counter_read,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+#else /* !CONFIG_ARC_HAS_RTC */
+
/*
* set 32bit TIMER1 to keep counting monotonically and wraparound
*/
write_aux_reg(ARC_REG_TIMER1_CNT, 0);
write_aux_reg(ARC_REG_TIMER1_CTRL, TIMER_CTRL_NH);
- return is_usable_as_clocksource();
+ /* Not usable in SMP */
+ return !IS_ENABLED(CONFIG_SMP);
}
static cycle_t arc_counter_read(struct clocksource *cs)
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
+#endif
#endif
/********** Clock Event Device *********/
#include <linux/proc_fs.h>
#include <linux/file.h>
#include <asm/arcregs.h>
+#include <asm/irqflags.h>
/*
* Common routine to print scratch regs (r0-r12) or callee regs (r13-r25)
n += scnprintf(buf + n, len - n, "\n");
/* because pt_regs has regs reversed: r12..r0, r25..r13 */
- reg_rev--;
+ if (is_isa_arcv2() && start_num == 0)
+ reg_rev++;
+ else
+ reg_rev--;
}
if (start_num != 0)
((cause_code == 0x02) ? "Write" : "EX"));
} else if (vec == ECR_V_INSN_ERR) {
pr_cont("Illegal Insn\n");
+#ifdef CONFIG_ISA_ARCV2
+ } else if (vec == ECR_V_MEM_ERR) {
+ if (cause_code == 0x00)
+ pr_cont("Bus Error from Insn Mem\n");
+ else if (cause_code == 0x10)
+ pr_cont("Bus Error from Data Mem\n");
+ else
+ pr_cont("Bus Error, check PRM\n");
+#endif
} else {
pr_cont("Check Programmer's Manual\n");
}
pr_info("[STAT32]: 0x%08lx", regs->status32);
-#define STS_BIT(r, bit) r->status32 & STATUS_##bit##_MASK ? #bit : ""
- if (!user_mode(regs))
- pr_cont(" : %2s %2s %2s %2s %2s\n",
- STS_BIT(regs, AE), STS_BIT(regs, A2), STS_BIT(regs, A1),
- STS_BIT(regs, E2), STS_BIT(regs, E1));
+#define STS_BIT(r, bit) r->status32 & STATUS_##bit##_MASK ? #bit" " : ""
+#ifdef CONFIG_ISA_ARCOMPACT
+ pr_cont(" : %2s%2s%2s%2s%2s%2s%2s\n",
+ (regs->status32 & STATUS_U_MASK) ? "U " : "K ",
+ STS_BIT(regs, DE), STS_BIT(regs, AE),
+ STS_BIT(regs, A2), STS_BIT(regs, A1),
+ STS_BIT(regs, E2), STS_BIT(regs, E1));
+#else
+ pr_cont(" : %2s%2s%2s%2s\n",
+ STS_BIT(regs, IE),
+ (regs->status32 & STATUS_U_MASK) ? "U " : "K ",
+ STS_BIT(regs, DE), STS_BIT(regs, AE));
+#endif
pr_info("BTA: 0x%08lx\t SP: 0x%08lx\t FP: 0x%08lx\n",
regs->bta, regs->sp, regs->fp);
pr_info("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n",
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-lib-y := strchr-700.o strcmp.o strcpy-700.o strlen.o
-lib-y += memcmp.o memcpy-700.o memset.o
+lib-y := strchr-700.o strcpy-700.o strlen.o memcmp.o
+
+lib-$(CONFIG_ISA_ARCOMPACT) += memcpy-700.o memset.o strcmp.o
+lib-$(CONFIG_ISA_ARCV2) += memcpy-archs.o memset-archs.o strcmp-archs.o
ld r4,[r0,0]
ld r5,[r1,0]
lsr.f lp_count,r3,3
+#ifdef CONFIG_ISA_ARCV2
+ /* In ARCv2 a branch can't be the last instruction in a zero overhead
+ * loop.
+ * So we move the branch to the start of the loop, duplicate it
+ * after the end, and set up r12 so that the branch isn't taken
+ * initially.
+ */
+ mov_s r12,WORD2
+ lpne .Loop_end
+ brne WORD2,r12,.Lodd
+ ld WORD2,[r0,4]
+#else
lpne .Loop_end
ld_s WORD2,[r0,4]
+#endif
ld_s r12,[r1,4]
brne r4,r5,.Leven
ld.a r4,[r0,8]
ld.a r5,[r1,8]
+#ifdef CONFIG_ISA_ARCV2
+.Loop_end:
+ brne WORD2,r12,.Lodd
+#else
brne WORD2,r12,.Lodd
.Loop_end:
+#endif
asl_s SHIFT,SHIFT,3
bhs_s .Last_cmp
brne r4,r5,.Leven
bset.cs r0,r0,31
.Lodd:
cmp_s WORD2,r12
-
mov_s r0,1
j_s.d [blink]
bset.cs r0,r0,31
ldb r4,[r0,0]
ldb r5,[r1,0]
lsr.f lp_count,r3
+#ifdef CONFIG_ISA_ARCV2
+ mov r12,r3
lpne .Lbyte_end
+ brne r3,r12,.Lbyte_odd
+#else
+ lpne .Lbyte_end
+#endif
ldb_s r3,[r0,1]
ldb r12,[r1,1]
brne r4,r5,.Lbyte_even
ldb.a r4,[r0,2]
ldb.a r5,[r1,2]
+#ifdef CONFIG_ISA_ARCV2
+.Lbyte_end:
+ brne r3,r12,.Lbyte_odd
+#else
brne r3,r12,.Lbyte_odd
.Lbyte_end:
+#endif
bcc .Lbyte_even
brne r4,r5,.Lbyte_even
ldb_s r3,[r0,1]
--- /dev/null
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#ifdef __LITTLE_ENDIAN__
+# define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; <<
+# define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >>
+# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM
+# define MERGE_2(RX,RY,IMM)
+# define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF
+# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM
+#else
+# define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >>
+# define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; <<
+# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; <<
+# define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; <<
+# define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM
+# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08
+#endif
+
+#ifdef CONFIG_ARC_HAS_LL64
+# define PREFETCH_READ(RX) prefetch [RX, 56]
+# define PREFETCH_WRITE(RX) prefetchw [RX, 64]
+# define LOADX(DST,RX) ldd.ab DST, [RX, 8]
+# define STOREX(SRC,RX) std.ab SRC, [RX, 8]
+# define ZOLSHFT 5
+# define ZOLAND 0x1F
+#else
+# define PREFETCH_READ(RX) prefetch [RX, 28]
+# define PREFETCH_WRITE(RX) prefetchw [RX, 32]
+# define LOADX(DST,RX) ld.ab DST, [RX, 4]
+# define STOREX(SRC,RX) st.ab SRC, [RX, 4]
+# define ZOLSHFT 4
+# define ZOLAND 0xF
+#endif
+
+ENTRY(memcpy)
+ prefetch [r1] ; Prefetch the read location
+ prefetchw [r0] ; Prefetch the write location
+ mov.f 0, r2
+;;; if size is zero
+ jz.d [blink]
+ mov r3, r0 ; don;t clobber ret val
+
+;;; if size <= 8
+ cmp r2, 8
+ bls.d @smallchunk
+ mov.f lp_count, r2
+
+ and.f r4, r0, 0x03
+ rsub lp_count, r4, 4
+ lpnz @aligndestination
+ ;; LOOP BEGIN
+ ldb.ab r5, [r1,1]
+ sub r2, r2, 1
+ stb.ab r5, [r3,1]
+aligndestination:
+
+;;; Check the alignment of the source
+ and.f r4, r1, 0x03
+ bnz.d @sourceunaligned
+
+;;; CASE 0: Both source and destination are 32bit aligned
+;;; Convert len to Dwords, unfold x4
+ lsr.f lp_count, r2, ZOLSHFT
+ lpnz @copy32_64bytes
+ ;; LOOP START
+ LOADX (r6, r1)
+ PREFETCH_READ (r1)
+ PREFETCH_WRITE (r3)
+ LOADX (r8, r1)
+ LOADX (r10, r1)
+ LOADX (r4, r1)
+ STOREX (r6, r3)
+ STOREX (r8, r3)
+ STOREX (r10, r3)
+ STOREX (r4, r3)
+copy32_64bytes:
+
+ and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes
+smallchunk:
+ lpnz @copyremainingbytes
+ ;; LOOP START
+ ldb.ab r5, [r1,1]
+ stb.ab r5, [r3,1]
+copyremainingbytes:
+
+ j [blink]
+;;; END CASE 0
+
+sourceunaligned:
+ cmp r4, 2
+ beq.d @unalignedOffby2
+ sub r2, r2, 1
+
+ bhi.d @unalignedOffby3
+ ldb.ab r5, [r1, 1]
+
+;;; CASE 1: The source is unaligned, off by 1
+ ;; Hence I need to read 1 byte for a 16bit alignment
+ ;; and 2bytes to reach 32bit alignment
+ ldh.ab r6, [r1, 2]
+ sub r2, r2, 2
+ ;; Convert to words, unfold x2
+ lsr.f lp_count, r2, 3
+ MERGE_1 (r6, r6, 8)
+ MERGE_2 (r5, r5, 24)
+ or r5, r5, r6
+
+ ;; Both src and dst are aligned
+ lpnz @copy8bytes_1
+ ;; LOOP START
+ ld.ab r6, [r1, 4]
+ prefetch [r1, 28] ;Prefetch the next read location
+ ld.ab r8, [r1,4]
+ prefetchw [r3, 32] ;Prefetch the next write location
+
+ SHIFT_1 (r7, r6, 24)
+ or r7, r7, r5
+ SHIFT_2 (r5, r6, 8)
+
+ SHIFT_1 (r9, r8, 24)
+ or r9, r9, r5
+ SHIFT_2 (r5, r8, 8)
+
+ st.ab r7, [r3, 4]
+ st.ab r9, [r3, 4]
+copy8bytes_1:
+
+ ;; Write back the remaining 16bits
+ EXTRACT_1 (r6, r5, 16)
+ sth.ab r6, [r3, 2]
+ ;; Write back the remaining 8bits
+ EXTRACT_2 (r5, r5, 16)
+ stb.ab r5, [r3, 1]
+
+ and.f lp_count, r2, 0x07 ;Last 8bytes
+ lpnz @copybytewise_1
+ ;; LOOP START
+ ldb.ab r6, [r1,1]
+ stb.ab r6, [r3,1]
+copybytewise_1:
+ j [blink]
+
+unalignedOffby2:
+;;; CASE 2: The source is unaligned, off by 2
+ ldh.ab r5, [r1, 2]
+ sub r2, r2, 1
+
+ ;; Both src and dst are aligned
+ ;; Convert to words, unfold x2
+ lsr.f lp_count, r2, 3
+#ifdef __BIG_ENDIAN__
+ asl.nz r5, r5, 16
+#endif
+ lpnz @copy8bytes_2
+ ;; LOOP START
+ ld.ab r6, [r1, 4]
+ prefetch [r1, 28] ;Prefetch the next read location
+ ld.ab r8, [r1,4]
+ prefetchw [r3, 32] ;Prefetch the next write location
+
+ SHIFT_1 (r7, r6, 16)
+ or r7, r7, r5
+ SHIFT_2 (r5, r6, 16)
+
+ SHIFT_1 (r9, r8, 16)
+ or r9, r9, r5
+ SHIFT_2 (r5, r8, 16)
+
+ st.ab r7, [r3, 4]
+ st.ab r9, [r3, 4]
+copy8bytes_2:
+
+#ifdef __BIG_ENDIAN__
+ lsr.nz r5, r5, 16
+#endif
+ sth.ab r5, [r3, 2]
+
+ and.f lp_count, r2, 0x07 ;Last 8bytes
+ lpnz @copybytewise_2
+ ;; LOOP START
+ ldb.ab r6, [r1,1]
+ stb.ab r6, [r3,1]
+copybytewise_2:
+ j [blink]
+
+unalignedOffby3:
+;;; CASE 3: The source is unaligned, off by 3
+;;; Hence, I need to read 1byte for achieve the 32bit alignment
+
+ ;; Both src and dst are aligned
+ ;; Convert to words, unfold x2
+ lsr.f lp_count, r2, 3
+#ifdef __BIG_ENDIAN__
+ asl.ne r5, r5, 24
+#endif
+ lpnz @copy8bytes_3
+ ;; LOOP START
+ ld.ab r6, [r1, 4]
+ prefetch [r1, 28] ;Prefetch the next read location
+ ld.ab r8, [r1,4]
+ prefetch [r3, 32] ;Prefetch the next write location
+
+ SHIFT_1 (r7, r6, 8)
+ or r7, r7, r5
+ SHIFT_2 (r5, r6, 24)
+
+ SHIFT_1 (r9, r8, 8)
+ or r9, r9, r5
+ SHIFT_2 (r5, r8, 24)
+
+ st.ab r7, [r3, 4]
+ st.ab r9, [r3, 4]
+copy8bytes_3:
+
+#ifdef __BIG_ENDIAN__
+ lsr.nz r5, r5, 24
+#endif
+ stb.ab r5, [r3, 1]
+
+ and.f lp_count, r2, 0x07 ;Last 8bytes
+ lpnz @copybytewise_3
+ ;; LOOP START
+ ldb.ab r6, [r1,1]
+ stb.ab r6, [r3,1]
+copybytewise_3:
+ j [blink]
+
+END(memcpy)
--- /dev/null
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#undef PREALLOC_NOT_AVAIL
+
+#ifdef PREALLOC_NOT_AVAIL
+#define PREWRITE(A,B) prefetchw [(A),(B)]
+#else
+#define PREWRITE(A,B) prealloc [(A),(B)]
+#endif
+
+ENTRY(memset)
+ prefetchw [r0] ; Prefetch the write location
+ mov.f 0, r2
+;;; if size is zero
+ jz.d [blink]
+ mov r3, r0 ; don't clobber ret val
+
+;;; if length < 8
+ brls.d.nt r2, 8, .Lsmallchunk
+ mov.f lp_count,r2
+
+ and.f r4, r0, 0x03
+ rsub lp_count, r4, 4
+ lpnz @.Laligndestination
+ ;; LOOP BEGIN
+ stb.ab r1, [r3,1]
+ sub r2, r2, 1
+.Laligndestination:
+
+;;; Destination is aligned
+ and r1, r1, 0xFF
+ asl r4, r1, 8
+ or r4, r4, r1
+ asl r5, r4, 16
+ or r5, r5, r4
+ mov r4, r5
+
+ sub3 lp_count, r2, 8
+ cmp r2, 64
+ bmsk.hi r2, r2, 5
+ mov.ls lp_count, 0
+ add3.hi r2, r2, 8
+
+;;; Convert len to Dwords, unfold x8
+ lsr.f lp_count, lp_count, 6
+ lpnz @.Lset64bytes
+ ;; LOOP START
+ PREWRITE(r3, 64) ;Prefetch the next write location
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+.Lset64bytes:
+
+ lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes
+ lpnz .Lset32bytes
+ ;; LOOP START
+ prefetchw [r3, 32] ;Prefetch the next write location
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+.Lset32bytes:
+
+ and.f lp_count, r2, 0x1F ;Last remaining 31 bytes
+.Lsmallchunk:
+ lpnz .Lcopy3bytes
+ ;; LOOP START
+ stb.ab r1, [r3, 1]
+.Lcopy3bytes:
+
+ j [blink]
+
+END(memset)
+
+ENTRY(memzero)
+ ; adjust bzero args to memset args
+ mov r2, r1
+ b.d memset ;tail call so need to tinker with blink
+ mov r1, 0
+END(memzero)
--- /dev/null
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+ENTRY(strcmp)
+ or r2, r0, r1
+ bmsk_s r2, r2, 1
+ brne r2, 0, @.Lcharloop
+
+;;; s1 and s2 are word aligned
+ ld.ab r2, [r0, 4]
+
+ mov_s r12, 0x01010101
+ ror r11, r12
+ .align 4
+.LwordLoop:
+ ld.ab r3, [r1, 4]
+ ;; Detect NULL char in str1
+ sub r4, r2, r12
+ ld.ab r5, [r0, 4]
+ bic r4, r4, r2
+ and r4, r4, r11
+ brne.d.nt r4, 0, .LfoundNULL
+ ;; Check if the read locations are the same
+ cmp r2, r3
+ beq.d .LwordLoop
+ mov.eq r2, r5
+
+ ;; A match is found, spot it out
+#ifdef __LITTLE_ENDIAN__
+ swape r3, r3
+ mov_s r0, 1
+ swape r2, r2
+#else
+ mov_s r0, 1
+#endif
+ cmp_s r2, r3
+ j_s.d [blink]
+ bset.lo r0, r0, 31
+
+ .align 4
+.LfoundNULL:
+#ifdef __BIG_ENDIAN__
+ swape r4, r4
+ swape r2, r2
+ swape r3, r3
+#endif
+ ;; Find null byte
+ ffs r0, r4
+ bmsk r2, r2, r0
+ bmsk r3, r3, r0
+ swape r2, r2
+ swape r3, r3
+ ;; make the return value
+ sub.f r0, r2, r3
+ mov.hi r0, 1
+ j_s.d [blink]
+ bset.lo r0, r0, 31
+
+ .align 4
+.Lcharloop:
+ ldb.ab r2, [r0, 1]
+ ldb.ab r3, [r1, 1]
+ nop
+ breq r2, 0, .Lcmpend
+ breq r2, r3, .Lcharloop
+
+ .align 4
+.Lcmpend:
+ j_s.d [blink]
+ sub r0, r2, r3
+END(strcmp)
#
obj-y := extable.o ioremap.o dma.o fault.o init.o
-obj-y += tlb.o tlbex.o cache_arc700.o mmap.o
+obj-y += tlb.o tlbex.o cache.o mmap.o
--- /dev/null
+/*
+ * ARC Cache Management
+ *
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/cache.h>
+#include <linux/mmu_context.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <linux/pagemap.h>
+#include <asm/cacheflush.h>
+#include <asm/cachectl.h>
+#include <asm/setup.h>
+
+static int l2_line_sz;
+
+void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr,
+ unsigned long sz, const int cacheop);
+
+char *arc_cache_mumbojumbo(int c, char *buf, int len)
+{
+ int n = 0;
+ struct cpuinfo_arc_cache *p;
+
+#define PR_CACHE(p, cfg, str) \
+ if (!(p)->ver) \
+ n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \
+ else \
+ n += scnprintf(buf + n, len - n, \
+ str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n", \
+ (p)->sz_k, (p)->assoc, (p)->line_len, \
+ (p)->vipt ? "VIPT" : "PIPT", \
+ (p)->alias ? " aliasing" : "", \
+ IS_ENABLED(cfg) ? "" : " (not used)");
+
+ PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
+ PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
+
+ p = &cpuinfo_arc700[c].slc;
+ if (p->ver)
+ n += scnprintf(buf + n, len - n,
+ "SLC\t\t: %uK, %uB Line\n", p->sz_k, p->line_len);
+
+ return buf;
+}
+
+/*
+ * Read the Cache Build Confuration Registers, Decode them and save into
+ * the cpuinfo structure for later use.
+ * No Validation done here, simply read/convert the BCRs
+ */
+void read_decode_cache_bcr(void)
+{
+ struct cpuinfo_arc_cache *p_ic, *p_dc, *p_slc;
+ unsigned int cpu = smp_processor_id();
+ struct bcr_cache {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
+#else
+ unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
+#endif
+ } ibcr, dbcr;
+
+ struct bcr_generic sbcr;
+
+ struct bcr_slc_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:24, way:2, lsz:2, sz:4;
+#else
+ unsigned int sz:4, lsz:2, way:2, pad:24;
+#endif
+ } slc_cfg;
+
+ p_ic = &cpuinfo_arc700[cpu].icache;
+ READ_BCR(ARC_REG_IC_BCR, ibcr);
+
+ if (!ibcr.ver)
+ goto dc_chk;
+
+ if (ibcr.ver <= 3) {
+ BUG_ON(ibcr.config != 3);
+ p_ic->assoc = 2; /* Fixed to 2w set assoc */
+ } else if (ibcr.ver >= 4) {
+ p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */
+ }
+
+ p_ic->line_len = 8 << ibcr.line_len;
+ p_ic->sz_k = 1 << (ibcr.sz - 1);
+ p_ic->ver = ibcr.ver;
+ p_ic->vipt = 1;
+ p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1;
+
+dc_chk:
+ p_dc = &cpuinfo_arc700[cpu].dcache;
+ READ_BCR(ARC_REG_DC_BCR, dbcr);
+
+ if (!dbcr.ver)
+ goto slc_chk;
+
+ if (dbcr.ver <= 3) {
+ BUG_ON(dbcr.config != 2);
+ p_dc->assoc = 4; /* Fixed to 4w set assoc */
+ p_dc->vipt = 1;
+ p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1;
+ } else if (dbcr.ver >= 4) {
+ p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */
+ p_dc->vipt = 0;
+ p_dc->alias = 0; /* PIPT so can't VIPT alias */
+ }
+
+ p_dc->line_len = 16 << dbcr.line_len;
+ p_dc->sz_k = 1 << (dbcr.sz - 1);
+ p_dc->ver = dbcr.ver;
+
+slc_chk:
+ if (!is_isa_arcv2())
+ return;
+
+ p_slc = &cpuinfo_arc700[cpu].slc;
+ READ_BCR(ARC_REG_SLC_BCR, sbcr);
+ if (sbcr.ver) {
+ READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
+ p_slc->ver = sbcr.ver;
+ p_slc->sz_k = 128 << slc_cfg.sz;
+ l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
+ }
+}
+
+/*
+ * Line Operation on {I,D}-Cache
+ */
+
+#define OP_INV 0x1
+#define OP_FLUSH 0x2
+#define OP_FLUSH_N_INV 0x3
+#define OP_INV_IC 0x4
+
+/*
+ * I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3)
+ *
+ * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
+ * The orig Cache Management Module "CDU" only required paddr to invalidate a
+ * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
+ * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
+ * the exact same line.
+ *
+ * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
+ * paddr alone could not be used to correctly index the cache.
+ *
+ * ------------------
+ * MMU v1/v2 (Fixed Page Size 8k)
+ * ------------------
+ * The solution was to provide CDU with these additonal vaddr bits. These
+ * would be bits [x:13], x would depend on cache-geometry, 13 comes from
+ * standard page size of 8k.
+ * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
+ * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
+ * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
+ * represent the offset within cache-line. The adv of using this "clumsy"
+ * interface for additional info was no new reg was needed in CDU programming
+ * model.
+ *
+ * 17:13 represented the max num of bits passable, actual bits needed were
+ * fewer, based on the num-of-aliases possible.
+ * -for 2 alias possibility, only bit 13 needed (32K cache)
+ * -for 4 alias possibility, bits 14:13 needed (64K cache)
+ *
+ * ------------------
+ * MMU v3
+ * ------------------
+ * This ver of MMU supports variable page sizes (1k-16k): although Linux will
+ * only support 8k (default), 16k and 4k.
+ * However from hardware perspective, smaller page sizes aggrevate aliasing
+ * meaning more vaddr bits needed to disambiguate the cache-line-op ;
+ * the existing scheme of piggybacking won't work for certain configurations.
+ * Two new registers IC_PTAG and DC_PTAG inttoduced.
+ * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
+ */
+
+static inline
+void __cache_line_loop_v2(unsigned long paddr, unsigned long vaddr,
+ unsigned long sz, const int op)
+{
+ unsigned int aux_cmd;
+ int num_lines;
+ const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
+
+ if (op == OP_INV_IC) {
+ aux_cmd = ARC_REG_IC_IVIL;
+ } else {
+ /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
+ aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
+ }
+
+ /* Ensure we properly floor/ceil the non-line aligned/sized requests
+ * and have @paddr - aligned to cache line and integral @num_lines.
+ * This however can be avoided for page sized since:
+ * -@paddr will be cache-line aligned already (being page aligned)
+ * -@sz will be integral multiple of line size (being page sized).
+ */
+ if (!full_page) {
+ sz += paddr & ~CACHE_LINE_MASK;
+ paddr &= CACHE_LINE_MASK;
+ vaddr &= CACHE_LINE_MASK;
+ }
+
+ num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
+
+ /* MMUv2 and before: paddr contains stuffed vaddrs bits */
+ paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
+
+ while (num_lines-- > 0) {
+ write_aux_reg(aux_cmd, paddr);
+ paddr += L1_CACHE_BYTES;
+ }
+}
+
+static inline
+void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr,
+ unsigned long sz, const int op)
+{
+ unsigned int aux_cmd, aux_tag;
+ int num_lines;
+ const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
+
+ if (op == OP_INV_IC) {
+ aux_cmd = ARC_REG_IC_IVIL;
+ aux_tag = ARC_REG_IC_PTAG;
+ } else {
+ aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
+ aux_tag = ARC_REG_DC_PTAG;
+ }
+
+ /* Ensure we properly floor/ceil the non-line aligned/sized requests
+ * and have @paddr - aligned to cache line and integral @num_lines.
+ * This however can be avoided for page sized since:
+ * -@paddr will be cache-line aligned already (being page aligned)
+ * -@sz will be integral multiple of line size (being page sized).
+ */
+ if (!full_page) {
+ sz += paddr & ~CACHE_LINE_MASK;
+ paddr &= CACHE_LINE_MASK;
+ vaddr &= CACHE_LINE_MASK;
+ }
+ num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
+
+ /*
+ * MMUv3, cache ops require paddr in PTAG reg
+ * if V-P const for loop, PTAG can be written once outside loop
+ */
+ if (full_page)
+ write_aux_reg(aux_tag, paddr);
+
+ while (num_lines-- > 0) {
+ if (!full_page) {
+ write_aux_reg(aux_tag, paddr);
+ paddr += L1_CACHE_BYTES;
+ }
+
+ write_aux_reg(aux_cmd, vaddr);
+ vaddr += L1_CACHE_BYTES;
+ }
+}
+
+/*
+ * In HS38x (MMU v4), although icache is VIPT, only paddr is needed for cache
+ * maintenance ops (in IVIL reg), as long as icache doesn't alias.
+ *
+ * For Aliasing icache, vaddr is also needed (in IVIL), while paddr is
+ * specified in PTAG (similar to MMU v3)
+ */
+static inline
+void __cache_line_loop_v4(unsigned long paddr, unsigned long vaddr,
+ unsigned long sz, const int cacheop)
+{
+ unsigned int aux_cmd;
+ int num_lines;
+ const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
+
+ if (cacheop == OP_INV_IC) {
+ aux_cmd = ARC_REG_IC_IVIL;
+ } else {
+ /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
+ aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
+ }
+
+ /* Ensure we properly floor/ceil the non-line aligned/sized requests
+ * and have @paddr - aligned to cache line and integral @num_lines.
+ * This however can be avoided for page sized since:
+ * -@paddr will be cache-line aligned already (being page aligned)
+ * -@sz will be integral multiple of line size (being page sized).
+ */
+ if (!full_page_op) {
+ sz += paddr & ~CACHE_LINE_MASK;
+ paddr &= CACHE_LINE_MASK;
+ }
+
+ num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
+
+ while (num_lines-- > 0) {
+ write_aux_reg(aux_cmd, paddr);
+ paddr += L1_CACHE_BYTES;
+ }
+}
+
+#if (CONFIG_ARC_MMU_VER < 3)
+#define __cache_line_loop __cache_line_loop_v2
+#elif (CONFIG_ARC_MMU_VER == 3)
+#define __cache_line_loop __cache_line_loop_v3
+#elif (CONFIG_ARC_MMU_VER > 3)
+#define __cache_line_loop __cache_line_loop_v4
+#endif
+
+#ifdef CONFIG_ARC_HAS_DCACHE
+
+/***************************************************************
+ * Machine specific helpers for Entire D-Cache or Per Line ops
+ */
+
+static inline void __before_dc_op(const int op)
+{
+ if (op == OP_FLUSH_N_INV) {
+ /* Dcache provides 2 cmd: FLUSH or INV
+ * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE
+ * flush-n-inv is achieved by INV cmd but with IM=1
+ * So toggle INV sub-mode depending on op request and default
+ */
+ const unsigned int ctl = ARC_REG_DC_CTRL;
+ write_aux_reg(ctl, read_aux_reg(ctl) | DC_CTRL_INV_MODE_FLUSH);
+ }
+}
+
+static inline void __after_dc_op(const int op)
+{
+ if (op & OP_FLUSH) {
+ const unsigned int ctl = ARC_REG_DC_CTRL;
+ unsigned int reg;
+
+ /* flush / flush-n-inv both wait */
+ while ((reg = read_aux_reg(ctl)) & DC_CTRL_FLUSH_STATUS)
+ ;
+
+ /* Switch back to default Invalidate mode */
+ if (op == OP_FLUSH_N_INV)
+ write_aux_reg(ctl, reg & ~DC_CTRL_INV_MODE_FLUSH);
+ }
+}
+
+/*
+ * Operation on Entire D-Cache
+ * @op = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV}
+ * Note that constant propagation ensures all the checks are gone
+ * in generated code
+ */
+static inline void __dc_entire_op(const int op)
+{
+ int aux;
+
+ __before_dc_op(op);
+
+ if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */
+ aux = ARC_REG_DC_IVDC;
+ else
+ aux = ARC_REG_DC_FLSH;
+
+ write_aux_reg(aux, 0x1);
+
+ __after_dc_op(op);
+}
+
+/* For kernel mappings cache operation: index is same as paddr */
+#define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op)
+
+/*
+ * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback)
+ */
+static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
+ unsigned long sz, const int op)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ __before_dc_op(op);
+
+ __cache_line_loop(paddr, vaddr, sz, op);
+
+ __after_dc_op(op);
+
+ local_irq_restore(flags);
+}
+
+#else
+
+#define __dc_entire_op(op)
+#define __dc_line_op(paddr, vaddr, sz, op)
+#define __dc_line_op_k(paddr, sz, op)
+
+#endif /* CONFIG_ARC_HAS_DCACHE */
+
+#ifdef CONFIG_ARC_HAS_ICACHE
+
+static inline void __ic_entire_inv(void)
+{
+ write_aux_reg(ARC_REG_IC_IVIC, 1);
+ read_aux_reg(ARC_REG_IC_CTRL); /* blocks */
+}
+
+static inline void
+__ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr,
+ unsigned long sz)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ (*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC);
+ local_irq_restore(flags);
+}
+
+#ifndef CONFIG_SMP
+
+#define __ic_line_inv_vaddr(p, v, s) __ic_line_inv_vaddr_local(p, v, s)
+
+#else
+
+struct ic_inv_args {
+ unsigned long paddr, vaddr;
+ int sz;
+};
+
+static void __ic_line_inv_vaddr_helper(void *info)
+{
+ struct ic_inv_args *ic_inv = info;
+
+ __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz);
+}
+
+static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
+ unsigned long sz)
+{
+ struct ic_inv_args ic_inv = {
+ .paddr = paddr,
+ .vaddr = vaddr,
+ .sz = sz
+ };
+
+ on_each_cpu(__ic_line_inv_vaddr_helper, &ic_inv, 1);
+}
+
+#endif /* CONFIG_SMP */
+
+#else /* !CONFIG_ARC_HAS_ICACHE */
+
+#define __ic_entire_inv()
+#define __ic_line_inv_vaddr(pstart, vstart, sz)
+
+#endif /* CONFIG_ARC_HAS_ICACHE */
+
+noinline void slc_op(unsigned long paddr, unsigned long sz, const int op)
+{
+#ifdef CONFIG_ISA_ARCV2
+ unsigned long flags;
+ unsigned int ctrl;
+
+ local_irq_save(flags);
+
+ /*
+ * The Region Flush operation is specified by CTRL.RGN_OP[11..9]
+ * - b'000 (default) is Flush,
+ * - b'001 is Invalidate if CTRL.IM == 0
+ * - b'001 is Flush-n-Invalidate if CTRL.IM == 1
+ */
+ ctrl = read_aux_reg(ARC_REG_SLC_CTRL);
+
+ /* Don't rely on default value of IM bit */
+ if (!(op & OP_FLUSH)) /* i.e. OP_INV */
+ ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */
+ else
+ ctrl |= SLC_CTRL_IM;
+
+ if (op & OP_INV)
+ ctrl |= SLC_CTRL_RGN_OP_INV; /* Inv or flush-n-inv */
+ else
+ ctrl &= ~SLC_CTRL_RGN_OP_INV;
+
+ write_aux_reg(ARC_REG_SLC_CTRL, ctrl);
+
+ /*
+ * Lower bits are ignored, no need to clip
+ * END needs to be setup before START (latter triggers the operation)
+ * END can't be same as START, so add (l2_line_sz - 1) to sz
+ */
+ write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1));
+ write_aux_reg(ARC_REG_SLC_RGN_START, paddr);
+
+ while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
+
+ local_irq_restore(flags);
+#endif
+}
+
+static inline int need_slc_flush(void)
+{
+ return is_isa_arcv2() && l2_line_sz;
+}
+
+/***********************************************************
+ * Exported APIs
+ */
+
+/*
+ * Handle cache congruency of kernel and userspace mappings of page when kernel
+ * writes-to/reads-from
+ *
+ * The idea is to defer flushing of kernel mapping after a WRITE, possible if:
+ * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent
+ * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache)
+ * -In SMP, if hardware caches are coherent
+ *
+ * There's a corollary case, where kernel READs from a userspace mapped page.
+ * If the U-mapping is not congruent to to K-mapping, former needs flushing.
+ */
+void flush_dcache_page(struct page *page)
+{
+ struct address_space *mapping;
+
+ if (!cache_is_vipt_aliasing()) {
+ clear_bit(PG_dc_clean, &page->flags);
+ return;
+ }
+
+ /* don't handle anon pages here */
+ mapping = page_mapping(page);
+ if (!mapping)
+ return;
+
+ /*
+ * pagecache page, file not yet mapped to userspace
+ * Make a note that K-mapping is dirty
+ */
+ if (!mapping_mapped(mapping)) {
+ clear_bit(PG_dc_clean, &page->flags);
+ } else if (page_mapped(page)) {
+
+ /* kernel reading from page with U-mapping */
+ unsigned long paddr = (unsigned long)page_address(page);
+ unsigned long vaddr = page->index << PAGE_CACHE_SHIFT;
+
+ if (addr_not_cache_congruent(paddr, vaddr))
+ __flush_dcache_page(paddr, vaddr);
+ }
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+
+ if (need_slc_flush())
+ slc_op(start, sz, OP_FLUSH_N_INV);
+}
+EXPORT_SYMBOL(dma_cache_wback_inv);
+
+void dma_cache_inv(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_INV);
+
+ if (need_slc_flush())
+ slc_op(start, sz, OP_INV);
+}
+EXPORT_SYMBOL(dma_cache_inv);
+
+void dma_cache_wback(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_FLUSH);
+
+ if (need_slc_flush())
+ slc_op(start, sz, OP_FLUSH);
+}
+EXPORT_SYMBOL(dma_cache_wback);
+
+/*
+ * This is API for making I/D Caches consistent when modifying
+ * kernel code (loadable modules, kprobes, kgdb...)
+ * This is called on insmod, with kernel virtual address for CODE of
+ * the module. ARC cache maintenance ops require PHY address thus we
+ * need to convert vmalloc addr to PHY addr
+ */
+void flush_icache_range(unsigned long kstart, unsigned long kend)
+{
+ unsigned int tot_sz;
+
+ WARN(kstart < TASK_SIZE, "%s() can't handle user vaddr", __func__);
+
+ /* Shortcut for bigger flush ranges.
+ * Here we don't care if this was kernel virtual or phy addr
+ */
+ tot_sz = kend - kstart;
+ if (tot_sz > PAGE_SIZE) {
+ flush_cache_all();
+ return;
+ }
+
+ /* Case: Kernel Phy addr (0x8000_0000 onwards) */
+ if (likely(kstart > PAGE_OFFSET)) {
+ /*
+ * The 2nd arg despite being paddr will be used to index icache
+ * This is OK since no alternate virtual mappings will exist
+ * given the callers for this case: kprobe/kgdb in built-in
+ * kernel code only.
+ */
+ __sync_icache_dcache(kstart, kstart, kend - kstart);
+ return;
+ }
+
+ /*
+ * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff)
+ * (1) ARC Cache Maintenance ops only take Phy addr, hence special
+ * handling of kernel vaddr.
+ *
+ * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already),
+ * it still needs to handle a 2 page scenario, where the range
+ * straddles across 2 virtual pages and hence need for loop
+ */
+ while (tot_sz > 0) {
+ unsigned int off, sz;
+ unsigned long phy, pfn;
+
+ off = kstart % PAGE_SIZE;
+ pfn = vmalloc_to_pfn((void *)kstart);
+ phy = (pfn << PAGE_SHIFT) + off;
+ sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
+ __sync_icache_dcache(phy, kstart, sz);
+ kstart += sz;
+ tot_sz -= sz;
+ }
+}
+EXPORT_SYMBOL(flush_icache_range);
+
+/*
+ * General purpose helper to make I and D cache lines consistent.
+ * @paddr is phy addr of region
+ * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc)
+ * However in one instance, when called by kprobe (for a breakpt in
+ * builtin kernel code) @vaddr will be paddr only, meaning CDU operation will
+ * use a paddr to index the cache (despite VIPT). This is fine since since a
+ * builtin kernel page will not have any virtual mappings.
+ * kprobe on loadable module will be kernel vaddr.
+ */
+void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len)
+{
+ __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV);
+ __ic_line_inv_vaddr(paddr, vaddr, len);
+}
+
+/* wrapper to compile time eliminate alignment checks in flush loop */
+void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
+{
+ __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
+}
+
+/*
+ * wrapper to clearout kernel or userspace mappings of a page
+ * For kernel mappings @vaddr == @paddr
+ */
+void __flush_dcache_page(unsigned long paddr, unsigned long vaddr)
+{
+ __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV);
+}
+
+noinline void flush_cache_all(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ __ic_entire_inv();
+ __dc_entire_op(OP_FLUSH_N_INV);
+
+ local_irq_restore(flags);
+
+}
+
+#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING
+
+void flush_cache_mm(struct mm_struct *mm)
+{
+ flush_cache_all();
+}
+
+void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
+ unsigned long pfn)
+{
+ unsigned int paddr = pfn << PAGE_SHIFT;
+
+ u_vaddr &= PAGE_MASK;
+
+ __flush_dcache_page(paddr, u_vaddr);
+
+ if (vma->vm_flags & VM_EXEC)
+ __inv_icache_page(paddr, u_vaddr);
+}
+
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ flush_cache_all();
+}
+
+void flush_anon_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long u_vaddr)
+{
+ /* TBD: do we really need to clear the kernel mapping */
+ __flush_dcache_page(page_address(page), u_vaddr);
+ __flush_dcache_page(page_address(page), page_address(page));
+
+}
+
+#endif
+
+void copy_user_highpage(struct page *to, struct page *from,
+ unsigned long u_vaddr, struct vm_area_struct *vma)
+{
+ unsigned long kfrom = (unsigned long)page_address(from);
+ unsigned long kto = (unsigned long)page_address(to);
+ int clean_src_k_mappings = 0;
+
+ /*
+ * If SRC page was already mapped in userspace AND it's U-mapping is
+ * not congruent with K-mapping, sync former to physical page so that
+ * K-mapping in memcpy below, sees the right data
+ *
+ * Note that while @u_vaddr refers to DST page's userspace vaddr, it is
+ * equally valid for SRC page as well
+ */
+ if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
+ __flush_dcache_page(kfrom, u_vaddr);
+ clean_src_k_mappings = 1;
+ }
+
+ copy_page((void *)kto, (void *)kfrom);
+
+ /*
+ * Mark DST page K-mapping as dirty for a later finalization by
+ * update_mmu_cache(). Although the finalization could have been done
+ * here as well (given that both vaddr/paddr are available).
+ * But update_mmu_cache() already has code to do that for other
+ * non copied user pages (e.g. read faults which wire in pagecache page
+ * directly).
+ */
+ clear_bit(PG_dc_clean, &to->flags);
+
+ /*
+ * if SRC was already usermapped and non-congruent to kernel mapping
+ * sync the kernel mapping back to physical page
+ */
+ if (clean_src_k_mappings) {
+ __flush_dcache_page(kfrom, kfrom);
+ set_bit(PG_dc_clean, &from->flags);
+ } else {
+ clear_bit(PG_dc_clean, &from->flags);
+ }
+}
+
+void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
+{
+ clear_page(to);
+ clear_bit(PG_dc_clean, &page->flags);
+}
+
+
+/**********************************************************************
+ * Explicit Cache flush request from user space via syscall
+ * Needed for JITs which generate code on the fly
+ */
+SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
+{
+ /* TBD: optimize this */
+ flush_cache_all();
+ return 0;
+}
+
+void arc_cache_init(void)
+{
+ unsigned int __maybe_unused cpu = smp_processor_id();
+ char str[256];
+
+ printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
+
+ if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
+ struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
+
+ if (!ic->ver)
+ panic("cache support enabled but non-existent cache\n");
+
+ if (ic->line_len != L1_CACHE_BYTES)
+ panic("ICache line [%d] != kernel Config [%d]",
+ ic->line_len, L1_CACHE_BYTES);
+
+ if (ic->ver != CONFIG_ARC_MMU_VER)
+ panic("Cache ver [%d] doesn't match MMU ver [%d]\n",
+ ic->ver, CONFIG_ARC_MMU_VER);
+
+ /*
+ * In MMU v4 (HS38x) the alising icache config uses IVIL/PTAG
+ * pair to provide vaddr/paddr respectively, just as in MMU v3
+ */
+ if (is_isa_arcv2() && ic->alias)
+ _cache_line_loop_ic_fn = __cache_line_loop_v3;
+ else
+ _cache_line_loop_ic_fn = __cache_line_loop;
+ }
+
+ if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
+ struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
+
+ if (!dc->ver)
+ panic("cache support enabled but non-existent cache\n");
+
+ if (dc->line_len != L1_CACHE_BYTES)
+ panic("DCache line [%d] != kernel Config [%d]",
+ dc->line_len, L1_CACHE_BYTES);
+
+ /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
+ if (is_isa_arcompact()) {
+ int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
+
+ if (dc->alias && !handled)
+ panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
+ else if (!dc->alias && handled)
+ panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
+ }
+ }
+}
+++ /dev/null
-/*
- * ARC700 VIPT Cache Management
- *
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs
- * -flush_cache_dup_mm (fork)
- * -likewise for flush_cache_mm (exit/execve)
- * -likewise for flush_cache_range,flush_cache_page (munmap, exit, COW-break)
- *
- * vineetg: Apr 2011
- * -Now that MMU can support larger pg sz (16K), the determiniation of
- * aliasing shd not be based on assumption of 8k pg
- *
- * vineetg: Mar 2011
- * -optimised version of flush_icache_range( ) for making I/D coherent
- * when vaddr is available (agnostic of num of aliases)
- *
- * vineetg: Mar 2011
- * -Added documentation about I-cache aliasing on ARC700 and the way it
- * was handled up until MMU V2.
- * -Spotted a three year old bug when killing the 4 aliases, which needs
- * bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03}
- * instead of paddr | {0x00, 0x01, 0x10, 0x11}
- * (Rajesh you owe me one now)
- *
- * vineetg: Dec 2010
- * -Off-by-one error when computing num_of_lines to flush
- * This broke signal handling with bionic which uses synthetic sigret stub
- *
- * vineetg: Mar 2010
- * -GCC can't generate ZOL for core cache flush loops.
- * Conv them into iterations based as opposed to while (start < end) types
- *
- * Vineetg: July 2009
- * -In I-cache flush routine we used to chk for aliasing for every line INV.
- * Instead now we setup routines per cache geometry and invoke them
- * via function pointers.
- *
- * Vineetg: Jan 2009
- * -Cache Line flush routines used to flush an extra line beyond end addr
- * because check was while (end >= start) instead of (end > start)
- * =Some call sites had to work around by doing -1, -4 etc to end param
- * =Some callers didnt care. This was spec bad in case of INV routines
- * which would discard valid data (cause of the horrible ext2 bug
- * in ARC IDE driver)
- *
- * vineetg: June 11th 2008: Fixed flush_icache_range( )
- * -Since ARC700 caches are not coherent (I$ doesnt snoop D$) both need
- * to be flushed, which it was not doing.
- * -load_module( ) passes vmalloc addr (Kernel Virtual Addr) to the API,
- * however ARC cache maintenance OPs require PHY addr. Thus need to do
- * vmalloc_to_phy.
- * -Also added optimisation there, that for range > PAGE SIZE we flush the
- * entire cache in one shot rather than line by line. For e.g. a module
- * with Code sz 600k, old code flushed 600k worth of cache (line-by-line),
- * while cache is only 16 or 32k.
- */
-
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/cache.h>
-#include <linux/mmu_context.h>
-#include <linux/syscalls.h>
-#include <linux/uaccess.h>
-#include <linux/pagemap.h>
-#include <asm/cacheflush.h>
-#include <asm/cachectl.h>
-#include <asm/setup.h>
-
-char *arc_cache_mumbojumbo(int c, char *buf, int len)
-{
- int n = 0;
-
-#define PR_CACHE(p, cfg, str) \
- if (!(p)->ver) \
- n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \
- else \
- n += scnprintf(buf + n, len - n, \
- str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n", \
- (p)->sz_k, (p)->assoc, (p)->line_len, \
- (p)->vipt ? "VIPT" : "PIPT", \
- (p)->alias ? " aliasing" : "", \
- IS_ENABLED(cfg) ? "" : " (not used)");
-
- PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
- PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
-
- return buf;
-}
-
-/*
- * Read the Cache Build Confuration Registers, Decode them and save into
- * the cpuinfo structure for later use.
- * No Validation done here, simply read/convert the BCRs
- */
-void read_decode_cache_bcr(void)
-{
- struct cpuinfo_arc_cache *p_ic, *p_dc;
- unsigned int cpu = smp_processor_id();
- struct bcr_cache {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
-#else
- unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
-#endif
- } ibcr, dbcr;
-
- p_ic = &cpuinfo_arc700[cpu].icache;
- READ_BCR(ARC_REG_IC_BCR, ibcr);
-
- if (!ibcr.ver)
- goto dc_chk;
-
- BUG_ON(ibcr.config != 3);
- p_ic->assoc = 2; /* Fixed to 2w set assoc */
- p_ic->line_len = 8 << ibcr.line_len;
- p_ic->sz_k = 1 << (ibcr.sz - 1);
- p_ic->ver = ibcr.ver;
- p_ic->vipt = 1;
- p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1;
-
-dc_chk:
- p_dc = &cpuinfo_arc700[cpu].dcache;
- READ_BCR(ARC_REG_DC_BCR, dbcr);
-
- if (!dbcr.ver)
- return;
-
- BUG_ON(dbcr.config != 2);
- p_dc->assoc = 4; /* Fixed to 4w set assoc */
- p_dc->line_len = 16 << dbcr.line_len;
- p_dc->sz_k = 1 << (dbcr.sz - 1);
- p_dc->ver = dbcr.ver;
- p_dc->vipt = 1;
- p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1;
-}
-
-/*
- * 1. Validate the Cache Geomtery (compile time config matches hardware)
- * 2. If I-cache suffers from aliasing, setup work arounds (difft flush rtn)
- * (aliasing D-cache configurations are not supported YET)
- * 3. Enable the Caches, setup default flush mode for D-Cache
- * 3. Calculate the SHMLBA used by user space
- */
-void arc_cache_init(void)
-{
- unsigned int __maybe_unused cpu = smp_processor_id();
- char str[256];
-
- printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
-
- if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
- struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
-
- if (!ic->ver)
- panic("cache support enabled but non-existent cache\n");
-
- if (ic->line_len != L1_CACHE_BYTES)
- panic("ICache line [%d] != kernel Config [%d]",
- ic->line_len, L1_CACHE_BYTES);
-
- if (ic->ver != CONFIG_ARC_MMU_VER)
- panic("Cache ver [%d] doesn't match MMU ver [%d]\n",
- ic->ver, CONFIG_ARC_MMU_VER);
- }
-
- if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
- struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
- int handled;
-
- if (!dc->ver)
- panic("cache support enabled but non-existent cache\n");
-
- if (dc->line_len != L1_CACHE_BYTES)
- panic("DCache line [%d] != kernel Config [%d]",
- dc->line_len, L1_CACHE_BYTES);
-
- /* check for D-Cache aliasing */
- handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
-
- if (dc->alias && !handled)
- panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
- else if (!dc->alias && handled)
- panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n");
- }
-}
-
-#define OP_INV 0x1
-#define OP_FLUSH 0x2
-#define OP_FLUSH_N_INV 0x3
-#define OP_INV_IC 0x4
-
-/*
- * Common Helper for Line Operations on {I,D}-Cache
- */
-static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
- unsigned long sz, const int cacheop)
-{
- unsigned int aux_cmd, aux_tag;
- int num_lines;
- const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
-
- if (cacheop == OP_INV_IC) {
- aux_cmd = ARC_REG_IC_IVIL;
-#if (CONFIG_ARC_MMU_VER > 2)
- aux_tag = ARC_REG_IC_PTAG;
-#endif
- }
- else {
- /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
- aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
-#if (CONFIG_ARC_MMU_VER > 2)
- aux_tag = ARC_REG_DC_PTAG;
-#endif
- }
-
- /* Ensure we properly floor/ceil the non-line aligned/sized requests
- * and have @paddr - aligned to cache line and integral @num_lines.
- * This however can be avoided for page sized since:
- * -@paddr will be cache-line aligned already (being page aligned)
- * -@sz will be integral multiple of line size (being page sized).
- */
- if (!full_page_op) {
- sz += paddr & ~CACHE_LINE_MASK;
- paddr &= CACHE_LINE_MASK;
- vaddr &= CACHE_LINE_MASK;
- }
-
- num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
-
-#if (CONFIG_ARC_MMU_VER <= 2)
- /* MMUv2 and before: paddr contains stuffed vaddrs bits */
- paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
-#else
- /* if V-P const for loop, PTAG can be written once outside loop */
- if (full_page_op)
- write_aux_reg(aux_tag, paddr);
-#endif
-
- while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
- /* MMUv3, cache ops require paddr seperately */
- if (!full_page_op) {
- write_aux_reg(aux_tag, paddr);
- paddr += L1_CACHE_BYTES;
- }
-
- write_aux_reg(aux_cmd, vaddr);
- vaddr += L1_CACHE_BYTES;
-#else
- write_aux_reg(aux_cmd, paddr);
- paddr += L1_CACHE_BYTES;
-#endif
- }
-}
-
-#ifdef CONFIG_ARC_HAS_DCACHE
-
-/***************************************************************
- * Machine specific helpers for Entire D-Cache or Per Line ops
- */
-
-static inline unsigned int __before_dc_op(const int op)
-{
- unsigned int reg = reg;
-
- if (op == OP_FLUSH_N_INV) {
- /* Dcache provides 2 cmd: FLUSH or INV
- * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE
- * flush-n-inv is achieved by INV cmd but with IM=1
- * So toggle INV sub-mode depending on op request and default
- */
- reg = read_aux_reg(ARC_REG_DC_CTRL);
- write_aux_reg(ARC_REG_DC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH)
- ;
- }
-
- return reg;
-}
-
-static inline void __after_dc_op(const int op, unsigned int reg)
-{
- if (op & OP_FLUSH) /* flush / flush-n-inv both wait */
- while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS);
-
- /* Switch back to default Invalidate mode */
- if (op == OP_FLUSH_N_INV)
- write_aux_reg(ARC_REG_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH);
-}
-
-/*
- * Operation on Entire D-Cache
- * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV}
- * Note that constant propagation ensures all the checks are gone
- * in generated code
- */
-static inline void __dc_entire_op(const int cacheop)
-{
- unsigned int ctrl_reg;
- int aux;
-
- ctrl_reg = __before_dc_op(cacheop);
-
- if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */
- aux = ARC_REG_DC_IVDC;
- else
- aux = ARC_REG_DC_FLSH;
-
- write_aux_reg(aux, 0x1);
-
- __after_dc_op(cacheop, ctrl_reg);
-}
-
-/* For kernel mappings cache operation: index is same as paddr */
-#define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op)
-
-/*
- * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback)
- */
-static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
- unsigned long sz, const int cacheop)
-{
- unsigned long flags;
- unsigned int ctrl_reg;
-
- local_irq_save(flags);
-
- ctrl_reg = __before_dc_op(cacheop);
-
- __cache_line_loop(paddr, vaddr, sz, cacheop);
-
- __after_dc_op(cacheop, ctrl_reg);
-
- local_irq_restore(flags);
-}
-
-#else
-
-#define __dc_entire_op(cacheop)
-#define __dc_line_op(paddr, vaddr, sz, cacheop)
-#define __dc_line_op_k(paddr, sz, cacheop)
-
-#endif /* CONFIG_ARC_HAS_DCACHE */
-
-
-#ifdef CONFIG_ARC_HAS_ICACHE
-
-/*
- * I-Cache Aliasing in ARC700 VIPT caches
- *
- * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
- * The orig Cache Management Module "CDU" only required paddr to invalidate a
- * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
- * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
- * the exact same line.
- *
- * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
- * paddr alone could not be used to correctly index the cache.
- *
- * ------------------
- * MMU v1/v2 (Fixed Page Size 8k)
- * ------------------
- * The solution was to provide CDU with these additonal vaddr bits. These
- * would be bits [x:13], x would depend on cache-geometry, 13 comes from
- * standard page size of 8k.
- * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
- * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
- * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
- * represent the offset within cache-line. The adv of using this "clumsy"
- * interface for additional info was no new reg was needed in CDU programming
- * model.
- *
- * 17:13 represented the max num of bits passable, actual bits needed were
- * fewer, based on the num-of-aliases possible.
- * -for 2 alias possibility, only bit 13 needed (32K cache)
- * -for 4 alias possibility, bits 14:13 needed (64K cache)
- *
- * ------------------
- * MMU v3
- * ------------------
- * This ver of MMU supports variable page sizes (1k-16k): although Linux will
- * only support 8k (default), 16k and 4k.
- * However from hardware perspective, smaller page sizes aggrevate aliasing
- * meaning more vaddr bits needed to disambiguate the cache-line-op ;
- * the existing scheme of piggybacking won't work for certain configurations.
- * Two new registers IC_PTAG and DC_PTAG inttoduced.
- * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
- */
-
-/***********************************************************
- * Machine specific helper for per line I-Cache invalidate.
- */
-
-static inline void __ic_entire_inv(void)
-{
- write_aux_reg(ARC_REG_IC_IVIC, 1);
- read_aux_reg(ARC_REG_IC_CTRL); /* blocks */
-}
-
-static inline void
-__ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr,
- unsigned long sz)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __cache_line_loop(paddr, vaddr, sz, OP_INV_IC);
- local_irq_restore(flags);
-}
-
-#ifndef CONFIG_SMP
-
-#define __ic_line_inv_vaddr(p, v, s) __ic_line_inv_vaddr_local(p, v, s)
-
-#else
-
-struct ic_inv_args {
- unsigned long paddr, vaddr;
- int sz;
-};
-
-static void __ic_line_inv_vaddr_helper(void *info)
-{
- struct ic_inv_args *ic_inv = info;
-
- __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz);
-}
-
-static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
- unsigned long sz)
-{
- struct ic_inv_args ic_inv = {
- .paddr = paddr,
- .vaddr = vaddr,
- .sz = sz
- };
-
- on_each_cpu(__ic_line_inv_vaddr_helper, &ic_inv, 1);
-}
-
-#endif /* CONFIG_SMP */
-
-#else /* !CONFIG_ARC_HAS_ICACHE */
-
-#define __ic_entire_inv()
-#define __ic_line_inv_vaddr(pstart, vstart, sz)
-
-#endif /* CONFIG_ARC_HAS_ICACHE */
-
-
-/***********************************************************
- * Exported APIs
- */
-
-/*
- * Handle cache congruency of kernel and userspace mappings of page when kernel
- * writes-to/reads-from
- *
- * The idea is to defer flushing of kernel mapping after a WRITE, possible if:
- * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent
- * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache)
- * -In SMP, if hardware caches are coherent
- *
- * There's a corollary case, where kernel READs from a userspace mapped page.
- * If the U-mapping is not congruent to to K-mapping, former needs flushing.
- */
-void flush_dcache_page(struct page *page)
-{
- struct address_space *mapping;
-
- if (!cache_is_vipt_aliasing()) {
- clear_bit(PG_dc_clean, &page->flags);
- return;
- }
-
- /* don't handle anon pages here */
- mapping = page_mapping(page);
- if (!mapping)
- return;
-
- /*
- * pagecache page, file not yet mapped to userspace
- * Make a note that K-mapping is dirty
- */
- if (!mapping_mapped(mapping)) {
- clear_bit(PG_dc_clean, &page->flags);
- } else if (page_mapped(page)) {
-
- /* kernel reading from page with U-mapping */
- void *paddr = page_address(page);
- unsigned long vaddr = page->index << PAGE_CACHE_SHIFT;
-
- if (addr_not_cache_congruent(paddr, vaddr))
- __flush_dcache_page(paddr, vaddr);
- }
-}
-EXPORT_SYMBOL(flush_dcache_page);
-
-
-void dma_cache_wback_inv(unsigned long start, unsigned long sz)
-{
- __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
-}
-EXPORT_SYMBOL(dma_cache_wback_inv);
-
-void dma_cache_inv(unsigned long start, unsigned long sz)
-{
- __dc_line_op_k(start, sz, OP_INV);
-}
-EXPORT_SYMBOL(dma_cache_inv);
-
-void dma_cache_wback(unsigned long start, unsigned long sz)
-{
- __dc_line_op_k(start, sz, OP_FLUSH);
-}
-EXPORT_SYMBOL(dma_cache_wback);
-
-/*
- * This is API for making I/D Caches consistent when modifying
- * kernel code (loadable modules, kprobes, kgdb...)
- * This is called on insmod, with kernel virtual address for CODE of
- * the module. ARC cache maintenance ops require PHY address thus we
- * need to convert vmalloc addr to PHY addr
- */
-void flush_icache_range(unsigned long kstart, unsigned long kend)
-{
- unsigned int tot_sz;
-
- WARN(kstart < TASK_SIZE, "%s() can't handle user vaddr", __func__);
-
- /* Shortcut for bigger flush ranges.
- * Here we don't care if this was kernel virtual or phy addr
- */
- tot_sz = kend - kstart;
- if (tot_sz > PAGE_SIZE) {
- flush_cache_all();
- return;
- }
-
- /* Case: Kernel Phy addr (0x8000_0000 onwards) */
- if (likely(kstart > PAGE_OFFSET)) {
- /*
- * The 2nd arg despite being paddr will be used to index icache
- * This is OK since no alternate virtual mappings will exist
- * given the callers for this case: kprobe/kgdb in built-in
- * kernel code only.
- */
- __sync_icache_dcache(kstart, kstart, kend - kstart);
- return;
- }
-
- /*
- * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff)
- * (1) ARC Cache Maintenance ops only take Phy addr, hence special
- * handling of kernel vaddr.
- *
- * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already),
- * it still needs to handle a 2 page scenario, where the range
- * straddles across 2 virtual pages and hence need for loop
- */
- while (tot_sz > 0) {
- unsigned int off, sz;
- unsigned long phy, pfn;
-
- off = kstart % PAGE_SIZE;
- pfn = vmalloc_to_pfn((void *)kstart);
- phy = (pfn << PAGE_SHIFT) + off;
- sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
- __sync_icache_dcache(phy, kstart, sz);
- kstart += sz;
- tot_sz -= sz;
- }
-}
-EXPORT_SYMBOL(flush_icache_range);
-
-/*
- * General purpose helper to make I and D cache lines consistent.
- * @paddr is phy addr of region
- * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc)
- * However in one instance, when called by kprobe (for a breakpt in
- * builtin kernel code) @vaddr will be paddr only, meaning CDU operation will
- * use a paddr to index the cache (despite VIPT). This is fine since since a
- * builtin kernel page will not have any virtual mappings.
- * kprobe on loadable module will be kernel vaddr.
- */
-void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len)
-{
- __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV);
- __ic_line_inv_vaddr(paddr, vaddr, len);
-}
-
-/* wrapper to compile time eliminate alignment checks in flush loop */
-void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
-{
- __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
-}
-
-/*
- * wrapper to clearout kernel or userspace mappings of a page
- * For kernel mappings @vaddr == @paddr
- */
-void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr)
-{
- __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV);
-}
-
-noinline void flush_cache_all(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
-
- __ic_entire_inv();
- __dc_entire_op(OP_FLUSH_N_INV);
-
- local_irq_restore(flags);
-
-}
-
-#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING
-
-void flush_cache_mm(struct mm_struct *mm)
-{
- flush_cache_all();
-}
-
-void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
- unsigned long pfn)
-{
- unsigned int paddr = pfn << PAGE_SHIFT;
-
- u_vaddr &= PAGE_MASK;
-
- ___flush_dcache_page(paddr, u_vaddr);
-
- if (vma->vm_flags & VM_EXEC)
- __inv_icache_page(paddr, u_vaddr);
-}
-
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
-{
- flush_cache_all();
-}
-
-void flush_anon_page(struct vm_area_struct *vma, struct page *page,
- unsigned long u_vaddr)
-{
- /* TBD: do we really need to clear the kernel mapping */
- __flush_dcache_page(page_address(page), u_vaddr);
- __flush_dcache_page(page_address(page), page_address(page));
-
-}
-
-#endif
-
-void copy_user_highpage(struct page *to, struct page *from,
- unsigned long u_vaddr, struct vm_area_struct *vma)
-{
- void *kfrom = page_address(from);
- void *kto = page_address(to);
- int clean_src_k_mappings = 0;
-
- /*
- * If SRC page was already mapped in userspace AND it's U-mapping is
- * not congruent with K-mapping, sync former to physical page so that
- * K-mapping in memcpy below, sees the right data
- *
- * Note that while @u_vaddr refers to DST page's userspace vaddr, it is
- * equally valid for SRC page as well
- */
- if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
- __flush_dcache_page(kfrom, u_vaddr);
- clean_src_k_mappings = 1;
- }
-
- copy_page(kto, kfrom);
-
- /*
- * Mark DST page K-mapping as dirty for a later finalization by
- * update_mmu_cache(). Although the finalization could have been done
- * here as well (given that both vaddr/paddr are available).
- * But update_mmu_cache() already has code to do that for other
- * non copied user pages (e.g. read faults which wire in pagecache page
- * directly).
- */
- clear_bit(PG_dc_clean, &to->flags);
-
- /*
- * if SRC was already usermapped and non-congruent to kernel mapping
- * sync the kernel mapping back to physical page
- */
- if (clean_src_k_mappings) {
- __flush_dcache_page(kfrom, kfrom);
- set_bit(PG_dc_clean, &from->flags);
- } else {
- clear_bit(PG_dc_clean, &from->flags);
- }
-}
-
-void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
-{
- clear_page(to);
- clear_bit(PG_dc_clean, &page->flags);
-}
-
-
-/**********************************************************************
- * Explicit Cache flush request from user space via syscall
- * Needed for JITs which generate code on the fly
- */
-SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
-{
- /* TBD: optimize this */
- flush_cache_all();
- return 0;
-}
* Cache bit off in the TLB entry.
*
* The default DMA address == Phy address which is 0x8000_0000 based.
- * A platform/device can make it zero based, by over-riding
- * plat_{dma,kernel}_addr_to_{kernel,dma}
*/
#include <linux/dma-mapping.h>
return NULL;
/* This is bus address, platform dependent */
- *dma_handle = plat_kernel_addr_to_dma(dev, paddr);
+ *dma_handle = (dma_addr_t)paddr;
return paddr;
}
void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle)
{
- free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle),
- size);
+ free_pages_exact((void *)dma_handle, size);
}
EXPORT_SYMBOL(dma_free_noncoherent);
memset(kvaddr, 0, size);
/* This is bus address, platform dependent */
- *dma_handle = plat_kernel_addr_to_dma(dev, paddr);
+ *dma_handle = (dma_addr_t)paddr;
+
+ /*
+ * Evict any existing L1 and/or L2 lines for the backing page
+ * in case it was used earlier as a normal "cached" page.
+ * Yeah this bit us - STAR 9000898266
+ *
+ * Although core does call flush_cache_vmap(), it gets kvaddr hence
+ * can't be used to efficiently flush L1 and/or L2 which need paddr
+ * Currently flush_cache_vmap nukes the L1 cache completely which
+ * will be optimized as a separate commit
+ */
+ dma_cache_wback_inv((unsigned long)paddr, size);
return kvaddr;
}
{
iounmap((void __force __iomem *)kvaddr);
- free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle),
- size);
+ free_pages_exact((void *)dma_handle, size);
}
EXPORT_SYMBOL(dma_free_coherent);
write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
}
+#if (CONFIG_ARC_MMU_VER < 4)
+
static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
{
unsigned int idx;
write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
}
+#else /* CONFIG_ARC_MMU_VER >= 4) */
+
+static void utlb_invalidate(void)
+{
+ /* No need since uTLB is always in sync with JTLB */
+}
+
+static void tlb_entry_erase(unsigned int vaddr_n_asid)
+{
+ write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid | _PAGE_PRESENT);
+ write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
+}
+
+static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+{
+ write_aux_reg(ARC_REG_TLBPD0, pd0);
+ write_aux_reg(ARC_REG_TLBPD1, pd1);
+ write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
+}
+
+#endif
+
/*
* Un-conditionally (without lookup) erase the entire MMU contents
*/
#endif
} *mmu3;
+ struct bcr_mmu_4 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
+ n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
+#else
+ /* DTLB ITLB JES JE JA */
+ unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
+ pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
+#endif
+ } *mmu4;
+
tmp = read_aux_reg(ARC_REG_MMU_BCR);
mmu->ver = (tmp >> 24);
if (mmu->ver <= 2) {
mmu2 = (struct bcr_mmu_1_2 *)&tmp;
- mmu->pg_sz = PAGE_SIZE;
+ mmu->pg_sz_k = TO_KB(PAGE_SIZE);
mmu->sets = 1 << mmu2->sets;
mmu->ways = 1 << mmu2->ways;
mmu->u_dtlb = mmu2->u_dtlb;
mmu->u_itlb = mmu2->u_itlb;
- } else {
+ } else if (mmu->ver == 3) {
mmu3 = (struct bcr_mmu_3 *)&tmp;
- mmu->pg_sz = 512 << mmu3->pg_sz;
+ mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
mmu->sets = 1 << mmu3->sets;
mmu->ways = 1 << mmu3->ways;
mmu->u_dtlb = mmu3->u_dtlb;
mmu->u_itlb = mmu3->u_itlb;
+ } else {
+ mmu4 = (struct bcr_mmu_4 *)&tmp;
+ mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
+ mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11);
+ mmu->sets = 64 << mmu4->n_entry;
+ mmu->ways = mmu4->n_ways * 2;
+ mmu->u_dtlb = mmu4->u_dtlb * 4;
+ mmu->u_itlb = mmu4->u_itlb * 4;
}
mmu->num_tlb = mmu->sets * mmu->ways;
{
int n = 0;
struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu;
+ char super_pg[64] = "";
+
+ if (p_mmu->s_pg_sz_m)
+ scnprintf(super_pg, 64, "%dM Super Page%s, ",
+ p_mmu->s_pg_sz_m, " (not used)");
n += scnprintf(buf + n, len - n,
- "MMU [v%x]\t: %dk PAGE, JTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
- p_mmu->ver, TO_KB(p_mmu->pg_sz),
+ "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
+ p_mmu->ver, p_mmu->pg_sz_k, super_pg,
p_mmu->num_tlb, p_mmu->sets, p_mmu->ways,
p_mmu->u_dtlb, p_mmu->u_itlb,
IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : "");
mmu->ver, CONFIG_ARC_MMU_VER);
}
- if (mmu->pg_sz != PAGE_SIZE)
+ if (mmu->pg_sz_k != TO_KB(PAGE_SIZE))
panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
/* Enable the MMU */
* Rahul Trivedi, Amit Bhor: Codito Technologies 2004
*/
- .cpu A7
-
#include <linux/linkage.h>
#include <asm/entry.h>
#include <asm/mmu.h>
#include <asm/processor.h>
#include <asm/tlb-mmu1.h>
+#ifdef CONFIG_ISA_ARCOMPACT
;-----------------------------------------------------------------
; ARC700 Exception Handling doesn't auto-switch stack and it only provides
; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0"
#endif
.endm
+#else /* ARCv2 */
+
+.macro TLBMISS_FREEUP_REGS
+ PUSH r0
+ PUSH r1
+ PUSH r2
+ PUSH r3
+.endm
+
+.macro TLBMISS_RESTORE_REGS
+ POP r3
+ POP r2
+ POP r1
+ POP r0
+.endm
+
+#endif
+
;============================================================================
; Troubleshooting Stuff
;============================================================================
; Commit the TLB entry into MMU
.macro COMMIT_ENTRY_TO_MMU
+#if (CONFIG_ARC_MMU_VER < 4)
/* Get free TLB slot: Set = computed from vaddr, way = random */
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
#else
sr TLBWrite, [ARC_REG_TLBCOMMAND]
#endif
+
+#else
+ sr TLBInsertEntry, [ARC_REG_TLBCOMMAND]
+#endif
.endm
CONV_PTE_TO_TLB
COMMIT_ENTRY_TO_MMU
TLBMISS_RESTORE_REGS
+EV_TLBMissI_fast_ret: ; additional label for VDK OS-kit instrumentation
rtie
END(EV_TLBMissI)
COMMIT_ENTRY_TO_MMU
TLBMISS_RESTORE_REGS
+EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation
rtie
;-------- Common routine to call Linux Page Fault Handler -----------
; Slow path TLB Miss handled as a regular ARC Exception
; (stack switching / save the complete reg-file).
- EXCEPTION_PROLOGUE
-
- ; ------- setup args for Linux Page fault Hanlder ---------
- mov_s r1, sp
- lr r0, [efa]
-
- ; We don't want exceptions to be disabled while the fault is handled.
- ; Now that we have saved the context we return from exception hence
- ; exceptions get re-enable
-
- FAKE_RET_FROM_EXCPN r9
-
- bl do_page_fault
- b ret_from_exception
-
+ b call_do_page_fault
END(EV_TLBMissD)
+++ /dev/null
-#
-# Copyright (C) 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-#
-
-menuconfig ARC_PLAT_FPGA_LEGACY
- bool "\"Legacy\" ARC FPGA dev Boards"
- select ARC_HAS_COH_CACHES if SMP
- help
- Support for ARC development boards, provided by Synopsys.
- These are based on FPGA or ISS. e.g.
- - ARCAngel4
- - ML509
- - MetaWare ISS
-
-if ARC_PLAT_FPGA_LEGACY
-
-config ISS_SMP_EXTN
- bool "ARC SMP Extensions (ISS Models only)"
- default n
- depends on SMP
- help
- SMP Extensions to ARC700, in a "simulation only" Model, supported in
- ARC ISS (Instruction Set Simulator).
- The SMP extensions include:
- -IDU (Interrupt Distribution Unit)
- -XTL (To enable CPU start/stop/set-PC for another CPU)
- It doesn't provide coherent Caches and/or Atomic Ops (LLOCK/SCOND)
-
-endif
+++ /dev/null
-#
-# Copyright (C) 2011-2012 Synopsys, Inc. (www.synopsys.com)
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-#
-
-KBUILD_CFLAGS += -Iarch/arc/plat-arcfpga/include
-
-obj-y := platform.o
-obj-$(CONFIG_ISS_SMP_EXTN) += smp.o
+++ /dev/null
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Rajeshwar Ranga: Interrupt Distribution Unit API's
- */
-
-#ifndef __PLAT_ARCFPGA_SMP_H
-#define __PLAT_ARCFPGA_SMP_H
-
-#ifdef CONFIG_SMP
-
-#include <linux/types.h>
-#include <asm/arcregs.h>
-
-#define ARC_AUX_IDU_REG_CMD 0x2000
-#define ARC_AUX_IDU_REG_PARAM 0x2001
-
-#define ARC_AUX_XTL_REG_CMD 0x2002
-#define ARC_AUX_XTL_REG_PARAM 0x2003
-
-#define ARC_REG_MP_BCR 0x2021
-
-#define ARC_XTL_CMD_WRITE_PC 0x04
-#define ARC_XTL_CMD_CLEAR_HALT 0x02
-
-/*
- * Build Configuration Register which identifies the sub-components
- */
-struct bcr_mp {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int mp_arch:16, pad:5, sdu:1, idu:1, scu:1, ver:8;
-#else
- unsigned int ver:8, scu:1, idu:1, sdu:1, pad:5, mp_arch:16;
-#endif
-};
-
-/* IDU supports 256 common interrupts */
-#define NR_IDU_IRQS 256
-
-/*
- * The Aux Regs layout is same bit-by-bit in both BE/LE modes.
- * However when casted as a bitfield encoded "C" struct, gcc treats it as
- * memory, generating different code for BE/LE, requiring strcture adj (see
- * include/asm/arcregs.h)
- *
- * However when manually "carving" the value for a Aux, no special handling
- * of BE is needed because of the property discribed above
- */
-#define IDU_SET_COMMAND(irq, cmd) \
-do { \
- uint32_t __val; \
- __val = (((irq & 0xFF) << 8) | (cmd & 0xFF)); \
- write_aux_reg(ARC_AUX_IDU_REG_CMD, __val); \
-} while (0)
-
-#define IDU_SET_PARAM(par) write_aux_reg(ARC_AUX_IDU_REG_PARAM, par)
-#define IDU_GET_PARAM() read_aux_reg(ARC_AUX_IDU_REG_PARAM)
-
-/* IDU Commands */
-#define IDU_DISABLE 0x00
-#define IDU_ENABLE 0x01
-#define IDU_IRQ_CLEAR 0x02
-#define IDU_IRQ_ASSERT 0x03
-#define IDU_IRQ_WMODE 0x04
-#define IDU_IRQ_STATUS 0x05
-#define IDU_IRQ_ACK 0x06
-#define IDU_IRQ_PEND 0x07
-#define IDU_IRQ_RMODE 0x08
-#define IDU_IRQ_WBITMASK 0x09
-#define IDU_IRQ_RBITMASK 0x0A
-
-#define idu_enable() IDU_SET_COMMAND(0, IDU_ENABLE)
-#define idu_disable() IDU_SET_COMMAND(0, IDU_DISABLE)
-
-#define idu_irq_assert(irq) IDU_SET_COMMAND((irq), IDU_IRQ_ASSERT)
-#define idu_irq_clear(irq) IDU_SET_COMMAND((irq), IDU_IRQ_CLEAR)
-
-/* IDU Interrupt Mode - Destination Encoding */
-#define IDU_IRQ_MOD_DISABLE 0x00
-#define IDU_IRQ_MOD_ROUND_RECP 0x01
-#define IDU_IRQ_MOD_TCPU_FIRSTRECP 0x02
-#define IDU_IRQ_MOD_TCPU_ALLRECP 0x03
-
-/* IDU Interrupt Mode - Triggering Mode */
-#define IDU_IRQ_MODE_LEVEL_TRIG 0x00
-#define IDU_IRQ_MODE_PULSE_TRIG 0x01
-
-#define IDU_IRQ_MODE_PARAM(dest_mode, trig_mode) \
- (((trig_mode & 0x01) << 15) | (dest_mode & 0xFF))
-
-struct idu_irq_config {
- uint8_t irq;
- uint8_t dest_mode;
- uint8_t trig_mode;
-};
-
-struct idu_irq_status {
- uint8_t irq;
- bool enabled;
- bool status;
- bool ack;
- bool pend;
- uint8_t next_rr;
-};
-
-extern void idu_irq_set_tgtcpu(uint8_t irq, uint32_t mask);
-extern void idu_irq_set_mode(uint8_t irq, uint8_t dest_mode, uint8_t trig_mode);
-
-extern void iss_model_init_smp(unsigned int cpu);
-extern void iss_model_init_early_smp(void);
-
-#endif /* CONFIG_SMP */
-
-#endif
+++ /dev/null
-/*
- * ARC FPGA Platform support code
- *
- * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/init.h>
-#include <asm/mach_desc.h>
-#include <plat/smp.h>
-
-/*----------------------- Machine Descriptions ------------------------------
- *
- * Machine description is simply a set of platform/board specific callbacks
- * This is not directly related to DeviceTree based dynamic device creation,
- * however as part of early device tree scan, we also select the right
- * callback set, by matching the DT compatible name.
- */
-
-static const char *legacy_fpga_compat[] __initconst = {
- "snps,arc-angel4",
- "snps,arc-ml509",
- NULL,
-};
-
-MACHINE_START(LEGACY_FPGA, "legacy_fpga")
- .dt_compat = legacy_fpga_compat,
-#ifdef CONFIG_ISS_SMP_EXTN
- .init_early = iss_model_init_early_smp,
- .init_smp = iss_model_init_smp,
-#endif
-MACHINE_END
-
-static const char *simulation_compat[] __initconst = {
- "snps,nsim",
- "snps,nsimosci",
- NULL,
-};
-
-MACHINE_START(SIMULATION, "simulation")
- .dt_compat = simulation_compat,
-MACHINE_END
+++ /dev/null
-/*
- * ARC700 Simulation-only Extensions for SMP
- *
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Vineet Gupta - 2012 : split off arch common and plat specific SMP
- * Rajeshwar Ranga - 2007 : Interrupt Distribution Unit API's
- */
-
-#include <linux/smp.h>
-#include <linux/irq.h>
-#include <plat/smp.h>
-
-#define IDU_INTERRUPT_0 16
-
-static char smp_cpuinfo_buf[128];
-
-/*
- *-------------------------------------------------------------------
- * Platform specific callbacks expected by arch SMP code
- *-------------------------------------------------------------------
- */
-
-/*
- * Master kick starting another CPU
- */
-static void iss_model_smp_wakeup_cpu(int cpu, unsigned long pc)
-{
- /* setup the start PC */
- write_aux_reg(ARC_AUX_XTL_REG_PARAM, pc);
-
- /* Trigger WRITE_PC cmd for this cpu */
- write_aux_reg(ARC_AUX_XTL_REG_CMD,
- (ARC_XTL_CMD_WRITE_PC | (cpu << 8)));
-
- /* Take the cpu out of Halt */
- write_aux_reg(ARC_AUX_XTL_REG_CMD,
- (ARC_XTL_CMD_CLEAR_HALT | (cpu << 8)));
-
-}
-
-static inline int get_hw_config_num_irq(void)
-{
- uint32_t val = read_aux_reg(ARC_REG_VECBASE_BCR);
-
- switch (val & 0x03) {
- case 0:
- return 16;
- case 1:
- return 32;
- case 2:
- return 8;
- default:
- return 0;
- }
-
- return 0;
-}
-
-/*
- * Any SMP specific init any CPU does when it comes up.
- * Here we setup the CPU to enable Inter-Processor-Interrupts
- * Called for each CPU
- * -Master : init_IRQ()
- * -Other(s) : start_kernel_secondary()
- */
-void iss_model_init_smp(unsigned int cpu)
-{
- /* Check if CPU is configured for more than 16 interrupts */
- if (NR_IRQS <= 16 || get_hw_config_num_irq() <= 16)
- panic("[arcfpga] IRQ system can't support IDU IPI\n");
-
- idu_disable();
-
- /****************************************************************
- * IDU provides a set of Common IRQs, each of which can be dynamically
- * attached to (1|many|all) CPUs.
- * The Common IRQs [0-15] are mapped as CPU pvt [16-31]
- *
- * Here we use a simple 1:1 mapping:
- * A CPU 'x' is wired to Common IRQ 'x'.
- * So an IDU ASSERT on IRQ 'x' will trigger Interupt on CPU 'x', which
- * makes up for our simple IPI plumbing.
- *
- * TBD: Have a dedicated multicast IRQ for sending IPIs to all CPUs
- * w/o having to do one-at-a-time
- ******************************************************************/
-
- /*
- * Claim an IRQ which would trigger IPI on this CPU.
- * In IDU parlance it involves setting up a cpu bitmask for the IRQ
- * The bitmap here contains only 1 CPU (self).
- */
- idu_irq_set_tgtcpu(cpu, 0x1 << cpu);
-
- /* Set the IRQ destination to use the bitmask above */
- idu_irq_set_mode(cpu, 7, /* XXX: IDU_IRQ_MOD_TCPU_ALLRECP: ISS bug */
- IDU_IRQ_MODE_PULSE_TRIG);
-
- idu_enable();
-
- /* Attach the arch-common IPI ISR to our IDU IRQ */
- smp_ipi_irq_setup(cpu, IDU_INTERRUPT_0 + cpu);
-}
-
-static void iss_model_ipi_send(int cpu)
-{
- idu_irq_assert(cpu);
-}
-
-static void iss_model_ipi_clear(int irq)
-{
- idu_irq_clear(IDU_INTERRUPT_0 + smp_processor_id());
-}
-
-void iss_model_init_early_smp(void)
-{
-#define IS_AVAIL1(var, str) ((var) ? str : "")
-
- struct bcr_mp mp;
-
- READ_BCR(ARC_REG_MP_BCR, mp);
-
- sprintf(smp_cpuinfo_buf, "Extn [ISS-SMP]: v%d, arch(%d) %s %s %s\n",
- mp.ver, mp.mp_arch, IS_AVAIL1(mp.scu, "SCU"),
- IS_AVAIL1(mp.idu, "IDU"), IS_AVAIL1(mp.sdu, "SDU"));
-
- plat_smp_ops.info = smp_cpuinfo_buf;
-
- plat_smp_ops.cpu_kick = iss_model_smp_wakeup_cpu;
- plat_smp_ops.ipi_send = iss_model_ipi_send;
- plat_smp_ops.ipi_clear = iss_model_ipi_clear;
-}
-
-/*
- *-------------------------------------------------------------------
- * Low level Platform IPI Providers
- *-------------------------------------------------------------------
- */
-
-/* Set the Mode for the Common IRQ */
-void idu_irq_set_mode(uint8_t irq, uint8_t dest_mode, uint8_t trig_mode)
-{
- uint32_t par = IDU_IRQ_MODE_PARAM(dest_mode, trig_mode);
-
- IDU_SET_PARAM(par);
- IDU_SET_COMMAND(irq, IDU_IRQ_WMODE);
-}
-
-/* Set the target cpu Bitmask for Common IRQ */
-void idu_irq_set_tgtcpu(uint8_t irq, uint32_t mask)
-{
- IDU_SET_PARAM(mask);
- IDU_SET_COMMAND(irq, IDU_IRQ_WBITMASK);
-}
-
-/* Get the Interrupt Acknowledged status for IRQ (as CPU Bitmask) */
-bool idu_irq_get_ack(uint8_t irq)
-{
- uint32_t val;
-
- IDU_SET_COMMAND(irq, IDU_IRQ_ACK);
- val = IDU_GET_PARAM();
-
- return val & (1 << irq);
-}
-
-/*
- * Get the Interrupt Pending status for IRQ (as CPU Bitmask)
- * -Pending means CPU has not yet noticed the IRQ (e.g. disabled)
- * -After Interrupt has been taken, the IPI expcitily needs to be
- * cleared, to be acknowledged.
- */
-bool idu_irq_get_pend(uint8_t irq)
-{
- uint32_t val;
-
- IDU_SET_COMMAND(irq, IDU_IRQ_PEND);
- val = IDU_GET_PARAM();
-
- return val & (1 << irq);
-}
--- /dev/null
+#
+# Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+menuconfig ARC_PLAT_AXS10X
+ bool "Synopsys ARC AXS10x Software Development Platforms"
+ select DW_APB_ICTL
+ select GPIO_DWAPB
+ select OF_GPIO
+ select GENERIC_IRQ_CHIP
+ select ARCH_REQUIRE_GPIOLIB
+ help
+ Support for the ARC AXS10x Software Development Platforms.
+
+ The AXS10x Platforms consist of a mainboard with peripherals,
+ on which several daughter cards can be placed. The daughter cards
+ typically contain a CPU and memory.
+
+if ARC_PLAT_AXS10X
+
+config AXS101
+ depends on ISA_ARCOMPACT
+ bool "AXS101 with AXC001 CPU Card (ARC 770D/EM6/AS221)"
+ help
+ This adds support for the 770D/EM6/AS221 CPU Card. Only the ARC
+ 770D is supported in Linux.
+
+ The AXS101 Platform consists of an AXS10x mainboard with
+ this daughtercard. Please use the axs101.dts device tree
+ with this configuration.
+
+config AXS103
+ bool "AXS103 with AXC003 CPU Card (ARC HS38x)"
+ depends on ISA_ARCV2
+ help
+ This adds support for the HS38x CPU Card.
+
+ The AXS103 Platform consists of an AXS10x mainboard with
+ this daughtercard. Please use the axs103.dts device tree
+ with this configuration.
+
+endif
--- /dev/null
+#
+# Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+obj-$(CONFIG_ARC_PLAT_AXS10X) += axs10x.o
--- /dev/null
+/*
+ * AXS101/AXS103 Software Development Platform
+ *
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/of_platform.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/clk.h>
+#include <asm/io.h>
+#include <asm/mach_desc.h>
+#include <asm/mcip.h>
+
+#define AXS_MB_CGU 0xE0010000
+#define AXS_MB_CREG 0xE0011000
+
+#define CREG_MB_IRQ_MUX (AXS_MB_CREG + 0x214)
+#define CREG_MB_SW_RESET (AXS_MB_CREG + 0x220)
+#define CREG_MB_VER (AXS_MB_CREG + 0x230)
+#define CREG_MB_CONFIG (AXS_MB_CREG + 0x234)
+
+#define AXC001_CREG 0xF0001000
+#define AXC001_GPIO_INTC 0xF0003000
+
+static void __init axs10x_enable_gpio_intc_wire(void)
+{
+ /*
+ * Peripherals on CPU Card and Mother Board are wired to cpu intc via
+ * intermediate DW APB GPIO blocks (mainly for debouncing)
+ *
+ * ---------------------
+ * | snps,arc700-intc |
+ * ---------------------
+ * | #7 | #15
+ * ------------------- -------------------
+ * | snps,dw-apb-gpio | | snps,dw-apb-gpio |
+ * ------------------- -------------------
+ * | |
+ * | [ Debug UART on cpu card ]
+ * |
+ * ------------------------
+ * | snps,dw-apb-intc (MB)|
+ * ------------------------
+ * | | | |
+ * [eth] [uart] [... other perip on Main Board]
+ *
+ * Current implementation of "irq-dw-apb-ictl" driver doesn't work well
+ * with stacked INTCs. In particular problem happens if its master INTC
+ * not yet instantiated. See discussion here -
+ * https://lkml.org/lkml/2015/3/4/755
+ *
+ * So setup the first gpio block as a passive pass thru and hide it from
+ * DT hardware topology - connect MB intc directly to cpu intc
+ * The GPIO "wire" needs to be init nevertheless (here)
+ *
+ * One side adv is that peripheral interrupt handling avoids one nested
+ * intc ISR hop
+ */
+#define GPIO_INTEN (AXC001_GPIO_INTC + 0x30)
+#define GPIO_INTMASK (AXC001_GPIO_INTC + 0x34)
+#define GPIO_INTTYPE_LEVEL (AXC001_GPIO_INTC + 0x38)
+#define GPIO_INT_POLARITY (AXC001_GPIO_INTC + 0x3c)
+#define MB_TO_GPIO_IRQ 12
+
+ iowrite32(~(1 << MB_TO_GPIO_IRQ), (void __iomem *) GPIO_INTMASK);
+ iowrite32(0, (void __iomem *) GPIO_INTTYPE_LEVEL);
+ iowrite32(~0, (void __iomem *) GPIO_INT_POLARITY);
+ iowrite32(1 << MB_TO_GPIO_IRQ, (void __iomem *) GPIO_INTEN);
+}
+
+static inline void __init
+write_cgu_reg(uint32_t value, void __iomem *reg, void __iomem *lock_reg)
+{
+ unsigned int loops = 128 * 1024, ctr;
+
+ iowrite32(value, reg);
+
+ ctr = loops;
+ while (((ioread32(lock_reg) & 1) == 1) && ctr--) /* wait for unlock */
+ cpu_relax();
+
+ ctr = loops;
+ while (((ioread32(lock_reg) & 1) == 0) && ctr--) /* wait for re-lock */
+ cpu_relax();
+}
+
+static void __init axs10x_print_board_ver(unsigned int creg, const char *str)
+{
+ union ver {
+ struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:11, y:12, m:4, d:5;
+#else
+ unsigned int d:5, m:4, y:12, pad:11;
+#endif
+ };
+ unsigned int val;
+ } board;
+
+ board.val = ioread32((void __iomem *)creg);
+ pr_info("AXS: %s FPGA Date: %u-%u-%u\n", str, board.d, board.m,
+ board.y);
+}
+
+static void __init axs10x_early_init(void)
+{
+ int mb_rev;
+ char mb[32];
+
+ /* Determine motherboard version */
+ if (ioread32((void __iomem *) CREG_MB_CONFIG) & (1 << 28))
+ mb_rev = 3; /* HT-3 (rev3.0) */
+ else
+ mb_rev = 2; /* HT-2 (rev2.0) */
+
+ axs10x_enable_gpio_intc_wire();
+
+ scnprintf(mb, 32, "MainBoard v%d", mb_rev);
+ axs10x_print_board_ver(CREG_MB_VER, mb);
+}
+
+#ifdef CONFIG_AXS101
+
+#define CREG_CPU_ADDR_770 (AXC001_CREG + 0x20)
+#define CREG_CPU_ADDR_TUNN (AXC001_CREG + 0x60)
+#define CREG_CPU_ADDR_770_UPD (AXC001_CREG + 0x34)
+#define CREG_CPU_ADDR_TUNN_UPD (AXC001_CREG + 0x74)
+
+#define CREG_CPU_ARC770_IRQ_MUX (AXC001_CREG + 0x114)
+#define CREG_CPU_GPIO_UART_MUX (AXC001_CREG + 0x120)
+
+/*
+ * Set up System Memory Map for ARC cpu / peripherals controllers
+ *
+ * Each AXI master has a 4GB memory map specified as 16 apertures of 256MB, each
+ * of which maps to a corresponding 256MB aperture in Target slave memory map.
+ *
+ * e.g. ARC cpu AXI Master's aperture 8 (0x8000_0000) is mapped to aperture 0
+ * (0x0000_0000) of DDR Port 0 (slave #1)
+ *
+ * Access from cpu to MB controllers such as GMAC is setup using AXI Tunnel:
+ * which has master/slaves on both ends.
+ * e.g. aperture 14 (0xE000_0000) of ARC cpu is mapped to aperture 14
+ * (0xE000_0000) of CPU Card AXI Tunnel slave (slave #3) which is mapped to
+ * MB AXI Tunnel Master, which also has a mem map setup
+ *
+ * In the reverse direction, MB AXI Masters (e.g. GMAC) mem map is setup
+ * to map to MB AXI Tunnel slave which connects to CPU Card AXI Tunnel Master
+ */
+struct aperture {
+ unsigned int slave_sel:4, slave_off:4, pad:24;
+};
+
+/* CPU Card target slaves */
+#define AXC001_SLV_NONE 0
+#define AXC001_SLV_DDR_PORT0 1
+#define AXC001_SLV_SRAM 2
+#define AXC001_SLV_AXI_TUNNEL 3
+#define AXC001_SLV_AXI2APB 6
+#define AXC001_SLV_DDR_PORT1 7
+
+/* MB AXI Target slaves */
+#define AXS_MB_SLV_NONE 0
+#define AXS_MB_SLV_AXI_TUNNEL_CPU 1
+#define AXS_MB_SLV_AXI_TUNNEL_HAPS 2
+#define AXS_MB_SLV_SRAM 3
+#define AXS_MB_SLV_CONTROL 4
+
+/* MB AXI masters */
+#define AXS_MB_MST_TUNNEL_CPU 0
+#define AXS_MB_MST_USB_OHCI 10
+
+/*
+ * memmap for ARC core on CPU Card
+ */
+static const struct aperture axc001_memmap[16] = {
+ {AXC001_SLV_AXI_TUNNEL, 0x0},
+ {AXC001_SLV_AXI_TUNNEL, 0x1},
+ {AXC001_SLV_SRAM, 0x0}, /* 0x2000_0000: Local SRAM */
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_DDR_PORT0, 0x0}, /* 0x8000_0000: DDR 0..256M */
+ {AXC001_SLV_DDR_PORT0, 0x1}, /* 0x9000_0000: DDR 256..512M */
+ {AXC001_SLV_DDR_PORT0, 0x2},
+ {AXC001_SLV_DDR_PORT0, 0x3},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_AXI_TUNNEL, 0xD},
+ {AXC001_SLV_AXI_TUNNEL, 0xE}, /* MB: CREG, CGU... */
+ {AXC001_SLV_AXI2APB, 0x0}, /* CPU Card local CREG, CGU... */
+};
+
+/*
+ * memmap for CPU Card AXI Tunnel Master (for access by MB controllers)
+ * GMAC (MB) -> MB AXI Tunnel slave -> CPU Card AXI Tunnel Master -> DDR
+ */
+static const struct aperture axc001_axi_tunnel_memmap[16] = {
+ {AXC001_SLV_AXI_TUNNEL, 0x0},
+ {AXC001_SLV_AXI_TUNNEL, 0x1},
+ {AXC001_SLV_SRAM, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_DDR_PORT1, 0x0},
+ {AXC001_SLV_DDR_PORT1, 0x1},
+ {AXC001_SLV_DDR_PORT1, 0x2},
+ {AXC001_SLV_DDR_PORT1, 0x3},
+ {AXC001_SLV_NONE, 0x0},
+ {AXC001_SLV_AXI_TUNNEL, 0xD},
+ {AXC001_SLV_AXI_TUNNEL, 0xE},
+ {AXC001_SLV_AXI2APB, 0x0},
+};
+
+/*
+ * memmap for MB AXI Masters
+ * Same mem map for all perip controllers as well as MB AXI Tunnel Master
+ */
+static const struct aperture axs_mb_memmap[16] = {
+ {AXS_MB_SLV_SRAM, 0x0},
+ {AXS_MB_SLV_SRAM, 0x0},
+ {AXS_MB_SLV_NONE, 0x0},
+ {AXS_MB_SLV_NONE, 0x0},
+ {AXS_MB_SLV_NONE, 0x0},
+ {AXS_MB_SLV_NONE, 0x0},
+ {AXS_MB_SLV_NONE, 0x0},
+ {AXS_MB_SLV_NONE, 0x0},
+ {AXS_MB_SLV_AXI_TUNNEL_CPU, 0x8}, /* DDR on CPU Card */
+ {AXS_MB_SLV_AXI_TUNNEL_CPU, 0x9}, /* DDR on CPU Card */
+ {AXS_MB_SLV_AXI_TUNNEL_CPU, 0xA},
+ {AXS_MB_SLV_AXI_TUNNEL_CPU, 0xB},
+ {AXS_MB_SLV_NONE, 0x0},
+ {AXS_MB_SLV_AXI_TUNNEL_HAPS, 0xD},
+ {AXS_MB_SLV_CONTROL, 0x0}, /* MB Local CREG, CGU... */
+ {AXS_MB_SLV_AXI_TUNNEL_CPU, 0xF},
+};
+
+static noinline void __init
+axs101_set_memmap(void __iomem *base, const struct aperture map[16])
+{
+ unsigned int slave_select, slave_offset;
+ int i;
+
+ slave_select = slave_offset = 0;
+ for (i = 0; i < 8; i++) {
+ slave_select |= map[i].slave_sel << (i << 2);
+ slave_offset |= map[i].slave_off << (i << 2);
+ }
+
+ iowrite32(slave_select, base + 0x0); /* SLV0 */
+ iowrite32(slave_offset, base + 0x8); /* OFFSET0 */
+
+ slave_select = slave_offset = 0;
+ for (i = 0; i < 8; i++) {
+ slave_select |= map[i+8].slave_sel << (i << 2);
+ slave_offset |= map[i+8].slave_off << (i << 2);
+ }
+
+ iowrite32(slave_select, base + 0x4); /* SLV1 */
+ iowrite32(slave_offset, base + 0xC); /* OFFSET1 */
+}
+
+static void __init axs101_early_init(void)
+{
+ int i;
+
+ /* ARC 770D memory view */
+ axs101_set_memmap((void __iomem *) CREG_CPU_ADDR_770, axc001_memmap);
+ iowrite32(1, (void __iomem *) CREG_CPU_ADDR_770_UPD);
+
+ /* AXI tunnel memory map (incoming traffic from MB into CPU Card */
+ axs101_set_memmap((void __iomem *) CREG_CPU_ADDR_TUNN,
+ axc001_axi_tunnel_memmap);
+ iowrite32(1, (void __iomem *) CREG_CPU_ADDR_TUNN_UPD);
+
+ /* MB peripherals memory map */
+ for (i = AXS_MB_MST_TUNNEL_CPU; i <= AXS_MB_MST_USB_OHCI; i++)
+ axs101_set_memmap((void __iomem *) AXS_MB_CREG + (i << 4),
+ axs_mb_memmap);
+
+ iowrite32(0x3ff, (void __iomem *) AXS_MB_CREG + 0x100); /* Update */
+
+ /* GPIO pins 18 and 19 are used as UART rx and tx, respectively. */
+ iowrite32(0x01, (void __iomem *) CREG_CPU_GPIO_UART_MUX);
+
+ /* Set up the MB interrupt system: mux interrupts to GPIO7) */
+ iowrite32(0x01, (void __iomem *) CREG_MB_IRQ_MUX);
+
+ /* reset ethernet and ULPI interfaces */
+ iowrite32(0x18, (void __iomem *) CREG_MB_SW_RESET);
+
+ /* map GPIO 14:10 to ARC 9:5 (IRQ mux change for MB v2 onwards) */
+ iowrite32(0x52, (void __iomem *) CREG_CPU_ARC770_IRQ_MUX);
+
+ axs10x_early_init();
+}
+
+#endif /* CONFIG_AXS101 */
+
+#ifdef CONFIG_AXS103
+
+#define AXC003_CGU 0xF0000000
+#define AXC003_CREG 0xF0001000
+#define AXC003_MST_AXI_TUNNEL 0
+#define AXC003_MST_HS38 1
+
+#define CREG_CPU_AXI_M0_IRQ_MUX (AXC003_CREG + 0x440)
+#define CREG_CPU_GPIO_UART_MUX (AXC003_CREG + 0x480)
+#define CREG_CPU_TUN_IO_CTRL (AXC003_CREG + 0x494)
+
+
+union pll_reg {
+ struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:17, noupd:1, bypass:1, edge:1, high:6, low:6;
+#else
+ unsigned int low:6, high:6, edge:1, bypass:1, noupd:1, pad:17;
+#endif
+ };
+ unsigned int val;
+};
+
+static unsigned int __init axs103_get_freq(void)
+{
+ union pll_reg idiv, fbdiv, odiv;
+ unsigned int f = 33333333;
+
+ idiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 0);
+ fbdiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 4);
+ odiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 8);
+
+ if (idiv.bypass != 1)
+ f = f / (idiv.low + idiv.high);
+
+ if (fbdiv.bypass != 1)
+ f = f * (fbdiv.low + fbdiv.high);
+
+ if (odiv.bypass != 1)
+ f = f / (odiv.low + odiv.high);
+
+ f = (f + 500000) / 1000000; /* Rounding */
+ return f;
+}
+
+static inline unsigned int __init encode_div(unsigned int id, int upd)
+{
+ union pll_reg div;
+
+ div.val = 0;
+
+ div.noupd = !upd;
+ div.bypass = id == 1 ? 1 : 0;
+ div.edge = (id%2 == 0) ? 0 : 1; /* 0 = rising */
+ div.low = (id%2 == 0) ? id >> 1 : (id >> 1)+1;
+ div.high = id >> 1;
+
+ return div.val;
+}
+
+noinline static void __init
+axs103_set_freq(unsigned int id, unsigned int fd, unsigned int od)
+{
+ write_cgu_reg(encode_div(id, 0),
+ (void __iomem *)AXC003_CGU + 0x80 + 0,
+ (void __iomem *)AXC003_CGU + 0x110);
+
+ write_cgu_reg(encode_div(fd, 0),
+ (void __iomem *)AXC003_CGU + 0x80 + 4,
+ (void __iomem *)AXC003_CGU + 0x110);
+
+ write_cgu_reg(encode_div(od, 1),
+ (void __iomem *)AXC003_CGU + 0x80 + 8,
+ (void __iomem *)AXC003_CGU + 0x110);
+}
+
+static void __init axs103_early_init(void)
+{
+ switch (arc_get_core_freq()/1000000) {
+ case 33:
+ axs103_set_freq(1, 1, 1);
+ break;
+ case 50:
+ axs103_set_freq(1, 30, 20);
+ break;
+ case 75:
+ axs103_set_freq(2, 45, 10);
+ break;
+ case 90:
+ axs103_set_freq(2, 54, 10);
+ break;
+ case 100:
+ axs103_set_freq(1, 30, 10);
+ break;
+ case 125:
+ axs103_set_freq(2, 45, 6);
+ break;
+ default:
+ /*
+ * In this case, core_frequency derived from
+ * DT "clock-frequency" might not match with board value.
+ * Hence update it to match the board value.
+ */
+ arc_set_core_freq(axs103_get_freq() * 1000000);
+ break;
+ }
+
+ pr_info("Freq is %dMHz\n", axs103_get_freq());
+
+ /* Memory maps already config in pre-bootloader */
+
+ /* set GPIO mux to UART */
+ iowrite32(0x01, (void __iomem *) CREG_CPU_GPIO_UART_MUX);
+
+ iowrite32((0x00100000U | 0x000C0000U | 0x00003322U),
+ (void __iomem *) CREG_CPU_TUN_IO_CTRL);
+
+ /* Set up the AXS_MB interrupt system.*/
+ iowrite32(12, (void __iomem *) (CREG_CPU_AXI_M0_IRQ_MUX
+ + (AXC003_MST_HS38 << 2)));
+
+ /* connect ICTL - Main Board with GPIO line */
+ iowrite32(0x01, (void __iomem *) CREG_MB_IRQ_MUX);
+
+ axs10x_print_board_ver(AXC003_CREG + 4088, "AXC003 CPU Card");
+
+ axs10x_early_init();
+
+#ifdef CONFIG_ARC_MCIP
+ /* No Hardware init, but filling the smp ops callbacks */
+ mcip_init_early_smp();
+#endif
+}
+#endif
+
+#ifdef CONFIG_AXS101
+
+static const char *axs101_compat[] __initconst = {
+ "snps,axs101",
+ NULL,
+};
+
+MACHINE_START(AXS101, "axs101")
+ .dt_compat = axs101_compat,
+ .init_early = axs101_early_init,
+MACHINE_END
+
+#endif /* CONFIG_AXS101 */
+
+#ifdef CONFIG_AXS103
+
+static const char *axs103_compat[] __initconst = {
+ "snps,axs103",
+ NULL,
+};
+
+MACHINE_START(AXS103, "axs103")
+ .dt_compat = axs103_compat,
+ .init_early = axs103_early_init,
+#ifdef CONFIG_ARC_MCIP
+ .init_smp = mcip_init_smp,
+#endif
+MACHINE_END
+
+/*
+ * For the VDK OS-kit, to get the offset to pid and command fields
+ */
+char coware_swa_pid_offset[TASK_PID];
+char coware_swa_comm_offset[TASK_COMM];
+
+#endif /* CONFIG_AXS103 */
--- /dev/null
+#
+# Copyright (C) 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+menuconfig ARC_PLAT_SIM
+ bool "ARC nSIM based simulation virtual platforms"
+ select ARC_HAS_COH_CACHES if SMP
+ help
+ Support for nSIM based ARC simulation platforms
+ This includes the standalone nSIM (uart only) vs. System C OSCI VP
--- /dev/null
+#
+# Copyright (C) 2011-2012 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+obj-y := platform.o
--- /dev/null
+/*
+ * ARC simulation Platform support code
+ *
+ * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <asm/mach_desc.h>
+#include <asm/mcip.h>
+
+/*----------------------- Machine Descriptions ------------------------------
+ *
+ * Machine description is simply a set of platform/board specific callbacks
+ * This is not directly related to DeviceTree based dynamic device creation,
+ * however as part of early device tree scan, we also select the right
+ * callback set, by matching the DT compatible name.
+ */
+
+static const char *simulation_compat[] __initconst = {
+ "snps,nsim",
+ "snps,nsim_hs",
+ "snps,nsimosci",
+ "snps,nsimosci_hs",
+ NULL,
+};
+
+MACHINE_START(SIMULATION, "simulation")
+ .dt_compat = simulation_compat,
+#ifdef CONFIG_ARC_MCIP
+ .init_early = mcip_init_early_smp,
+ .init_smp = mcip_init_smp,
+#endif
+MACHINE_END
};
eth0: ethernet@70000 {
- compatible = "marvell,armada-370-neta";
reg = <0x70000 0x4000>;
interrupts = <8>;
clocks = <&gateclk 4>;
};
eth1: ethernet@74000 {
- compatible = "marvell,armada-370-neta";
reg = <0x74000 0x4000>;
interrupts = <10>;
clocks = <&gateclk 3>;
dmacap,memset;
};
};
+
+ ethernet@70000 {
+ compatible = "marvell,armada-370-neta";
+ };
+
+ ethernet@74000 {
+ compatible = "marvell,armada-370-neta";
+ };
};
};
};
};
eth3: ethernet@34000 {
- compatible = "marvell,armada-370-neta";
+ compatible = "marvell,armada-xp-neta";
reg = <0x34000 0x4000>;
interrupts = <14>;
clocks = <&gateclk 1>;
};
eth3: ethernet@34000 {
- compatible = "marvell,armada-370-neta";
+ compatible = "marvell,armada-xp-neta";
reg = <0x34000 0x4000>;
interrupts = <14>;
clocks = <&gateclk 1>;
};
eth2: ethernet@30000 {
- compatible = "marvell,armada-370-neta";
+ compatible = "marvell,armada-xp-neta";
reg = <0x30000 0x4000>;
interrupts = <12>;
clocks = <&gateclk 2>;
};
};
+ ethernet@70000 {
+ compatible = "marvell,armada-xp-neta";
+ };
+
+ ethernet@74000 {
+ compatible = "marvell,armada-xp-neta";
+ };
+
xor@f0900 {
compatible = "marvell,orion-xor";
reg = <0xF0900 0x100
#ifndef _ASM_ARM_XEN_HYPERVISOR_H
#define _ASM_ARM_XEN_HYPERVISOR_H
+#include <linux/init.h>
+
extern struct shared_info *HYPERVISOR_shared_info;
extern struct start_info *xen_start_info;
extern struct dma_map_ops *xen_dma_ops;
+#ifdef CONFIG_XEN
+void __init xen_early_init(void);
+#else
+static inline void xen_early_init(void) { return; }
+#endif
+
#endif /* _ASM_ARM_XEN_HYPERVISOR_H */
#include <xen/interface/grant_table.h>
#define phys_to_machine_mapping_valid(pfn) (1)
-#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
#define pte_mfn pte_pfn
#define mfn_pte pfn_pte
#include <asm/cacheflush.h>
#include <asm/cachetype.h>
#include <asm/tlbflush.h>
+#include <asm/xen/hypervisor.h>
#include <asm/prom.h>
#include <asm/mach/arch.h>
arm_dt_init_cpu_maps();
psci_init();
+ xen_early_init();
#ifdef CONFIG_SMP
if (is_smp()) {
if (!mdesc->smp_init || !mdesc->smp_init()) {
case IRQ_TYPE_EDGE_RISING:
/* Rising edge sensitive */
__lpc32xx_set_irq_type(d->hwirq, 1, 1);
- __irq_set_handler_locked(d->hwirq, handle_edge_irq);
+ __irq_set_handler_locked(d->irq, handle_edge_irq);
break;
case IRQ_TYPE_EDGE_FALLING:
/* Falling edge sensitive */
__lpc32xx_set_irq_type(d->hwirq, 0, 1);
- __irq_set_handler_locked(d->hwirq, handle_edge_irq);
+ __irq_set_handler_locked(d->irq, handle_edge_irq);
break;
case IRQ_TYPE_LEVEL_LOW:
/* Low level sensitive */
__lpc32xx_set_irq_type(d->hwirq, 0, 0);
- __irq_set_handler_locked(d->hwirq, handle_level_irq);
+ __irq_set_handler_locked(d->irq, handle_level_irq);
break;
case IRQ_TYPE_LEVEL_HIGH:
/* High level sensitive */
__lpc32xx_set_irq_type(d->hwirq, 1, 0);
- __irq_set_handler_locked(d->hwirq, handle_level_irq);
+ __irq_set_handler_locked(d->irq, handle_level_irq);
break;
/* Other modes are not supported */
#include <linux/cpuidle.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
+#include <linux/console.h>
#include <linux/mm.h>
int xen_platform_pci_unplug = XEN_UNPLUG_ALL;
EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
-static __read_mostly int xen_events_irq = -1;
+static __read_mostly unsigned int xen_events_irq;
+
+static __initdata struct device_node *xen_node;
int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
unsigned long addr,
* documentation of the Xen Device Tree format.
*/
#define GRANT_TABLE_PHYSADDR 0
-static int __init xen_guest_init(void)
+void __init xen_early_init(void)
{
- struct xen_add_to_physmap xatp;
- static struct shared_info *shared_info_page = 0;
- struct device_node *node;
int len;
const char *s = NULL;
const char *version = NULL;
const char *xen_prefix = "xen,xen-";
- struct resource res;
- phys_addr_t grant_frames;
- node = of_find_compatible_node(NULL, NULL, "xen,xen");
- if (!node) {
+ xen_node = of_find_compatible_node(NULL, NULL, "xen,xen");
+ if (!xen_node) {
pr_debug("No Xen support\n");
- return 0;
+ return;
}
- s = of_get_property(node, "compatible", &len);
+ s = of_get_property(xen_node, "compatible", &len);
if (strlen(xen_prefix) + 3 < len &&
!strncmp(xen_prefix, s, strlen(xen_prefix)))
version = s + strlen(xen_prefix);
if (version == NULL) {
pr_debug("Xen version not found\n");
- return 0;
+ return;
}
- if (of_address_to_resource(node, GRANT_TABLE_PHYSADDR, &res))
- return 0;
- grant_frames = res.start;
- xen_events_irq = irq_of_parse_and_map(node, 0);
- pr_info("Xen %s support found, events_irq=%d gnttab_frame=%pa\n",
- version, xen_events_irq, &grant_frames);
- if (xen_events_irq < 0)
- return -ENODEV;
+ pr_info("Xen %s support found\n", version);
xen_domain_type = XEN_HVM_DOMAIN;
else
xen_start_info->flags &= ~(SIF_INITDOMAIN|SIF_PRIVILEGED);
- if (!shared_info_page)
- shared_info_page = (struct shared_info *)
- get_zeroed_page(GFP_KERNEL);
+ if (!console_set_on_cmdline && !xen_initial_domain())
+ add_preferred_console("hvc", 0, NULL);
+}
+
+static int __init xen_guest_init(void)
+{
+ struct xen_add_to_physmap xatp;
+ struct shared_info *shared_info_page = NULL;
+ struct resource res;
+ phys_addr_t grant_frames;
+
+ if (!xen_domain())
+ return 0;
+
+ if (of_address_to_resource(xen_node, GRANT_TABLE_PHYSADDR, &res)) {
+ pr_err("Xen grant table base address not found\n");
+ return -ENODEV;
+ }
+ grant_frames = res.start;
+
+ xen_events_irq = irq_of_parse_and_map(xen_node, 0);
+ if (!xen_events_irq) {
+ pr_err("Xen event channel interrupt not found\n");
+ return -ENODEV;
+ }
+
+ shared_info_page = (struct shared_info *)get_zeroed_page(GFP_KERNEL);
+
if (!shared_info_page) {
pr_err("not enough memory\n");
return -ENOMEM;
#include <xen/xen.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/memory.h>
+#include <xen/page.h>
#include <xen/swiotlb-xen.h>
#include <asm/cacheflush.h>
-#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>
#include <xen/xen.h>
#include <xen/interface/memory.h>
+#include <xen/page.h>
#include <xen/swiotlb-xen.h>
#include <asm/cacheflush.h>
-#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>
#include <asm/psci.h>
#include <asm/efi.h>
#include <asm/virt.h>
+#include <asm/xen/hypervisor.h>
unsigned long elf_hwcap __read_mostly;
EXPORT_SYMBOL_GPL(elf_hwcap);
} else {
psci_acpi_init();
}
+ xen_early_init();
cpu_read_bootcpu_ops();
#ifdef CONFIG_SMP
irq_set_chip_data(eic->first_irq + i, eic);
}
- irq_set_chained_handler(int_irq, demux_eic_irq);
- irq_set_handler_data(int_irq, eic);
+ irq_set_chained_handler_and_data(int_irq, demux_eic_irq, eic);
if (pdev->id == 0) {
nmi_eic = eic;
void __init psc_register_interrupts(void)
{
- irq_set_chained_handler(IRQ_AUTO_3, psc_irq);
- irq_set_handler_data(IRQ_AUTO_3, (void *)0x30);
- irq_set_chained_handler(IRQ_AUTO_4, psc_irq);
- irq_set_handler_data(IRQ_AUTO_4, (void *)0x40);
- irq_set_chained_handler(IRQ_AUTO_5, psc_irq);
- irq_set_handler_data(IRQ_AUTO_5, (void *)0x50);
- irq_set_chained_handler(IRQ_AUTO_6, psc_irq);
- irq_set_handler_data(IRQ_AUTO_6, (void *)0x60);
+ irq_set_chained_handler_and_data(IRQ_AUTO_3, psc_irq, (void *)0x30);
+ irq_set_chained_handler_and_data(IRQ_AUTO_4, psc_irq, (void *)0x40);
+ irq_set_chained_handler_and_data(IRQ_AUTO_5, psc_irq, (void *)0x50);
+ irq_set_chained_handler_and_data(IRQ_AUTO_6, psc_irq, (void *)0x60);
}
void psc_irq_enable(int irq) {
irq = irq_create_mapping(domain, AR2315_MISC_IRQ_AHB);
setup_irq(irq, &ar2315_ahb_err_interrupt);
- irq_set_chained_handler(AR2315_IRQ_MISC, ar2315_misc_irq_handler);
- irq_set_handler_data(AR2315_IRQ_MISC, domain);
+ irq_set_chained_handler_and_data(AR2315_IRQ_MISC,
+ ar2315_misc_irq_handler, domain);
ar2315_misc_irq_domain = domain;
}
irq = irq_create_mapping(domain, AR5312_MISC_IRQ_AHB_PROC);
setup_irq(irq, &ar5312_ahb_err_interrupt);
- irq_set_chained_handler(AR5312_IRQ_MISC, ar5312_misc_irq_handler);
- irq_set_handler_data(AR5312_IRQ_MISC, domain);
+ irq_set_chained_handler_and_data(AR5312_IRQ_MISC,
+ ar5312_misc_irq_handler, domain);
ar5312_misc_irq_domain = domain;
}
apc->irq_ext = irq_create_mapping(apc->domain, AR2315_PCI_IRQ_EXT);
- irq_set_chained_handler(apc->irq, ar2315_pci_irq_handler);
- irq_set_handler_data(apc->irq, apc);
+ irq_set_chained_handler_and_data(apc->irq, ar2315_pci_irq_handler,
+ apc);
/* Clear any pending Abort or external Interrupts
* and enable interrupt processing */
rt_intc_w32(INTC_INT_GLOBAL, INTC_REG_ENABLE);
- irq_set_chained_handler(irq, ralink_intc_irq_handler);
- irq_set_handler_data(irq, domain);
+ irq_set_chained_handler_and_data(irq, ralink_intc_irq_handler, domain);
/* tell the kernel which irq is used for performance monitoring */
rt_perfcount_irq = irq_create_mapping(domain, 9);
if (irqd_is_per_cpu(data))
continue;
- if (cpumask_test_cpu(self, &data->affinity) &&
+ if (cpumask_test_cpu(self, data->affinity) &&
!cpumask_intersects(&irq_affinity[irq], cpu_online_mask)) {
int cpu_id;
cpu_id = cpumask_first(cpu_online_mask);
- cpumask_set_cpu(cpu_id, &data->affinity);
+ cpumask_set_cpu(cpu_id, data->affinity);
}
/* We need to operate irq_affinity_online atomically. */
arch_local_cli_save(flags);
GxICR(irq) = x & GxICR_LEVEL;
tmp = GxICR(irq);
- new = cpumask_any_and(&data->affinity,
+ new = cpumask_any_and(data->affinity,
cpu_online_mask);
irq_affinity_online[irq] = new;
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_PERF=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_DEFAULT_DEADLINE=y
+CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z196=y
CONFIG_TUNE_ZEC12=y
CONFIG_NR_CPUS=256
# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
CONFIG_NF_TABLES_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_NF_TABLES_ARP=m
CONFIG_NF_NAT_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_TABLES_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NF_NAT_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
CONFIG_BLK_DEV_OSD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_BLK_DEV_XIP=y
CONFIG_CDROM_PKTCDVD=m
CONFIG_ATA_OVER_ETH=m
CONFIG_VIRTIO_BLK=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_READABLE_ASM=y
CONFIG_UNUSED_SYMBOLS=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_DEBUG_OBJECTS=y
CONFIG_DEBUG_OBJECTS_SELFTEST=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_VM=y
CONFIG_DEBUG_VM_RB=y
+CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
CONFIG_DEBUG_PER_CPU_MAPS=y
CONFIG_DEBUG_SHIRQ=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_DEBUG_CREDENTIALS=y
-CONFIG_PROVE_RCU=y
CONFIG_RCU_TORTURE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=300
CONFIG_NOTIFIER_ERROR_INJECTION=m
# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_ENCRYPTED_KEYS=m
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_PERF=y
CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
CONFIG_NF_TABLES_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_NF_TABLES_ARP=m
CONFIG_NF_NAT_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_TABLES_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NF_NAT_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
CONFIG_BLK_DEV_OSD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_BLK_DEV_XIP=y
CONFIG_CDROM_PKTCDVD=m
CONFIG_ATA_OVER_ETH=m
CONFIG_VIRTIO_BLK=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_FRAME_WARN=1024
CONFIG_UNUSED_SYMBOLS=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
CONFIG_PANIC_ON_OOPS=y
CONFIG_TIMER_STATS=y
# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_ENCRYPTED_KEYS=m
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_PERF=y
CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_DEFAULT_DEADLINE=y
+CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z196=y
CONFIG_TUNE_ZEC12=y
-CONFIG_NR_CPUS=256
+CONFIG_NR_CPUS=512
CONFIG_HZ_100=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
CONFIG_NF_TABLES_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_NF_TABLES_ARP=m
CONFIG_NF_NAT_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_TABLES_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NF_NAT_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
CONFIG_BLK_DEV_OSD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_BLK_DEV_XIP=y
CONFIG_CDROM_PKTCDVD=m
CONFIG_ATA_OVER_ETH=m
CONFIG_VIRTIO_BLK=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_FRAME_WARN=1024
CONFIG_UNUSED_SYMBOLS=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_TIMER_STATS=y
CONFIG_RCU_TORTURE_TEST=m
# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_ENCRYPTED_KEYS=m
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_RCU_FAST_NO_HZ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CGROUPS=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
+CONFIG_BLK_DEV_INTEGRITY=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_DEFAULT_DEADLINE=y
+CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z196=y
CONFIG_NR_CPUS=256
CONFIG_HZ_100=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CMA=y
CONFIG_CRASH_DUMP=y
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PANIC_ON_OOPS=y
CONFIG_TIMER_STATS=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_PROVE_LOCKING=y
CONFIG_DEBUG_PI_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_PROVE_RCU=y
CONFIG_RCU_CPU_STALL_TIMEOUT=60
+# CONFIG_RCU_CPU_STALL_INFO is not set
CONFIG_RCU_TRACE=y
CONFIG_LATENCYTOP=y
CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
+CONFIG_TRACER_SNAPSHOT=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_UPROBE_EVENT=y
CONFIG_KPROBES_SANITY_TEST=y
# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_CRYPTO_LZO=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
CONFIG_ZCRYPT=m
CONFIG_CRYPTO_SHA1_S390=m
CONFIG_CRYPTO_SHA256_S390=m
#ifndef _ASM_S390_CPU_H
#define _ASM_S390_CPU_H
-#define MAX_CPU_ADDRESS 255
-
#ifndef __ASSEMBLY__
#include <linux/types.h>
};
extern struct dump_save_areas dump_save_areas;
-struct save_area_ext *dump_save_area_create(int cpu);
extern void do_reipl(void);
extern void do_halt(void);
#include <asm/cpu.h>
#define SCLP_CHP_INFO_MASK_SIZE 32
+#define SCLP_MAX_CORES 256
struct sclp_chp_info {
u8 recognized[SCLP_CHP_INFO_MASK_SIZE];
char loadparm[LOADPARM_LEN];
};
-struct sclp_cpu_entry {
+struct sclp_core_entry {
u8 core_id;
u8 reserved0[2];
u8 : 3;
u8 reserved1;
} __attribute__((packed));
-struct sclp_cpu_info {
+struct sclp_core_info {
unsigned int configured;
unsigned int standby;
unsigned int combined;
- int has_cpu_type;
- struct sclp_cpu_entry cpu[MAX_CPU_ADDRESS + 1];
+ struct sclp_core_entry core[SCLP_MAX_CORES];
};
struct sclp_info {
unsigned char has_vt220 : 1;
unsigned char has_siif : 1;
unsigned char has_sigpif : 1;
- unsigned char has_cpu_type : 1;
+ unsigned char has_core_type : 1;
unsigned char has_sprp : 1;
unsigned int ibc;
unsigned int mtid;
unsigned long long rzm;
unsigned long long rnmax;
unsigned long long hamax;
- unsigned int max_cpu;
+ unsigned int max_cores;
unsigned long hsa_size;
unsigned long long facilities;
};
extern struct sclp_info sclp;
-int sclp_get_cpu_info(struct sclp_cpu_info *info);
-int sclp_cpu_configure(u8 cpu);
-int sclp_cpu_deconfigure(u8 cpu);
+int sclp_get_core_info(struct sclp_core_info *info);
+int sclp_core_configure(u8 core);
+int sclp_core_deconfigure(u8 core);
int sclp_sdias_blk_count(void);
int sclp_sdias_copy(void *dest, int blk_num, int nr_blks);
int sclp_chp_configure(struct chp_id chpid);
extern int smp_find_processor_id(u16 address);
extern int smp_store_status(int cpu);
+extern void smp_save_dump_cpus(void);
extern int smp_vcpu_scheduled(int cpu);
extern void smp_yield_cpu(int cpu);
extern void smp_cpu_set_polarization(int cpu, int val);
static inline int smp_vcpu_scheduled(int cpu) { return 1; }
static inline void smp_yield_cpu(int cpu) { }
static inline void smp_fill_possible_mask(void) { }
+static inline void smp_save_dump_cpus(void) { }
#endif /* CONFIG_SMP */
#
# Calls diag 308 subcode 1 and continues execution
#
-# The following conditions must be ensured before calling this function:
-# * Prefix register = 0
-# * Lowcore protection is disabled
-#
ENTRY(diag308_reset)
larl %r4,.Lctlregs # Save control registers
stctg %c0,%c15,0(%r4)
+ lg %r2,0(%r4) # Disable lowcore protection
+ nilh %r2,0xefff
+ larl %r4,.Lctlreg0
+ stg %r2,0(%r4)
+ lctlg %c0,%c0,0(%r4)
larl %r4,.Lfpctl # Floating point control register
stfpc 0(%r4)
+ larl %r4,.Lprefix # Save prefix register
+ stpx 0(%r4)
+ larl %r4,.Lprefix_zero # Set prefix register to 0
+ spx 0(%r4)
larl %r4,.Lcontinue_psw # Save PSW flags
epsw %r2,%r3
stm %r2,%r3,0(%r4)
lctlg %c0,%c15,0(%r4)
larl %r4,.Lfpctl # Restore floating point ctl register
lfpc 0(%r4)
+ larl %r4,.Lprefix # Restore prefix register
+ spx 0(%r4)
larl %r4,.Lcontinue_psw # Restore PSW flags
lpswe 0(%r4)
.Lcontinue:
.section .bss
.align 8
+.Lctlreg0:
+ .quad 0
.Lctlregs:
.rept 16
.quad 0
.endr
.Lfpctl:
.long 0
+.Lprefix:
+ .long 0
+.Lprefix_zero:
+ .long 0
.previous
struct dump_save_areas dump_save_areas;
-/*
- * Allocate and add a save area for a CPU
- */
-struct save_area_ext *dump_save_area_create(int cpu)
-{
- struct save_area_ext **save_areas, *save_area;
-
- save_area = kmalloc(sizeof(*save_area), GFP_KERNEL);
- if (!save_area)
- return NULL;
- if (cpu + 1 > dump_save_areas.count) {
- dump_save_areas.count = cpu + 1;
- save_areas = krealloc(dump_save_areas.areas,
- dump_save_areas.count * sizeof(void *),
- GFP_KERNEL | __GFP_ZERO);
- if (!save_areas) {
- kfree(save_area);
- return NULL;
- }
- dump_save_areas.areas = save_areas;
- }
- dump_save_areas.areas[cpu] = save_area;
- return save_area;
-}
-
/*
* Return physical address for virtual address
*/
ptr += len;
/* Copy lower halves of SIMD registers 0-15 */
for (i = 0; i < 16; i++) {
- memcpy(ptr, &vx_regs[i], 8);
+ memcpy(ptr, &vx_regs[i].u[2], 8);
ptr += 8;
}
return ptr;
}
#define param_check_sfb_size(name, p) __param_check(name, p, void)
-static struct kernel_param_ops param_ops_sfb_size = {
+static const struct kernel_param_ops param_ops_sfb_size = {
.set = param_set_sfb_size,
.get = param_get_sfb_size,
};
check_initrd();
reserve_crashkernel();
+ /*
+ * Be aware that smp_save_dump_cpus() triggers a system reset.
+ * Therefore CPU and device initialization should be done afterwards.
+ */
+ smp_save_dump_cpus();
setup_resources();
setup_vmcoreinfo();
#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/crash_dump.h>
+#include <linux/memblock.h>
#include <asm/asm-offsets.h>
#include <asm/switch_to.h>
#include <asm/facility.h>
u16 address; /* physical cpu address */
};
-static u8 boot_cpu_type;
+static u8 boot_core_type;
static struct pcpu pcpu_devices[NR_CPUS];
unsigned int smp_cpu_mt_shift;
#ifdef CONFIG_CRASH_DUMP
-static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu)
+static void __smp_store_cpu_state(struct save_area_ext *sa_ext, u16 address,
+ int is_boot_cpu)
{
- void *lc = pcpu_devices[0].lowcore;
- struct save_area_ext *sa_ext;
+ void *lc = (void *)(unsigned long) store_prefix();
unsigned long vx_sa;
- sa_ext = dump_save_area_create(cpu);
- if (!sa_ext)
- panic("could not allocate memory for save area\n");
if (is_boot_cpu) {
/* Copy the registers of the boot CPU. */
copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa),
if (!MACHINE_HAS_VX)
return;
/* Get the VX registers */
- vx_sa = __get_free_page(GFP_KERNEL);
+ vx_sa = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!vx_sa)
panic("could not allocate memory for VX save area\n");
__pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL);
memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs));
- free_page(vx_sa);
+ memblock_free(vx_sa, PAGE_SIZE);
}
+int smp_store_status(int cpu)
+{
+ unsigned long vx_sa;
+ struct pcpu *pcpu;
+
+ pcpu = pcpu_devices + cpu;
+ if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS,
+ 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED)
+ return -EIO;
+ if (!MACHINE_HAS_VX)
+ return 0;
+ vx_sa = __pa(pcpu->lowcore->vector_save_area_addr);
+ __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
+ vx_sa, NULL);
+ return 0;
+}
+
+#endif /* CONFIG_CRASH_DUMP */
+
/*
* Collect CPU state of the previous, crashed system.
* There are four cases:
* old system. The ELF sections are picked up by the crash_dump code
* via elfcorehdr_addr.
*/
-static void __init smp_store_cpu_states(struct sclp_cpu_info *info)
+void __init smp_save_dump_cpus(void)
{
- unsigned int cpu, address, i, j;
- int is_boot_cpu;
+#ifdef CONFIG_CRASH_DUMP
+ int addr, cpu, boot_cpu_addr, max_cpu_addr;
+ struct save_area_ext *sa_ext;
+ bool is_boot_cpu;
if (is_kdump_kernel())
/* Previous system stored the CPU states. Nothing to do. */
return;
/* Set multi-threading state to the previous system. */
pcpu_set_smt(sclp.mtid_prev);
- /* Collect CPU states. */
- cpu = 0;
- for (i = 0; i < info->configured; i++) {
- /* Skip CPUs with different CPU type. */
- if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
+ max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
+ for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) {
+ if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) ==
+ SIGP_CC_NOT_OPERATIONAL)
continue;
- for (j = 0; j <= smp_cpu_mtid; j++, cpu++) {
- address = (info->cpu[i].core_id << smp_cpu_mt_shift) + j;
- is_boot_cpu = (address == pcpu_devices[0].address);
- if (is_boot_cpu && !OLDMEM_BASE)
- /* Skip boot CPU for standard zfcp dump. */
- continue;
- /* Get state for this CPu. */
- __smp_store_cpu_state(cpu, address, is_boot_cpu);
- }
+ cpu += 1;
}
-}
-
-int smp_store_status(int cpu)
-{
- unsigned long vx_sa;
- struct pcpu *pcpu;
-
- pcpu = pcpu_devices + cpu;
- if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS,
- 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED)
- return -EIO;
- if (!MACHINE_HAS_VX)
- return 0;
- vx_sa = __pa(pcpu->lowcore->vector_save_area_addr);
- __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
- vx_sa, NULL);
- return 0;
-}
-
+ dump_save_areas.areas = (void *)memblock_alloc(sizeof(void *) * cpu, 8);
+ dump_save_areas.count = cpu;
+ boot_cpu_addr = stap();
+ for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) {
+ if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) ==
+ SIGP_CC_NOT_OPERATIONAL)
+ continue;
+ sa_ext = (void *) memblock_alloc(sizeof(*sa_ext), 8);
+ dump_save_areas.areas[cpu] = sa_ext;
+ if (!sa_ext)
+ panic("could not allocate memory for save area\n");
+ is_boot_cpu = (addr == boot_cpu_addr);
+ cpu += 1;
+ if (is_boot_cpu && !OLDMEM_BASE)
+ /* Skip boot CPU for standard zfcp dump. */
+ continue;
+ /* Get state for this CPU. */
+ __smp_store_cpu_state(sa_ext, addr, is_boot_cpu);
+ }
+ diag308_reset();
+ pcpu_set_smt(0);
#endif /* CONFIG_CRASH_DUMP */
+}
void smp_cpu_set_polarization(int cpu, int val)
{
return pcpu_devices[cpu].polarization;
}
-static struct sclp_cpu_info *smp_get_cpu_info(void)
+static struct sclp_core_info *smp_get_core_info(void)
{
static int use_sigp_detection;
- struct sclp_cpu_info *info;
+ struct sclp_core_info *info;
int address;
info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (info && (use_sigp_detection || sclp_get_cpu_info(info))) {
+ if (info && (use_sigp_detection || sclp_get_core_info(info))) {
use_sigp_detection = 1;
- for (address = 0; address <= MAX_CPU_ADDRESS;
+ for (address = 0;
+ address < (SCLP_MAX_CORES << smp_cpu_mt_shift);
address += (1U << smp_cpu_mt_shift)) {
if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) ==
SIGP_CC_NOT_OPERATIONAL)
continue;
- info->cpu[info->configured].core_id =
+ info->core[info->configured].core_id =
address >> smp_cpu_mt_shift;
info->configured++;
}
static int smp_add_present_cpu(int cpu);
-static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
+static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add)
{
struct pcpu *pcpu;
cpumask_t avail;
cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
cpu = cpumask_first(&avail);
for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
- if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
+ if (sclp.has_core_type && info->core[i].type != boot_core_type)
continue;
- address = info->cpu[i].core_id << smp_cpu_mt_shift;
+ address = info->core[i].core_id << smp_cpu_mt_shift;
for (j = 0; j <= smp_cpu_mtid; j++) {
if (pcpu_find_address(cpu_present_mask, address + j))
continue;
static void __init smp_detect_cpus(void)
{
unsigned int cpu, mtid, c_cpus, s_cpus;
- struct sclp_cpu_info *info;
+ struct sclp_core_info *info;
u16 address;
/* Get CPU information */
- info = smp_get_cpu_info();
+ info = smp_get_core_info();
if (!info)
panic("smp_detect_cpus failed to allocate memory\n");
/* Find boot CPU type */
- if (info->has_cpu_type) {
+ if (sclp.has_core_type) {
address = stap();
for (cpu = 0; cpu < info->combined; cpu++)
- if (info->cpu[cpu].core_id == address) {
+ if (info->core[cpu].core_id == address) {
/* The boot cpu dictates the cpu type. */
- boot_cpu_type = info->cpu[cpu].type;
+ boot_core_type = info->core[cpu].type;
break;
}
if (cpu >= info->combined)
panic("Could not find boot CPU type");
}
-#ifdef CONFIG_CRASH_DUMP
- /* Collect CPU state of previous system */
- smp_store_cpu_states(info);
-#endif
-
/* Set multi-threading state for the current system */
- mtid = boot_cpu_type ? sclp.mtid : sclp.mtid_cp;
+ mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp;
mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
pcpu_set_smt(mtid);
/* Print number of CPUs */
c_cpus = s_cpus = 0;
for (cpu = 0; cpu < info->combined; cpu++) {
- if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type)
+ if (sclp.has_core_type &&
+ info->core[cpu].type != boot_core_type)
continue;
if (cpu < info->configured)
c_cpus += smp_cpu_mtid + 1;
sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1;
sclp_max = min(smp_max_threads, sclp_max);
- sclp_max = sclp.max_cpu * sclp_max ?: nr_cpu_ids;
+ sclp_max = sclp.max_cores * sclp_max ?: nr_cpu_ids;
possible = setup_possible_cpus ?: nr_cpu_ids;
possible = min(possible, sclp_max);
for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
case 0:
if (pcpu->state != CPU_STATE_CONFIGURED)
break;
- rc = sclp_cpu_deconfigure(pcpu->address >> smp_cpu_mt_shift);
+ rc = sclp_core_deconfigure(pcpu->address >> smp_cpu_mt_shift);
if (rc)
break;
for (i = 0; i <= smp_cpu_mtid; i++) {
case 1:
if (pcpu->state != CPU_STATE_STANDBY)
break;
- rc = sclp_cpu_configure(pcpu->address >> smp_cpu_mt_shift);
+ rc = sclp_core_configure(pcpu->address >> smp_cpu_mt_shift);
if (rc)
break;
for (i = 0; i <= smp_cpu_mtid; i++) {
int __ref smp_rescan_cpus(void)
{
- struct sclp_cpu_info *info;
+ struct sclp_core_info *info;
int nr;
- info = smp_get_cpu_info();
+ info = smp_get_core_info();
if (!info)
return -ENOMEM;
get_online_cpus();
({ \
/* Branch instruction needs 6 bytes */ \
int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
- _EMIT6(op1 | reg(b1, b2) << 16 | rel, op2 | mask); \
+ _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
select MODULES_USE_ELF_RELA
select HAVE_ARCH_TRACEHOOK
select HAVE_SYSCALL_TRACEPOINTS
+ select USER_STACKTRACE_SUPPORT
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_DEBUG_STACKOVERFLOW
select ARCH_WANT_FRAME_POINTERS
select HAVE_CONTEXT_TRACKING
select EDAC_SUPPORT
+ select GENERIC_STRNCPY_FROM_USER
+ select GENERIC_STRNLEN_USER
# FIXME: investigate whether we need/want these options.
# select HAVE_IOREMAP_PROT
select HVC_IRQ if TILEGX
def_bool y
+# Building with ARCH=tilegx (or ARCH=tile) implies using the
+# 64-bit TILE-Gx toolchain, so force CONFIG_TILEGX on.
config TILEGX
- bool "Building for TILE-Gx (64-bit) processor"
+ def_bool ARCH != "tilepro"
select SPARSE_IRQ
select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
select HAVE_FUNCTION_TRACER
void setup_irq_regs(void);
+#ifdef __tilegx__
+void arch_trigger_all_cpu_backtrace(bool self);
+#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+#endif
+
#endif /* _ASM_TILE_IRQ_H */
unsigned long long interrupt_mask;
/* User interrupt-control 0 state */
unsigned long intctrl_0;
- /* Is this task currently doing a backtrace? */
- bool in_backtrace;
/* Any other miscellaneous processor state bits */
unsigned long proc_status;
#if !CHIP_HAS_FIXED_INTVEC_BASE()
* to claim the lock is held, since it will be momentarily
* if not already. There's no need to wait for a "valid"
* lock->next_ticket to become available.
+ * Use READ_ONCE() to ensure that calling this in a loop is OK.
*/
- return lock->next_ticket != lock->current_ticket;
+ int curr = READ_ONCE(lock->current_ticket);
+ int next = READ_ONCE(lock->next_ticket);
+
+ return next != curr;
}
void arch_spin_lock(arch_spinlock_t *lock);
#ifndef _ASM_TILE_SPINLOCK_64_H
#define _ASM_TILE_SPINLOCK_64_H
+#include <linux/compiler.h>
+
/* Shifts and masks for the various fields in "lock". */
#define __ARCH_SPIN_CURRENT_SHIFT 17
#define __ARCH_SPIN_NEXT_MASK 0x7fff
/* The lock is locked if a task would have to wait to get it. */
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
- u32 val = lock->lock;
+ /* Use READ_ONCE() to ensure that calling this in a loop is OK. */
+ u32 val = READ_ONCE(lock->lock);
return arch_spin_current(val) != arch_spin_next(val);
}
/* Advance to the next frame. */
extern void KBacktraceIterator_next(struct KBacktraceIterator *kbt);
+/* Dump just the contents of the pt_regs structure. */
+extern void tile_show_regs(struct pt_regs *);
+
/*
* Dump stack given complete register info. Use only from the
* architecture-specific code; show_stack()
- * and dump_stack() (in entry.S) are architecture-independent entry points.
+ * and dump_stack() are architecture-independent entry points.
*/
-extern void tile_show_stack(struct KBacktraceIterator *, int headers);
-
-/* Dump stack of current process, with registers to seed the backtrace. */
-extern void dump_stack_regs(struct pt_regs *);
-
-/* Helper method for assembly dump_stack(). */
-extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
+extern void tile_show_stack(struct KBacktraceIterator *);
#endif /* _ASM_TILE_STACK_H */
unsigned long unalign_jit_tmp[4]; /* temp r0..r3 storage */
void __user *unalign_jit_base; /* unalign fixup JIT base */
#endif
+ bool in_backtrace; /* currently doing backtrace? */
};
/*
/* kernel/messaging.c */
void hv_message_intr(struct pt_regs *, int intnum);
+#define TILE_NMI_DUMP_STACK 1 /* Dump stack for sysrq+'l' */
+
+/* kernel/process.c */
+void do_nmi_dump_stack(struct pt_regs *regs);
+
+/* kernel/traps.c */
+void do_nmi(struct pt_regs *, int fault_num, unsigned long reason);
+
/* kernel/irq.c */
void tile_dev_intr(struct pt_regs *, int intnum);
#define is_arch_mappable_range(addr, size) 0
#endif
+/*
+ * Note that using this definition ignores is_arch_mappable_range(),
+ * so on tilepro code that uses user_addr_max() is constrained not
+ * to reference the tilepro user-interrupt region.
+ */
+#define user_addr_max() (current_thread_info()->addr_limit.seg)
+
/*
* Test whether a block of memory is a valid user space address.
* Returns 0 if the range is valid, nonzero otherwise.
#endif
-/**
- * strlen_user: - Get the size of a string in user space.
- * @str: The string to measure.
- *
- * Context: User context only. This function may sleep.
- *
- * Get the size of a NUL-terminated string in user space.
- *
- * Returns the size of the string INCLUDING the terminating NUL.
- * On exception, returns 0.
- *
- * If there is a limit on the length of a valid string, you may wish to
- * consider using strnlen_user() instead.
- */
-extern long strnlen_user_asm(const char __user *str, long n);
-static inline long __must_check strnlen_user(const char __user *str, long n)
-{
- might_fault();
- return strnlen_user_asm(str, n);
-}
-#define strlen_user(str) strnlen_user(str, LONG_MAX)
-
-/**
- * strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
- * @dst: Destination address, in kernel space. This buffer must be at
- * least @count bytes long.
- * @src: Source address, in user space.
- * @count: Maximum number of bytes to copy, including the trailing NUL.
- *
- * Copies a NUL-terminated string from userspace to kernel space.
- * Caller must check the specified block with access_ok() before calling
- * this function.
- *
- * On success, returns the length of the string (not including the trailing
- * NUL).
- *
- * If access to userspace fails, returns -EFAULT (some data may have been
- * copied).
- *
- * If @count is smaller than the length of the string, copies @count bytes
- * and returns @count.
- */
-extern long strncpy_from_user_asm(char *dst, const char __user *src, long);
-static inline long __must_check __strncpy_from_user(
- char *dst, const char __user *src, long count)
-{
- might_fault();
- return strncpy_from_user_asm(dst, src, count);
-}
-static inline long __must_check strncpy_from_user(
- char *dst, const char __user *src, long count)
-{
- if (access_ok(VERIFY_READ, src, 1))
- return __strncpy_from_user(dst, src, count);
- return -EFAULT;
-}
+extern long strnlen_user(const char __user *str, long n);
+extern long strlen_user(const char __user *str);
+extern long strncpy_from_user(char *dst, const char __user *src, long);
/**
* clear_user: - Zero a block of memory in user space.
--- /dev/null
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+#include <asm/byteorder.h>
+
+struct word_at_a_time { /* unused */ };
+#define WORD_AT_A_TIME_CONSTANTS {}
+
+/* Generate 0x01 byte values for non-zero bytes using a SIMD instruction. */
+static inline unsigned long has_zero(unsigned long val, unsigned long *data,
+ const struct word_at_a_time *c)
+{
+#ifdef __tilegx__
+ unsigned long mask = __insn_v1cmpeqi(val, 0);
+#else /* tilepro */
+ unsigned long mask = __insn_seqib(val, 0);
+#endif
+ *data = mask;
+ return mask;
+}
+
+/* These operations are both nops. */
+#define prep_zero_mask(val, data, c) (data)
+#define create_zero_mask(data) (data)
+
+/* And this operation just depends on endianness. */
+static inline long find_zero(unsigned long mask)
+{
+#ifdef __BIG_ENDIAN
+ return __builtin_clzl(mask) >> 3;
+#else
+ return __builtin_ctzl(mask) >> 3;
+#endif
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
/** hv_console_set_ipi */
#define HV_DISPATCH_CONSOLE_SET_IPI 63
+/** hv_send_nmi */
+#define HV_DISPATCH_SEND_NMI 65
+
/** One more than the largest dispatch value */
-#define _HV_DISPATCH_END 64
+#define _HV_DISPATCH_END 66
#ifndef __ASSEMBLER__
#define INT_DMATLB_ACCESS_DWNCL INT_DMA_CPL
/** Device interrupt downcall interrupt vector */
#define INT_DEV_INTR_DWNCL INT_WORLD_ACCESS
+/** NMI downcall interrupt vector */
+#define INT_NMI_DWNCL 64
+
+#define HV_NMI_FLAG_FORCE 0x1 /**< Force an NMI downcall regardless of
+ the ICS bit of the client. */
#ifndef __ASSEMBLER__
int hv_dev_poll_cancel(int devhdl);
+/** NMI information */
+typedef struct
+{
+ /** Result: negative error, or HV_NMI_RESULT_xxx. */
+ int result;
+
+ /** PC from interrupted remote core (if result != HV_NMI_RESULT_FAIL_HV). */
+ HV_VirtAddr pc;
+
+} HV_NMI_Info;
+
+/** NMI issued successfully. */
+#define HV_NMI_RESULT_OK 0
+
+/** NMI not issued: remote tile running at client PL with ICS set. */
+#define HV_NMI_RESULT_FAIL_ICS 1
+
+/** NMI not issued: remote tile waiting in hypervisor. */
+#define HV_NMI_RESULT_FAIL_HV 2
+
+/** Force an NMI downcall regardless of the ICS bit of the client. */
+#define HV_NMI_FLAG_FORCE 0x1
+
+/** Send an NMI interrupt request to a particular tile.
+ *
+ * This will cause the NMI to be issued on the remote tile regardless
+ * of the state of the client interrupt mask. However, if the remote
+ * tile is in the hypervisor, it will not execute the NMI, and
+ * HV_NMI_RESULT_FAIL_HV will be returned. Similarly, if the remote
+ * tile is in a client interrupt critical section at the time of the
+ * NMI, it will not execute the NMI, and HV_NMI_RESULT_FAIL_ICS will
+ * be returned. In this second case, however, if HV_NMI_FLAG_FORCE
+ * is set in flags, then the remote tile will enter its NMI interrupt
+ * vector regardless. Forcing the NMI vector during an interrupt
+ * critical section will mean that the client can not safely continue
+ * execution after handling the interrupt.
+ *
+ * @param tile Tile to which the NMI request is sent.
+ * @param info NMI information which is defined by and interpreted by the
+ * supervisor, is passed to the specified tile, and is
+ * stored in the SPR register SYSTEM_SAVE_{CLIENT_PL}_2 on the
+ * specified tile when entering the NMI handler routine.
+ * Typically, this parameter stores the NMI type, or an aligned
+ * VA plus some special bits, etc.
+ * @param flags Flags (HV_NMI_FLAG_xxx).
+ * @return Information about the requested NMI.
+ */
+HV_NMI_Info hv_send_nmi(HV_Coord tile, unsigned long info, __hv64 flags);
+
+
/** Scatter-gather list for preada/pwritea calls. */
typedef struct
#if CHIP_VA_WIDTH() <= 32
{ move r0, lr; jrp lr }
STD_ENDPROC(current_text_addr)
-STD_ENTRY(dump_stack)
- { move r2, lr; lnk r1 }
- { move r4, r52; addli r1, r1, dump_stack - . }
- { move r3, sp; j _dump_stack }
- jrp lr /* keep backtracer happy */
- STD_ENDPROC(dump_stack)
-
STD_ENTRY(KBacktraceIterator_init_current)
{ move r2, lr; lnk r1 }
{ move r4, r52; addli r1, r1, KBacktraceIterator_init_current - . }
gensym hv_get_ipi_pte, 0x700, 32
gensym hv_set_pte_super_shift, 0x720, 32
gensym hv_console_set_ipi, 0x7e0, 32
-gensym hv_glue_internals, 0x800, 30720
+gensym hv_send_nmi, 0x820, 32
+gensym hv_glue_internals, 0x820, 30688
#define hv_get_ipi_pte _hv_get_ipi_pte
#define hv_set_pte_super_shift _hv_set_pte_super_shift
#define hv_console_set_ipi _hv_console_set_ipi
+#define hv_send_nmi _hv_send_nmi
#include <hv/hypervisor.h>
#undef hv_init
#undef hv_install_context
#undef hv_get_ipi_pte
#undef hv_set_pte_super_shift
#undef hv_console_set_ipi
+#undef hv_send_nmi
/*
* Provide macros based on <linux/syscalls.h> to provide a wrapper
HV_VirtAddr, tlb_va, unsigned long, tlb_length,
unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask,
HV_Remote_ASID*, asids, int, asidcount)
+HV_WRAP3(HV_NMI_Info, hv_send_nmi, HV_Coord, tile, unsigned long, info,
+ __hv64, flags)
.ifc \c_routine, handle_perf_interrupt
mfspr r2, AUX_PERF_COUNT_STS
.endif
+ .ifc \c_routine, do_nmi
+ mfspr r2, SPR_SYSTEM_SAVE_K_2 /* nmi type */
+ .else
+ .endif
.endif
.endif
.endif
/* Synthetic interrupt delivered only by the simulator */
int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint
+ /* Synthetic interrupt delivered by hv */
+ int_hand INT_NMI_DWNCL, NMI_DWNCL, do_nmi, handle_nmi
#include <linux/kernel.h>
#include <linux/tracehook.h>
#include <linux/signal.h>
+#include <linux/delay.h>
#include <linux/context_tracking.h>
#include <asm/stack.h>
#include <asm/switch_to.h>
(CALLEE_SAVED_REGS_COUNT - 2) * sizeof(unsigned long));
callee_regs[0] = sp; /* r30 = function */
callee_regs[1] = arg; /* r31 = arg */
- childregs->ex1 = PL_ICS_EX1(KERNEL_PL, 0);
p->thread.pc = (unsigned long) ret_from_kernel_thread;
return 0;
}
#endif
}
-void show_regs(struct pt_regs *regs)
+void tile_show_regs(struct pt_regs *regs)
{
- struct task_struct *tsk = validate_current();
int i;
-
- if (tsk != &corrupt_current)
- show_regs_print_info(KERN_ERR);
#ifdef __tilegx__
for (i = 0; i < 17; i++)
- pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n",
+ pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
i, regs->regs[i], i+18, regs->regs[i+18],
i+36, regs->regs[i+36]);
- pr_err(" r17: " REGFMT " r35: " REGFMT " tp : " REGFMT "\n",
+ pr_err(" r17: "REGFMT" r35: "REGFMT" tp : "REGFMT"\n",
regs->regs[17], regs->regs[35], regs->tp);
- pr_err(" sp : " REGFMT " lr : " REGFMT "\n", regs->sp, regs->lr);
+ pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr);
#else
for (i = 0; i < 13; i++)
- pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n",
+ pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT
+ " r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
i, regs->regs[i], i+14, regs->regs[i+14],
i+27, regs->regs[i+27], i+40, regs->regs[i+40]);
- pr_err(" r13: " REGFMT " tp : " REGFMT " sp : " REGFMT " lr : " REGFMT "\n",
+ pr_err(" r13: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n",
regs->regs[13], regs->tp, regs->sp, regs->lr);
#endif
- pr_err(" pc : " REGFMT " ex1: %ld faultnum: %ld\n",
- regs->pc, regs->ex1, regs->faultnum);
+ pr_err(" pc : "REGFMT" ex1: %ld faultnum: %ld flags:%s%s%s%s\n",
+ regs->pc, regs->ex1, regs->faultnum,
+ is_compat_task() ? " compat" : "",
+ (regs->flags & PT_FLAGS_DISABLE_IRQ) ? " noirq" : "",
+ !(regs->flags & PT_FLAGS_CALLER_SAVES) ? " nocallersave" : "",
+ (regs->flags & PT_FLAGS_RESTORE_REGS) ? " restoreregs" : "");
+}
+
+void show_regs(struct pt_regs *regs)
+{
+ struct KBacktraceIterator kbt;
+
+ show_regs_print_info(KERN_DEFAULT);
+ tile_show_regs(regs);
+
+ KBacktraceIterator_init(&kbt, NULL, regs);
+ tile_show_stack(&kbt);
+}
+
+/* To ensure stack dump on tiles occurs one by one. */
+static DEFINE_SPINLOCK(backtrace_lock);
+/* To ensure no backtrace occurs before all of the stack dump are done. */
+static atomic_t backtrace_cpus;
+/* The cpu mask to avoid reentrance. */
+static struct cpumask backtrace_mask;
- dump_stack_regs(regs);
+void do_nmi_dump_stack(struct pt_regs *regs)
+{
+ int is_idle = is_idle_task(current) && !in_interrupt();
+ int cpu;
+
+ nmi_enter();
+ cpu = smp_processor_id();
+ if (WARN_ON_ONCE(!cpumask_test_and_clear_cpu(cpu, &backtrace_mask)))
+ goto done;
+
+ spin_lock(&backtrace_lock);
+ if (is_idle)
+ pr_info("CPU: %d idle\n", cpu);
+ else
+ show_regs(regs);
+ spin_unlock(&backtrace_lock);
+ atomic_dec(&backtrace_cpus);
+done:
+ nmi_exit();
+}
+
+#ifdef __tilegx__
+void arch_trigger_all_cpu_backtrace(bool self)
+{
+ struct cpumask mask;
+ HV_Coord tile;
+ unsigned int timeout;
+ int cpu;
+ int ongoing;
+ HV_NMI_Info info[NR_CPUS];
+
+ ongoing = atomic_cmpxchg(&backtrace_cpus, 0, num_online_cpus() - 1);
+ if (ongoing != 0) {
+ pr_err("Trying to do all-cpu backtrace.\n");
+ pr_err("But another all-cpu backtrace is ongoing (%d cpus left)\n",
+ ongoing);
+ if (self) {
+ pr_err("Reporting the stack on this cpu only.\n");
+ dump_stack();
+ }
+ return;
+ }
+
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+ cpumask_copy(&backtrace_mask, &mask);
+
+ /* Backtrace for myself first. */
+ if (self)
+ dump_stack();
+
+ /* Tentatively dump stack on remote tiles via NMI. */
+ timeout = 100;
+ while (!cpumask_empty(&mask) && timeout) {
+ for_each_cpu(cpu, &mask) {
+ tile.x = cpu_x(cpu);
+ tile.y = cpu_y(cpu);
+ info[cpu] = hv_send_nmi(tile, TILE_NMI_DUMP_STACK, 0);
+ if (info[cpu].result == HV_NMI_RESULT_OK)
+ cpumask_clear_cpu(cpu, &mask);
+ }
+
+ mdelay(10);
+ timeout--;
+ }
+
+ /* Warn about cpus stuck in ICS and decrement their counts here. */
+ if (!cpumask_empty(&mask)) {
+ for_each_cpu(cpu, &mask) {
+ switch (info[cpu].result) {
+ case HV_NMI_RESULT_FAIL_ICS:
+ pr_warn("Skipping stack dump of cpu %d in ICS at pc %#llx\n",
+ cpu, info[cpu].pc);
+ break;
+ case HV_NMI_RESULT_FAIL_HV:
+ pr_warn("Skipping stack dump of cpu %d in hypervisor\n",
+ cpu);
+ break;
+ case HV_ENOSYS:
+ pr_warn("Hypervisor too old to allow remote stack dumps.\n");
+ goto skip_for_each;
+ default: /* should not happen */
+ pr_warn("Skipping stack dump of cpu %d [%d,%#llx]\n",
+ cpu, info[cpu].result, info[cpu].pc);
+ break;
+ }
+ }
+skip_for_each:
+ atomic_sub(cpumask_weight(&mask), &backtrace_cpus);
+ }
}
+#endif /* __tilegx_ */
* per-CPU stack and boot info.
*/
DEFINE_PER_CPU(unsigned long, boot_sp) =
- (unsigned long)init_stack + THREAD_SIZE;
+ (unsigned long)init_stack + THREAD_SIZE - STACK_TOP_DELTA;
#ifdef CONFIG_SMP
DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel;
#include <linux/mmzone.h>
#include <linux/dcache.h>
#include <linux/fs.h>
+#include <linux/hardirq.h>
#include <linux/string.h>
#include <asm/backtrace.h>
#include <asm/page.h>
if (kbt->verbose)
pr_err(" <%s while in user mode>\n", fault);
} else {
- if (kbt->verbose)
+ if (kbt->verbose && (p->pc != 0 || p->sp != 0 || p->ex1 != 0))
pr_err(" (odd fault: pc %#lx, sp %#lx, ex1 %#lx?)\n",
p->pc, p->sp, p->ex1);
return NULL;
return p;
}
-/* Is the pc pointing to a sigreturn trampoline? */
-static int is_sigreturn(unsigned long pc)
+/* Is the iterator pointing to a sigreturn trampoline? */
+static int is_sigreturn(struct KBacktraceIterator *kbt)
{
- return current->mm && (pc == VDSO_SYM(&__vdso_rt_sigreturn));
+ return kbt->task->mm &&
+ (kbt->it.pc == ((ulong)kbt->task->mm->context.vdso_base +
+ (ulong)&__vdso_rt_sigreturn));
}
/* Return a pt_regs pointer for a valid signal handler frame */
{
BacktraceIterator *b = &kbt->it;
- if (is_sigreturn(b->pc) && b->sp < PAGE_OFFSET &&
+ if (is_sigreturn(kbt) && b->sp < PAGE_OFFSET &&
b->sp % sizeof(long) == 0) {
int retval;
pagefault_disable();
return NULL;
}
-static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt)
-{
- return is_sigreturn(kbt->it.pc);
-}
-
static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt)
{
struct pt_regs *p;
{
for (;;) {
do {
- if (!KBacktraceIterator_is_sigreturn(kbt))
+ if (!is_sigreturn(kbt))
return KBT_ONGOING;
} while (backtrace_next(&kbt->it));
*/
static bool start_backtrace(void)
{
- if (current->thread.in_backtrace) {
+ if (current_thread_info()->in_backtrace) {
pr_err("Backtrace requested while in backtrace!\n");
return false;
}
- current->thread.in_backtrace = true;
+ current_thread_info()->in_backtrace = true;
return true;
}
static void end_backtrace(void)
{
- current->thread.in_backtrace = false;
+ current_thread_info()->in_backtrace = false;
}
/*
* This method wraps the backtracer's more generic support.
* It is only invoked from the architecture-specific code; show_stack()
- * and dump_stack() (in entry.S) are architecture-independent entry points.
+ * and dump_stack() are architecture-independent entry points.
*/
-void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
+void tile_show_stack(struct KBacktraceIterator *kbt)
{
int i;
int have_mmap_sem = 0;
if (!start_backtrace())
return;
- if (headers) {
- /*
- * Add a blank line since if we are called from panic(),
- * then bust_spinlocks() spit out a space in front of us
- * and it will mess up our KERN_ERR.
- */
- pr_err("Starting stack dump of tid %d, pid %d (%s) on cpu %d at cycle %lld\n",
- kbt->task->pid, kbt->task->tgid, kbt->task->comm,
- raw_smp_processor_id(), get_cycles());
- }
kbt->verbose = 1;
i = 0;
for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) {
char namebuf[KSYM_NAME_LEN+100];
unsigned long address = kbt->it.pc;
- /* Try to acquire the mmap_sem as we pass into userspace. */
- if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm)
+ /*
+ * Try to acquire the mmap_sem as we pass into userspace.
+ * If we're in an interrupt context, don't even try, since
+ * it's not safe to call e.g. d_path() from an interrupt,
+ * since it uses spin locks without disabling interrupts.
+ * Note we test "kbt->task == current", not "kbt->is_current",
+ * since we're checking that "current" will work in d_path().
+ */
+ if (kbt->task == current && address < PAGE_OFFSET &&
+ !have_mmap_sem && kbt->task->mm && !in_interrupt()) {
have_mmap_sem =
down_read_trylock(&kbt->task->mm->mmap_sem);
+ }
describe_addr(kbt, address, have_mmap_sem,
namebuf, sizeof(namebuf));
}
if (kbt->end == KBT_LOOP)
pr_err("Stack dump stopped; next frame identical to this one\n");
- if (headers)
- pr_err("Stack dump complete\n");
if (have_mmap_sem)
up_read(&kbt->task->mm->mmap_sem);
end_backtrace();
}
EXPORT_SYMBOL(tile_show_stack);
-
-/* This is called from show_regs() and _dump_stack() */
-void dump_stack_regs(struct pt_regs *regs)
-{
- struct KBacktraceIterator kbt;
- KBacktraceIterator_init(&kbt, NULL, regs);
- tile_show_stack(&kbt, 1);
-}
-EXPORT_SYMBOL(dump_stack_regs);
-
static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs,
ulong pc, ulong lr, ulong sp, ulong r52)
{
return regs;
}
-/* This is called from dump_stack() and just converts to pt_regs */
+/* Deprecated function currently only used by kernel_double_fault(). */
void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
{
+ struct KBacktraceIterator kbt;
struct pt_regs regs;
- dump_stack_regs(regs_to_pt_regs(®s, pc, lr, sp, r52));
+
+ regs_to_pt_regs(®s, pc, lr, sp, r52);
+ KBacktraceIterator_init(&kbt, NULL, ®s);
+ tile_show_stack(&kbt);
}
/* This is called from KBacktraceIterator_init_current() */
regs_to_pt_regs(®s, pc, lr, sp, r52));
}
-/* This is called only from kernel/sched/core.c, with esp == NULL */
+/*
+ * Called from sched_show_task() with task != NULL, or dump_stack()
+ * with task == NULL. The esp argument is always NULL.
+ */
void show_stack(struct task_struct *task, unsigned long *esp)
{
struct KBacktraceIterator kbt;
- if (task == NULL || task == current)
+ if (task == NULL || task == current) {
KBacktraceIterator_init_current(&kbt);
- else
+ KBacktraceIterator_next(&kbt); /* don't show first frame */
+ } else {
KBacktraceIterator_init(&kbt, task, NULL);
- tile_show_stack(&kbt, 0);
+ }
+ tile_show_stack(&kbt);
}
#ifdef CONFIG_STACKTRACE
/* Support generic Linux stack API too */
-void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
+static void save_stack_trace_common(struct task_struct *task,
+ struct pt_regs *regs,
+ bool user,
+ struct stack_trace *trace)
{
struct KBacktraceIterator kbt;
int skip = trace->skip;
if (!start_backtrace())
goto done;
- if (task == NULL || task == current)
+ if (regs != NULL) {
+ KBacktraceIterator_init(&kbt, NULL, regs);
+ } else if (task == NULL || task == current) {
KBacktraceIterator_init_current(&kbt);
- else
+ skip++; /* don't show KBacktraceIterator_init_current */
+ } else {
KBacktraceIterator_init(&kbt, task, NULL);
+ }
for (; !KBacktraceIterator_end(&kbt); KBacktraceIterator_next(&kbt)) {
if (skip) {
--skip;
continue;
}
- if (i >= trace->max_entries || kbt.it.pc < PAGE_OFFSET)
+ if (i >= trace->max_entries ||
+ (!user && kbt.it.pc < PAGE_OFFSET))
break;
trace->entries[i++] = kbt.it.pc;
}
end_backtrace();
done:
+ if (i < trace->max_entries)
+ trace->entries[i++] = ULONG_MAX;
trace->nr_entries = i;
}
+
+void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
+{
+ save_stack_trace_common(task, NULL, false, trace);
+}
EXPORT_SYMBOL(save_stack_trace_tsk);
void save_stack_trace(struct stack_trace *trace)
{
- save_stack_trace_tsk(NULL, trace);
+ save_stack_trace_common(NULL, NULL, false, trace);
}
EXPORT_SYMBOL_GPL(save_stack_trace);
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+ save_stack_trace_common(NULL, regs, false, trace);
+}
+
+void save_stack_trace_user(struct stack_trace *trace)
+{
+ /* Trace user stack if we are not a kernel thread. */
+ if (current->mm)
+ save_stack_trace_common(NULL, task_pt_regs(current),
+ true, trace);
+ else if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
#endif
/* In entry.S */
exception_exit(prev_state);
}
+void do_nmi(struct pt_regs *regs, int fault_num, unsigned long reason)
+{
+ switch (reason) {
+ case TILE_NMI_DUMP_STACK:
+ do_nmi_dump_stack(regs);
+ break;
+ default:
+ panic("Unexpected do_nmi type %ld", reason);
+ return;
+ }
+}
+
+/* Deprecated function currently only used here. */
+extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
+
void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
{
_dump_stack(dummy, pc, lr, sp, r52);
u64 ns;
do {
- count = read_seqcount_begin(&vdso->tb_seq);
+ count = raw_read_seqcount_begin(&vdso->tb_seq);
ts->tv_sec = vdso->wall_time_sec;
ns = vdso->wall_time_snsec;
ns += vgetsns(vdso);
u64 ns;
do {
- count = read_seqcount_begin(&vdso->tb_seq);
+ count = raw_read_seqcount_begin(&vdso->tb_seq);
ts->tv_sec = vdso->monotonic_time_sec;
ns = vdso->monotonic_time_snsec;
ns += vgetsns(vdso);
unsigned count;
do {
- count = read_seqcount_begin(&vdso->tb_seq);
+ count = raw_read_seqcount_begin(&vdso->tb_seq);
ts->tv_sec = vdso->wall_time_coarse_sec;
ts->tv_nsec = vdso->wall_time_coarse_nsec;
} while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
unsigned count;
do {
- count = read_seqcount_begin(&vdso->tb_seq);
+ count = raw_read_seqcount_begin(&vdso->tb_seq);
ts->tv_sec = vdso->monotonic_time_coarse_sec;
ts->tv_nsec = vdso->monotonic_time_coarse_nsec;
} while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
/* The use of the timezone is obsolete, normally tz is NULL. */
if (unlikely(tz != NULL)) {
do {
- count = read_seqcount_begin(&vdso->tz_seq);
+ count = raw_read_seqcount_begin(&vdso->tz_seq);
tz->tz_minuteswest = vdso->tz_minuteswest;
tz->tz_dsttime = vdso->tz_dsttime;
} while (unlikely(read_seqcount_retry(&vdso->tz_seq, count)));
/* arch/tile/lib/usercopy.S */
#include <linux/uaccess.h>
-EXPORT_SYMBOL(strnlen_user_asm);
-EXPORT_SYMBOL(strncpy_from_user_asm);
EXPORT_SYMBOL(clear_user_asm);
EXPORT_SYMBOL(flush_user_asm);
EXPORT_SYMBOL(finv_user_asm);
#include <linux/kernel.h>
#include <asm/processor.h>
EXPORT_SYMBOL(current_text_addr);
-EXPORT_SYMBOL(dump_stack);
/* arch/tile/kernel/head.S */
EXPORT_SYMBOL(empty_zero_page);
void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
u32 iterations = 0;
- while (arch_spin_is_locked(lock))
+ int curr = READ_ONCE(lock->current_ticket);
+ int next = READ_ONCE(lock->next_ticket);
+
+ /* Return immediately if unlocked. */
+ if (next == curr)
+ return;
+
+ /* Wait until the current locker has released the lock. */
+ do {
delay_backoff(iterations++);
+ } while (READ_ONCE(lock->current_ticket) == curr);
}
EXPORT_SYMBOL(arch_spin_unlock_wait);
void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
u32 iterations = 0;
- while (arch_spin_is_locked(lock))
+ u32 val = READ_ONCE(lock->lock);
+ u32 curr = arch_spin_current(val);
+
+ /* Return immediately if unlocked. */
+ if (arch_spin_next(val) == curr)
+ return;
+
+ /* Wait until the current locker has released the lock. */
+ do {
delay_backoff(iterations++);
+ } while (arch_spin_current(READ_ONCE(lock->lock)) == curr);
}
EXPORT_SYMBOL(arch_spin_unlock_wait);
/* Access user memory, but use MMU to avoid propagating kernel exceptions. */
-/*
- * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
- * It returns the length, including the terminating NUL, or zero on exception.
- * If length is greater than the bound, returns one plus the bound.
- */
-STD_ENTRY(strnlen_user_asm)
- { bz r1, 2f; addi r3, r0, -1 } /* bias down to include NUL */
-1: { lb_u r4, r0; addi r1, r1, -1 }
- bz r4, 2f
- { bnzt r1, 1b; addi r0, r0, 1 }
-2: { sub r0, r0, r3; jrp lr }
- STD_ENDPROC(strnlen_user_asm)
- .pushsection .fixup,"ax"
-strnlen_user_fault:
- { move r0, zero; jrp lr }
- ENDPROC(strnlen_user_fault)
- .section __ex_table,"a"
- .align 4
- .word 1b, strnlen_user_fault
- .popsection
-
-/*
- * strncpy_from_user_asm takes the kernel target pointer in r0,
- * the userspace source pointer in r1, and the length bound (including
- * the trailing NUL) in r2. On success, it returns the string length
- * (not including the trailing NUL), or -EFAULT on failure.
- */
-STD_ENTRY(strncpy_from_user_asm)
- { bz r2, 2f; move r3, r0 }
-1: { lb_u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
- { sb r0, r4; addi r0, r0, 1 }
- bz r4, 2f
- bnzt r2, 1b
- { sub r0, r0, r3; jrp lr }
-2: addi r0, r0, -1 /* don't count the trailing NUL */
- { sub r0, r0, r3; jrp lr }
- STD_ENDPROC(strncpy_from_user_asm)
- .pushsection .fixup,"ax"
-strncpy_from_user_fault:
- { movei r0, -EFAULT; jrp lr }
- ENDPROC(strncpy_from_user_fault)
- .section __ex_table,"a"
- .align 4
- .word 1b, strncpy_from_user_fault
- .popsection
-
/*
* clear_user_asm takes the user target address in r0 and the
* number of bytes to zero in r1.
/* Access user memory, but use MMU to avoid propagating kernel exceptions. */
-/*
- * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
- * It returns the length, including the terminating NUL, or zero on exception.
- * If length is greater than the bound, returns one plus the bound.
- */
-STD_ENTRY(strnlen_user_asm)
- { beqz r1, 2f; addi r3, r0, -1 } /* bias down to include NUL */
-1: { ld1u r4, r0; addi r1, r1, -1 }
- beqz r4, 2f
- { bnezt r1, 1b; addi r0, r0, 1 }
-2: { sub r0, r0, r3; jrp lr }
- STD_ENDPROC(strnlen_user_asm)
- .pushsection .fixup,"ax"
-strnlen_user_fault:
- { move r0, zero; jrp lr }
- ENDPROC(strnlen_user_fault)
- .section __ex_table,"a"
- .align 8
- .quad 1b, strnlen_user_fault
- .popsection
-
-/*
- * strncpy_from_user_asm takes the kernel target pointer in r0,
- * the userspace source pointer in r1, and the length bound (including
- * the trailing NUL) in r2. On success, it returns the string length
- * (not including the trailing NUL), or -EFAULT on failure.
- */
-STD_ENTRY(strncpy_from_user_asm)
- { beqz r2, 2f; move r3, r0 }
-1: { ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
- { st1 r0, r4; addi r0, r0, 1 }
- beqz r4, 2f
- bnezt r2, 1b
- { sub r0, r0, r3; jrp lr }
-2: addi r0, r0, -1 /* don't count the trailing NUL */
- { sub r0, r0, r3; jrp lr }
- STD_ENDPROC(strncpy_from_user_asm)
- .pushsection .fixup,"ax"
-strncpy_from_user_fault:
- { movei r0, -EFAULT; jrp lr }
- ENDPROC(strncpy_from_user_fault)
- .section __ex_table,"a"
- .align 8
- .quad 1b, strncpy_from_user_fault
- .popsection
-
/*
* clear_user_asm takes the user target address in r0 and the
* number of bytes to zero in r1.
* interrupt away appropriately and return immediately. We can't do
* page faults for user code while in kernel mode.
*/
-void do_page_fault(struct pt_regs *regs, int fault_num,
- unsigned long address, unsigned long write)
+static inline void __do_page_fault(struct pt_regs *regs, int fault_num,
+ unsigned long address, unsigned long write)
{
int is_page_fault;
- enum ctx_state prev_state = exception_enter();
#ifdef CONFIG_KPROBES
/*
*/
if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
regs->faultnum, SIGSEGV) == NOTIFY_STOP)
- goto done;
+ return;
#endif
#ifdef __tilegx__
async->is_fault = is_page_fault;
async->is_write = write;
async->address = address;
- goto done;
+ return;
}
}
#endif
handle_page_fault(regs, fault_num, is_page_fault, address, write);
+}
-done:
+void do_page_fault(struct pt_regs *regs, int fault_num,
+ unsigned long address, unsigned long write)
+{
+ enum ctx_state prev_state = exception_enter();
+ __do_page_fault(regs, fault_num, address, write);
exception_exit(prev_state);
}
-
#if CHIP_HAS_TILE_DMA()
/*
* This routine effectively re-issues asynchronous page faults
int ret;
#ifdef DEBUG
- kparam_block_sysfs_write(dsp);
+ kernel_param_lock(THIS_MODULE);
printk(KERN_DEBUG "hostaudio: open called (host: %s)\n", dsp);
- kparam_unblock_sysfs_write(dsp);
+ kernel_param_unlock(THIS_MODULE);
#endif
state = kmalloc(sizeof(struct hostaudio_state), GFP_KERNEL);
if (file->f_mode & FMODE_WRITE)
w = 1;
- kparam_block_sysfs_write(dsp);
+ kernel_param_lock(THIS_MODULE);
mutex_lock(&hostaudio_mutex);
ret = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0);
mutex_unlock(&hostaudio_mutex);
- kparam_unblock_sysfs_write(dsp);
+ kernel_param_unlock(THIS_MODULE);
if (ret < 0) {
kfree(state);
if (file->f_mode & FMODE_WRITE)
w = 1;
- kparam_block_sysfs_write(mixer);
+ kernel_param_lock(THIS_MODULE);
mutex_lock(&hostaudio_mutex);
ret = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0);
mutex_unlock(&hostaudio_mutex);
- kparam_unblock_sysfs_write(mixer);
+ kernel_param_unlock(THIS_MODULE);
if (ret < 0) {
- kparam_block_sysfs_write(dsp);
+ kernel_param_lock(THIS_MODULE);
printk(KERN_ERR "hostaudio_open_mixdev failed to open '%s', "
"err = %d\n", dsp, -ret);
- kparam_unblock_sysfs_write(dsp);
+ kernel_param_unlock(THIS_MODULE);
kfree(state);
return ret;
}
static int __init hostaudio_init_module(void)
{
- __kernel_param_lock();
+ kernel_param_lock(THIS_MODULE);
printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n",
dsp, mixer);
- __kernel_param_unlock();
+ kernel_param_unlock(THIS_MODULE);
module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1);
if (module_data.dev_audio < 0) {
--- /dev/null
+# global x86 required specific stuff
+# On 32-bit HIGHMEM4G is not allowed
+CONFIG_HIGHMEM64G=y
+CONFIG_64BIT=y
+
+# These enable us to allow some of the
+# not so generic stuff below
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PCI=y
+CONFIG_PCI_MSI=y
+CONFIG_X86_MCE=y
+CONFIG_ACPI_PROCESSOR=y
+CONFIG_CPU_FREQ=y
+
+# x86 xen specific config options
+CONFIG_XEN_PVH=y
+CONFIG_XEN_MAX_DOMAIN_MEMORY=500
+CONFIG_XEN_SAVE_RESTORE=y
+# CONFIG_XEN_DEBUG_FS is not set
+CONFIG_XEN_MCE_LOG=y
+CONFIG_XEN_ACPI_PROCESSOR=m
+# x86 specific backend drivers
+CONFIG_XEN_PCIDEV_BACKEND=m
+# x86 specific frontend drivers
+CONFIG_XEN_PCIDEV_FRONTEND=m
+# depends on MEMORY_HOTPLUG, arm64 doesn't enable this yet,
+# move to generic config if it ever does.
+CONFIG_XEN_BALLOON_MEMORY_HOTPLUG=y
crypto_fpu_exit();
}
-module_init(aesni_init);
+late_initcall(aesni_init);
module_exit(aesni_exit);
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized");
return 0;
}
-static struct kernel_param_ops audit_param_ops = {
+static const struct kernel_param_ops audit_param_ops = {
.set = mmu_audit_set,
.get = param_get_bool,
};
return 0;
}
-static struct kernel_param_ops param_ops_local64 = {
+static const struct kernel_param_ops param_ops_local64 = {
.get = param_get_local64,
.set = param_set_local64,
};
goto out_put_request;
}
- ret = -EFAULT;
- if (blk_fill_sghdr_rq(q, rq, hdr, mode))
+ ret = blk_fill_sghdr_rq(q, rq, hdr, mode);
+ if (ret < 0)
goto out_free_cdb;
ret = 0;
return -ENOMEM;
error = request_range(start, end, space_id, flags, desc);
- if (error)
+ if (error) {
+ kfree(reg);
return error;
+ }
reg->start = start;
reg->end = end;
bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname)
{
if (is_of_node(fwnode))
- return of_property_read_bool(of_node(fwnode), propname);
+ return of_property_read_bool(to_of_node(fwnode), propname);
else if (is_acpi_node(fwnode))
- return !acpi_dev_prop_get(acpi_node(fwnode), propname, NULL);
+ return !acpi_dev_prop_get(to_acpi_node(fwnode), propname, NULL);
return !!pset_prop_get(to_pset(fwnode), propname);
}
({ \
int _ret_; \
if (is_of_node(_fwnode_)) \
- _ret_ = OF_DEV_PROP_READ_ARRAY(of_node(_fwnode_), _propname_, \
+ _ret_ = OF_DEV_PROP_READ_ARRAY(to_of_node(_fwnode_), _propname_, \
_type_, _val_, _nval_); \
else if (is_acpi_node(_fwnode_)) \
- _ret_ = acpi_dev_prop_read(acpi_node(_fwnode_), _propname_, \
+ _ret_ = acpi_dev_prop_read(to_acpi_node(_fwnode_), _propname_, \
_proptype_, _val_, _nval_); \
else \
_ret_ = pset_prop_read_array(to_pset(_fwnode_), _propname_, \
{
if (is_of_node(fwnode))
return val ?
- of_property_read_string_array(of_node(fwnode), propname,
- val, nval) :
- of_property_count_strings(of_node(fwnode), propname);
+ of_property_read_string_array(to_of_node(fwnode),
+ propname, val, nval) :
+ of_property_count_strings(to_of_node(fwnode), propname);
else if (is_acpi_node(fwnode))
- return acpi_dev_prop_read(acpi_node(fwnode), propname,
+ return acpi_dev_prop_read(to_acpi_node(fwnode), propname,
DEV_PROP_STRING, val, nval);
return pset_prop_read_array(to_pset(fwnode), propname,
const char *propname, const char **val)
{
if (is_of_node(fwnode))
- return of_property_read_string(of_node(fwnode), propname, val);
+ return of_property_read_string(to_of_node(fwnode), propname, val);
else if (is_acpi_node(fwnode))
- return acpi_dev_prop_read(acpi_node(fwnode), propname,
+ return acpi_dev_prop_read(to_acpi_node(fwnode), propname,
DEV_PROP_STRING, val, 1);
return -ENXIO;
if (IS_ENABLED(CONFIG_OF) && dev->of_node) {
struct device_node *node;
- node = of_get_next_available_child(dev->of_node, of_node(child));
+ node = of_get_next_available_child(dev->of_node, to_of_node(child));
if (node)
return &node->fwnode;
} else if (IS_ENABLED(CONFIG_ACPI)) {
struct acpi_device *node;
- node = acpi_get_next_child(dev, acpi_node(child));
+ node = acpi_get_next_child(dev, to_acpi_node(child));
if (node)
return acpi_fwnode_handle(node);
}
void fwnode_handle_put(struct fwnode_handle *fwnode)
{
if (is_of_node(fwnode))
- of_node_put(of_node(fwnode));
+ of_node_put(to_of_node(fwnode));
}
EXPORT_SYMBOL_GPL(fwnode_handle_put);
return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ);
}
-static struct kernel_param_ops null_queue_mode_param_ops = {
+static const struct kernel_param_ops null_queue_mode_param_ops = {
.set = null_set_queue_mode,
.get = param_get_int,
};
NULL_IRQ_TIMER);
}
-static struct kernel_param_ops null_irqmode_param_ops = {
+static const struct kernel_param_ops null_irqmode_param_ops = {
.set = null_set_irqmode,
.get = param_get_int,
};
return 0;
}
+static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+{
+ struct nvme_queue *nvmeq = hctx->driver_data;
+
+ nvmeq->tags = NULL;
+}
+
static int nvme_admin_init_request(void *data, struct request *req,
unsigned int hctx_idx, unsigned int rq_idx,
unsigned int numa_node)
return;
}
if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
- req->errors = status;
+ if (cmd_rq->ctx == CMD_CTX_CANCELLED)
+ req->errors = -EINTR;
+ else
+ req->errors = status;
} else {
req->errors = nvme_error_status(status);
}
int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
{
- struct nvme_command c = {
- .identify.opcode = nvme_admin_identify,
- .identify.cns = cpu_to_le32(1),
- };
+ struct nvme_command c = { };
int error;
+ /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.cns = cpu_to_le32(1);
+
*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
if (!*id)
return -ENOMEM;
int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
struct nvme_id_ns **id)
{
- struct nvme_command c = {
- .identify.opcode = nvme_admin_identify,
- .identify.nsid = cpu_to_le32(nsid),
- };
+ struct nvme_command c = { };
int error;
+ /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+ c.identify.opcode = nvme_admin_identify,
+ c.identify.nsid = cpu_to_le32(nsid),
+
*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
if (!*id)
return -ENOMEM;
int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log)
{
- struct nvme_command c = {
- .common.opcode = nvme_admin_get_log_page,
- .common.nsid = cpu_to_le32(0xFFFFFFFF),
- .common.cdw10[0] = cpu_to_le32(
+ struct nvme_command c = { };
+ int error;
+
+ c.common.opcode = nvme_admin_get_log_page,
+ c.common.nsid = cpu_to_le32(0xFFFFFFFF),
+ c.common.cdw10[0] = cpu_to_le32(
(((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
NVME_LOG_SMART),
- };
- int error;
*log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
if (!*log)
.queue_rq = nvme_queue_rq,
.map_queue = blk_mq_map_queue,
.init_hctx = nvme_admin_init_hctx,
+ .exit_hctx = nvme_admin_exit_hctx,
.init_request = nvme_admin_init_request,
.timeout = nvme_timeout,
};
}
if (!blk_get_queue(dev->admin_q)) {
nvme_dev_remove_admin(dev);
+ dev->admin_q = NULL;
return -ENODEV;
}
} else
}
kfree(ctrl);
- dev->tagset.ops = &nvme_mq_ops;
- dev->tagset.nr_hw_queues = dev->online_queues - 1;
- dev->tagset.timeout = NVME_IO_TIMEOUT;
- dev->tagset.numa_node = dev_to_node(dev->dev);
- dev->tagset.queue_depth =
+ if (!dev->tagset.tags) {
+ dev->tagset.ops = &nvme_mq_ops;
+ dev->tagset.nr_hw_queues = dev->online_queues - 1;
+ dev->tagset.timeout = NVME_IO_TIMEOUT;
+ dev->tagset.numa_node = dev_to_node(dev->dev);
+ dev->tagset.queue_depth =
min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
- dev->tagset.cmd_size = nvme_cmd_size(dev);
- dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
- dev->tagset.driver_data = dev;
-
- if (blk_mq_alloc_tag_set(&dev->tagset))
- return 0;
+ dev->tagset.cmd_size = nvme_cmd_size(dev);
+ dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
+ dev->tagset.driver_data = dev;
+ if (blk_mq_alloc_tag_set(&dev->tagset))
+ return 0;
+ }
schedule_work(&dev->scan_work);
return 0;
}
put_device(dev->device);
nvme_free_namespaces(dev);
nvme_release_instance(dev);
- blk_mq_free_tag_set(&dev->tagset);
- blk_put_queue(dev->admin_q);
+ if (dev->tagset.tags)
+ blk_mq_free_tag_set(&dev->tagset);
+ if (dev->admin_q)
+ blk_put_queue(dev->admin_q);
kfree(dev->queues);
kfree(dev->entry);
kfree(dev);
free_tags:
nvme_dev_remove_admin(dev);
+ blk_put_queue(dev->admin_q);
+ dev->admin_q = NULL;
+ dev->queues[0]->tags = NULL;
disable:
nvme_disable_queue(dev, 0);
nvme_dev_list_remove(dev);
spin_unlock(&dev_list_lock);
} else {
nvme_unfreeze_queues(dev);
- schedule_work(&dev->scan_work);
+ nvme_dev_add(dev);
nvme_set_irq_hints(dev);
}
return 0;
}
+static void nvme_dead_ctrl(struct nvme_dev *dev)
+{
+ dev_warn(dev->dev, "Device failed to resume\n");
+ kref_get(&dev->kref);
+ if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
+ dev->instance))) {
+ dev_err(dev->dev,
+ "Failed to start controller remove task\n");
+ kref_put(&dev->kref, nvme_free_dev);
+ }
+}
+
static void nvme_dev_reset(struct nvme_dev *dev)
{
+ bool in_probe = work_busy(&dev->probe_work);
+
nvme_dev_shutdown(dev);
- if (nvme_dev_resume(dev)) {
- dev_warn(dev->dev, "Device failed to resume\n");
- kref_get(&dev->kref);
- if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
- dev->instance))) {
- dev_err(dev->dev,
- "Failed to start controller remove task\n");
- kref_put(&dev->kref, nvme_free_dev);
- }
+
+ /* Synchronize with device probe so that work will see failure status
+ * and exit gracefully without trying to schedule another reset */
+ flush_work(&dev->probe_work);
+
+ /* Fail this device if reset occured during probe to avoid
+ * infinite initialization loops. */
+ if (in_probe) {
+ nvme_dead_ctrl(dev);
+ return;
}
+ /* Schedule device resume asynchronously so the reset work is available
+ * to cleanup errors that may occur during reinitialization */
+ schedule_work(&dev->probe_work);
}
static void nvme_reset_failed_dev(struct work_struct *ws)
if (!ret) {
flush_work(&dev->reset_work);
+ flush_work(&dev->probe_work);
return 0;
}
static void nvme_async_probe(struct work_struct *work)
{
struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
- int result;
- result = nvme_dev_start(dev);
- if (result)
- goto reset;
-
- if (dev->online_queues > 1)
- result = nvme_dev_add(dev);
- if (result)
- goto reset;
-
- nvme_set_irq_hints(dev);
- return;
- reset:
- spin_lock(&dev_list_lock);
- if (!work_busy(&dev->reset_work)) {
- dev->reset_workfn = nvme_reset_failed_dev;
- queue_work(nvme_workq, &dev->reset_work);
- }
- spin_unlock(&dev_list_lock);
+ if (nvme_dev_resume(dev) && !work_busy(&dev->reset_work))
+ nvme_dead_ctrl(dev);
}
static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
flush_work(&dev->reset_work);
flush_work(&dev->scan_work);
device_remove_file(dev->device, &dev_attr_reset_controller);
- nvme_dev_shutdown(dev);
nvme_dev_remove(dev);
+ nvme_dev_shutdown(dev);
nvme_dev_remove_admin(dev);
device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
nvme_free_queues(dev, 0);
MODULE_PARM_DESC(max_persistent_grants,
"Maximum number of grants to map persistently");
+/*
+ * Maximum order of pages to be used for the shared ring between front and
+ * backend, 4KB page granularity is used.
+ */
+unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
/*
* The LRU mechanism to clean the lists of persistent grants needs to
* be executed periodically. The time interval between consecutive executions
struct grant_page **pages = req->segments;
unsigned int invcount;
- invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_pages,
+ invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_segs,
req->unmap, req->unmap_pages);
work->data = req;
int rc;
rc = xen_blkbk_map(pending_req->blkif, pending_req->segments,
- pending_req->nr_pages,
+ pending_req->nr_segs,
(pending_req->operation != BLKIF_OP_READ));
return rc;
int indirect_grefs, rc, n, nseg, i;
struct blkif_request_segment *segments = NULL;
- nseg = pending_req->nr_pages;
+ nseg = pending_req->nr_segs;
indirect_grefs = INDIRECT_PAGES(nseg);
BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
pending_req->id = req->u.rw.id;
pending_req->operation = req_operation;
pending_req->status = BLKIF_RSP_OKAY;
- pending_req->nr_pages = nseg;
+ pending_req->nr_segs = nseg;
if (req->operation != BLKIF_OP_INDIRECT) {
preq.dev = req->u.rw.handle;
fail_flush:
xen_blkbk_unmap(blkif, pending_req->segments,
- pending_req->nr_pages);
+ pending_req->nr_segs);
fail_response:
/* Haven't submitted any bio's yet. */
make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
if (!xen_domain())
return -ENODEV;
+ if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) {
+ pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
+ xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER);
+ xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
+ }
+
rc = xen_blkif_interface_init();
if (rc)
goto failed_init;
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>
+extern unsigned int xen_blkif_max_ring_order;
/*
* This is the maximum number of segments that would be allowed in indirect
* requests. This value will also be passed to the frontend.
#define PERSISTENT_GNT_WAS_ACTIVE 1
/* Number of requests that we can fit in a ring */
-#define XEN_BLKIF_REQS 32
+#define XEN_BLKIF_REQS_PER_PAGE 32
struct persistent_gnt {
struct page *page;
struct work_struct free_work;
/* Thread shutdown wait queue. */
wait_queue_head_t shutdown_wq;
+ unsigned int nr_ring_pages;
};
struct seg_buf {
struct pending_req {
struct xen_blkif *blkif;
u64 id;
- int nr_pages;
+ int nr_segs;
atomic_t pendcnt;
unsigned short operation;
int status;
/* Enlarge the array size in order to fully show blkback name. */
#define BLKBACK_NAME_LEN (20)
+#define RINGREF_NAME_LEN (20)
struct backend_info {
struct xenbus_device *dev;
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
struct xen_blkif *blkif;
- struct pending_req *req, *n;
- int i, j;
BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
INIT_LIST_HEAD(&blkif->pending_free);
INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
-
- for (i = 0; i < XEN_BLKIF_REQS; i++) {
- req = kzalloc(sizeof(*req), GFP_KERNEL);
- if (!req)
- goto fail;
- list_add_tail(&req->free_list,
- &blkif->pending_free);
- for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
- req->segments[j] = kzalloc(sizeof(*req->segments[0]),
- GFP_KERNEL);
- if (!req->segments[j])
- goto fail;
- }
- for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
- req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
- GFP_KERNEL);
- if (!req->indirect_pages[j])
- goto fail;
- }
- }
spin_lock_init(&blkif->pending_free_lock);
init_waitqueue_head(&blkif->pending_free_wq);
init_waitqueue_head(&blkif->shutdown_wq);
return blkif;
-
-fail:
- list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
- list_del(&req->free_list);
- for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
- if (!req->segments[j])
- break;
- kfree(req->segments[j]);
- }
- for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
- if (!req->indirect_pages[j])
- break;
- kfree(req->indirect_pages[j]);
- }
- kfree(req);
- }
-
- kmem_cache_free(xen_blkif_cachep, blkif);
-
- return ERR_PTR(-ENOMEM);
}
-static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
- unsigned int evtchn)
+static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
+ unsigned int nr_grefs, unsigned int evtchn)
{
int err;
if (blkif->irq)
return 0;
- err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1,
+ err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
&blkif->blk_ring);
if (err < 0)
return err;
{
struct blkif_sring *sring;
sring = (struct blkif_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+ BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_32:
{
struct blkif_x86_32_sring *sring_x86_32;
sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+ BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_64:
{
struct blkif_x86_64_sring *sring_x86_64;
sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+ BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs);
break;
}
default:
i++;
}
- WARN_ON(i != XEN_BLKIF_REQS);
+ WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
kmem_cache_free(xen_blkif_cachep, blkif);
}
if (err)
goto fail;
+ err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
+ xen_blkif_max_ring_order);
+ if (err)
+ pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);
+
err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err)
goto fail;
static int connect_ring(struct backend_info *be)
{
struct xenbus_device *dev = be->dev;
- unsigned long ring_ref;
- unsigned int evtchn;
+ unsigned int ring_ref[XENBUS_MAX_RING_PAGES];
+ unsigned int evtchn, nr_grefs, ring_page_order;
unsigned int pers_grants;
char protocol[64] = "";
- int err;
+ struct pending_req *req, *n;
+ int err, i, j;
pr_debug("%s %s\n", __func__, dev->otherend);
- err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
- &ring_ref, "event-channel", "%u", &evtchn, NULL);
- if (err) {
- xenbus_dev_fatal(dev, err,
- "reading %s/ring-ref and event-channel",
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+ &evtchn);
+ if (err != 1) {
+ err = -EINVAL;
+ xenbus_dev_fatal(dev, err, "reading %s/event-channel",
dev->otherend);
return err;
}
+ pr_info("event-channel %u\n", evtchn);
+
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
+ &ring_page_order);
+ if (err != 1) {
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
+ "%u", &ring_ref[0]);
+ if (err != 1) {
+ err = -EINVAL;
+ xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
+ dev->otherend);
+ return err;
+ }
+ nr_grefs = 1;
+ pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
+ ring_ref[0]);
+ } else {
+ unsigned int i;
+
+ if (ring_page_order > xen_blkif_max_ring_order) {
+ err = -EINVAL;
+ xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
+ dev->otherend, ring_page_order,
+ xen_blkif_max_ring_order);
+ return err;
+ }
+
+ nr_grefs = 1 << ring_page_order;
+ for (i = 0; i < nr_grefs; i++) {
+ char ring_ref_name[RINGREF_NAME_LEN];
+
+ snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+ err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
+ "%u", &ring_ref[i]);
+ if (err != 1) {
+ err = -EINVAL;
+ xenbus_dev_fatal(dev, err, "reading %s/%s",
+ dev->otherend, ring_ref_name);
+ return err;
+ }
+ pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
+ }
+ }
be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
be->blkif->vbd.feature_gnt_persistent = pers_grants;
be->blkif->vbd.overflow_max_grants = 0;
+ be->blkif->nr_ring_pages = nr_grefs;
- pr_info("ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
- ring_ref, evtchn, be->blkif->blk_protocol, protocol,
+ pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
+ nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
pers_grants ? "persistent grants" : "");
+ for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ goto fail;
+ list_add_tail(&req->free_list, &be->blkif->pending_free);
+ for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+ req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
+ if (!req->segments[j])
+ goto fail;
+ }
+ for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+ req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
+ GFP_KERNEL);
+ if (!req->indirect_pages[j])
+ goto fail;
+ }
+ }
+
/* Map the shared frame, irq etc. */
- err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+ err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
if (err) {
- xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
- ring_ref, evtchn);
+ xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
return err;
}
return 0;
+
+fail:
+ list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) {
+ list_del(&req->free_list);
+ for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+ if (!req->segments[j])
+ break;
+ kfree(req->segments[j]);
+ }
+ for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+ if (!req->indirect_pages[j])
+ break;
+ kfree(req->indirect_pages[j]);
+ }
+ kfree(req);
+ }
+ return -ENOMEM;
}
static const struct xenbus_device_id xen_blkbk_ids[] = {
module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)");
-#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+/*
+ * Maximum order of pages to be used for the shared ring between front and
+ * backend, 4KB page granularity is used.
+ */
+static unsigned int xen_blkif_max_ring_order;
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
+
+#define BLK_RING_SIZE(info) __CONST_RING_SIZE(blkif, PAGE_SIZE * (info)->nr_ring_pages)
+#define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * XENBUS_MAX_RING_PAGES)
+/*
+ * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
+ * characters are enough. Define to 20 to keep consist with backend.
+ */
+#define RINGREF_NAME_LEN (20)
/*
* We have one of these per vbd, whether ide, scsi or 'other'. They
int vdevice;
blkif_vdev_t handle;
enum blkif_state connected;
- int ring_ref;
+ int ring_ref[XENBUS_MAX_RING_PAGES];
+ unsigned int nr_ring_pages;
struct blkif_front_ring ring;
unsigned int evtchn, irq;
struct request_queue *rq;
struct work_struct work;
struct gnttab_free_callback callback;
- struct blk_shadow shadow[BLK_RING_SIZE];
+ struct blk_shadow shadow[BLK_MAX_RING_SIZE];
struct list_head grants;
struct list_head indirect_pages;
unsigned int persistent_gnts_c;
static unsigned long *minors;
static DEFINE_SPINLOCK(minor_lock);
-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
- (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
#define GRANT_INVALID_REF 0
#define PARTS_PER_DISK 16
static int get_id_from_freelist(struct blkfront_info *info)
{
unsigned long free = info->shadow_free;
- BUG_ON(free >= BLK_RING_SIZE);
+ BUG_ON(free >= BLK_RING_SIZE(info));
info->shadow_free = info->shadow[free].req.u.rw.id;
info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
return free;
}
}
- for (i = 0; i < BLK_RING_SIZE; i++) {
+ for (i = 0; i < BLK_RING_SIZE(info); i++) {
/*
* Clear persistent grants present in requests already
* on the shared ring
flush_work(&info->work);
/* Free resources associated with old device channel. */
- if (info->ring_ref != GRANT_INVALID_REF) {
- gnttab_end_foreign_access(info->ring_ref, 0,
- (unsigned long)info->ring.sring);
- info->ring_ref = GRANT_INVALID_REF;
- info->ring.sring = NULL;
+ for (i = 0; i < info->nr_ring_pages; i++) {
+ if (info->ring_ref[i] != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
+ info->ring_ref[i] = GRANT_INVALID_REF;
+ }
}
+ free_pages((unsigned long)info->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
+ info->ring.sring = NULL;
+
if (info->irq)
unbind_from_irqhandler(info->irq, info);
info->evtchn = info->irq = 0;
s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
- /*
- * Copy the data received from the backend into the bvec.
- * Since bv_offset can be different than 0, and bv_len different
- * than PAGE_SIZE, we have to keep track of the current offset,
- * to be sure we are copying the data from the right shared page.
- */
for_each_sg(s->sg, sg, nseg, i) {
BUG_ON(sg->offset + sg->length > PAGE_SIZE);
shared_data = kmap_atomic(
* never have given to it (we stamp it up to BLK_RING_SIZE -
* look in get_id_from_freelist.
*/
- if (id >= BLK_RING_SIZE) {
+ if (id >= BLK_RING_SIZE(info)) {
WARN(1, "%s: response to %s has incorrect id (%ld)\n",
info->gd->disk_name, op_name(bret->operation), id);
/* We can't safely get the 'struct request' as
struct blkfront_info *info)
{
struct blkif_sring *sring;
- grant_ref_t gref;
- int err;
+ int err, i;
+ unsigned long ring_size = info->nr_ring_pages * PAGE_SIZE;
+ grant_ref_t gref[XENBUS_MAX_RING_PAGES];
- info->ring_ref = GRANT_INVALID_REF;
+ for (i = 0; i < info->nr_ring_pages; i++)
+ info->ring_ref[i] = GRANT_INVALID_REF;
- sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+ sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
+ get_order(ring_size));
if (!sring) {
xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
return -ENOMEM;
}
SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+ FRONT_RING_INIT(&info->ring, sring, ring_size);
- err = xenbus_grant_ring(dev, info->ring.sring, 1, &gref);
+ err = xenbus_grant_ring(dev, info->ring.sring, info->nr_ring_pages, gref);
if (err < 0) {
- free_page((unsigned long)sring);
+ free_pages((unsigned long)sring, get_order(ring_size));
info->ring.sring = NULL;
goto fail;
}
- info->ring_ref = gref;
+ for (i = 0; i < info->nr_ring_pages; i++)
+ info->ring_ref[i] = gref[i];
err = xenbus_alloc_evtchn(dev, &info->evtchn);
if (err)
{
const char *message = NULL;
struct xenbus_transaction xbt;
- int err;
+ int err, i;
+ unsigned int max_page_order = 0;
+ unsigned int ring_page_order = 0;
+
+ err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+ "max-ring-page-order", "%u", &max_page_order);
+ if (err != 1)
+ info->nr_ring_pages = 1;
+ else {
+ ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
+ info->nr_ring_pages = 1 << ring_page_order;
+ }
/* Create shared ring, alloc event channel. */
err = setup_blkring(dev, info);
goto destroy_blkring;
}
- err = xenbus_printf(xbt, dev->nodename,
- "ring-ref", "%u", info->ring_ref);
- if (err) {
- message = "writing ring-ref";
- goto abort_transaction;
+ if (info->nr_ring_pages == 1) {
+ err = xenbus_printf(xbt, dev->nodename,
+ "ring-ref", "%u", info->ring_ref[0]);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ } else {
+ err = xenbus_printf(xbt, dev->nodename,
+ "ring-page-order", "%u", ring_page_order);
+ if (err) {
+ message = "writing ring-page-order";
+ goto abort_transaction;
+ }
+
+ for (i = 0; i < info->nr_ring_pages; i++) {
+ char ring_ref_name[RINGREF_NAME_LEN];
+
+ snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+ err = xenbus_printf(xbt, dev->nodename, ring_ref_name,
+ "%u", info->ring_ref[i]);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ }
}
err = xenbus_printf(xbt, dev->nodename,
"event-channel", "%u", info->evtchn);
goto destroy_blkring;
}
+ for (i = 0; i < BLK_RING_SIZE(info); i++)
+ info->shadow[i].req.u.rw.id = i+1;
+ info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
xenbus_switch_state(dev, XenbusStateInitialised);
return 0;
static int blkfront_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
{
- int err, vdevice, i;
+ int err, vdevice;
struct blkfront_info *info;
/* FIXME: Use dynamic device id if this is not set. */
info->connected = BLKIF_STATE_DISCONNECTED;
INIT_WORK(&info->work, blkif_restart_queue);
- for (i = 0; i < BLK_RING_SIZE; i++)
- info->shadow[i].req.u.rw.id = i+1;
- info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
-
/* Front end dir is a number, which is used as the id. */
info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
dev_set_drvdata(&dev->dev, info);
- err = talk_to_blkback(dev, info);
- if (err) {
- kfree(info);
- dev_set_drvdata(&dev->dev, NULL);
- return err;
- }
-
return 0;
}
/* Stage 2: Set up free list. */
memset(&info->shadow, 0, sizeof(info->shadow));
- for (i = 0; i < BLK_RING_SIZE; i++)
+ for (i = 0; i < BLK_RING_SIZE(info); i++)
info->shadow[i].req.u.rw.id = i+1;
info->shadow_free = info->ring.req_prod_pvt;
- info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+ info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
rc = blkfront_setup_indirect(info);
if (rc) {
blk_queue_max_segments(info->rq, segs);
bio_list_init(&bio_list);
INIT_LIST_HEAD(&requests);
- for (i = 0; i < BLK_RING_SIZE; i++) {
+ for (i = 0; i < BLK_RING_SIZE(info); i++) {
/* Not in use? */
if (!copy[i].request)
continue;
segs = info->max_indirect_segments;
}
- err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE);
+ err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info));
if (err)
goto out_of_memory;
* grants, we need to allocate a set of pages that can be
* used for mapping indirect grefs
*/
- int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE;
+ int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE(info);
BUG_ON(!list_empty(&info->indirect_pages));
for (i = 0; i < num; i++) {
}
}
- for (i = 0; i < BLK_RING_SIZE; i++) {
+ for (i = 0; i < BLK_RING_SIZE(info); i++) {
info->shadow[i].grants_used = kzalloc(
sizeof(info->shadow[i].grants_used[0]) * segs,
GFP_NOIO);
return 0;
out_of_memory:
- for (i = 0; i < BLK_RING_SIZE; i++) {
+ for (i = 0; i < BLK_RING_SIZE(info); i++) {
kfree(info->shadow[i].grants_used);
info->shadow[i].grants_used = NULL;
kfree(info->shadow[i].sg);
dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
switch (backend_state) {
- case XenbusStateInitialising:
case XenbusStateInitWait:
+ if (dev->state != XenbusStateInitialising)
+ break;
+ if (talk_to_blkback(dev, info)) {
+ kfree(info);
+ dev_set_drvdata(&dev->dev, NULL);
+ break;
+ }
+ case XenbusStateInitialising:
case XenbusStateInitialised:
case XenbusStateReconfiguring:
case XenbusStateReconfigured:
if (!xen_domain())
return -ENODEV;
+ if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) {
+ pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
+ xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER);
+ xen_blkif_max_ring_order = 0;
+ }
+
if (!xen_has_pv_disk_devices())
return -ENODEV;
return rv;
}
-static struct kernel_param_ops param_ops_timeout = {
+static const struct kernel_param_ops param_ops_timeout = {
.set = set_param_timeout,
.get = param_get_int,
};
return 0;
}
-static struct kernel_param_ops param_ops_wdog_ifnum = {
+static const struct kernel_param_ops param_ops_wdog_ifnum = {
.set = set_param_wdog_ifnum,
.get = param_get_int,
};
#define param_check_wdog_ifnum param_check_int
-static struct kernel_param_ops param_ops_str = {
+static const struct kernel_param_ops param_ops_str = {
.set = set_param_str,
.get = get_param_str,
};
exynos4_mct_write(TICK_BASE_CNT, mevt->base + MCT_L_TCNTB_OFFSET);
if (mct_int_type == MCT_INT_SPI) {
- evt->irq = mct_irqs[MCT_L0_IRQ + cpu];
- if (request_irq(evt->irq, exynos4_mct_tick_isr,
- IRQF_TIMER | IRQF_NOBALANCING,
- evt->name, mevt)) {
- pr_err("exynos-mct: cannot register IRQ %d\n",
- evt->irq);
+
+ if (evt->irq == -1)
return -EIO;
- }
- irq_force_affinity(mct_irqs[MCT_L0_IRQ + cpu], cpumask_of(cpu));
+
+ irq_force_affinity(evt->irq, cpumask_of(cpu));
+ enable_irq(evt->irq);
} else {
enable_percpu_irq(mct_irqs[MCT_L0_IRQ], 0);
}
static void exynos4_local_timer_stop(struct clock_event_device *evt)
{
evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt);
- if (mct_int_type == MCT_INT_SPI)
- free_irq(evt->irq, this_cpu_ptr(&percpu_mct_tick));
- else
+ if (mct_int_type == MCT_INT_SPI) {
+ if (evt->irq != -1)
+ disable_irq_nosync(evt->irq);
+ } else {
disable_percpu_irq(mct_irqs[MCT_L0_IRQ]);
+ }
}
static int exynos4_mct_cpu_notify(struct notifier_block *self,
static void __init exynos4_timer_resources(struct device_node *np, void __iomem *base)
{
- int err;
+ int err, cpu;
struct mct_clock_event_device *mevt = this_cpu_ptr(&percpu_mct_tick);
struct clk *mct_clk, *tick_clk;
WARN(err, "MCT: can't request IRQ %d (%d)\n",
mct_irqs[MCT_L0_IRQ], err);
} else {
- irq_set_affinity(mct_irqs[MCT_L0_IRQ], cpumask_of(0));
+ for_each_possible_cpu(cpu) {
+ int mct_irq = mct_irqs[MCT_L0_IRQ + cpu];
+ struct mct_clock_event_device *pcpu_mevt =
+ per_cpu_ptr(&percpu_mct_tick, cpu);
+
+ pcpu_mevt->evt.irq = -1;
+
+ irq_set_status_flags(mct_irq, IRQ_NOAUTOEN);
+ if (request_irq(mct_irq,
+ exynos4_mct_tick_isr,
+ IRQF_TIMER | IRQF_NOBALANCING,
+ pcpu_mevt->name, pcpu_mevt)) {
+ pr_err("exynos-mct: cannot register IRQ (cpu%d)\n",
+ cpu);
+
+ continue;
+ }
+ pcpu_mevt->evt.irq = mct_irq;
+ }
}
err = register_cpu_notifier(&exynos4_mct_cpu_nb);
return index;
}
+/* Register for fastsleep only in oneshot mode of broadcast */
+#ifdef CONFIG_TICK_ONESHOT
static int fastsleep_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
return index;
}
-
+#endif
/*
* States for dedicated partition case.
*/
powernv_states[nr_idle_states].flags = 0;
powernv_states[nr_idle_states].target_residency = 100;
powernv_states[nr_idle_states].enter = &nap_loop;
- } else if (flags[i] & OPAL_PM_SLEEP_ENABLED ||
+ }
+
+ /*
+ * All cpuidle states with CPUIDLE_FLAG_TIMER_STOP set must come
+ * within this config dependency check.
+ */
+#ifdef CONFIG_TICK_ONESHOT
+ if (flags[i] & OPAL_PM_SLEEP_ENABLED ||
flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) {
/* Add FASTSLEEP state */
strcpy(powernv_states[nr_idle_states].name, "FastSleep");
powernv_states[nr_idle_states].target_residency = 300000;
powernv_states[nr_idle_states].enter = &fastsleep_loop;
}
-
+#endif
powernv_states[nr_idle_states].exit_latency =
((unsigned int)latency_ns[i]) / 1000;
qat_uclo_del_uof_obj(loader_data->fw_loader);
qat_hal_deinit(loader_data->fw_loader);
-
- if (loader_data->uof_fw)
- release_firmware(loader_data->uof_fw);
-
+ release_firmware(loader_data->uof_fw);
loader_data->uof_fw = NULL;
loader_data->fw_loader = NULL;
}
err:
for (i = 0; i < ADF_ETR_MAX_RINGS_PER_BANK; i++) {
ring = &bank->rings[i];
- if (hw_data->tx_rings_mask & (1 << i) && ring->inflights)
+ if (hw_data->tx_rings_mask & (1 << i))
kfree(ring->inflights);
}
return -ENOMEM;
static int dmatest_run_set(const char *val, const struct kernel_param *kp);
static int dmatest_run_get(char *val, const struct kernel_param *kp);
-static struct kernel_param_ops run_ops = {
+static const struct kernel_param_ops run_ops = {
.set = dmatest_run_set,
.get = dmatest_run_get,
};
return param_get_bool(val, kp);
}
-static struct kernel_param_ops wait_ops = {
+static const struct kernel_param_ops wait_ops = {
.get = dmatest_wait_get,
.set = param_set_bool,
};
}
for (i = 0; i < kona_gpio->num_bank; i++) {
bank = &kona_gpio->banks[i];
- irq_set_chained_handler(bank->irq, bcm_kona_gpio_irq_handler);
- irq_set_handler_data(bank->irq, bank);
+ irq_set_chained_handler_and_data(bank->irq,
+ bcm_kona_gpio_irq_handler,
+ bank);
}
spin_lock_init(&kona_gpio->lock);
irq_gc->chip_types[1].handler = handle_edge_irq;
if (!pp->irq_shared) {
- irq_set_chained_handler(pp->irq, dwapb_irq_handler);
- irq_set_handler_data(pp->irq, gpio);
+ irq_set_chained_handler_and_data(pp->irq, dwapb_irq_handler,
+ gpio);
} else {
/*
* Request a shared IRQ since where MFD would have devices
&msic_irqchip,
handle_simple_irq);
}
- irq_set_chained_handler(mg->irq, msic_gpio_irq_handler);
- irq_set_handler_data(mg->irq, mg);
+ irq_set_chained_handler_and_data(mg->irq, msic_gpio_irq_handler, mg);
return 0;
err:
if (is_of_node(fwnode)) {
enum of_gpio_flags flags;
- desc = of_get_named_gpiod_flags(of_node(fwnode), propname, 0,
+ desc = of_get_named_gpiod_flags(to_of_node(fwnode), propname, 0,
&flags);
if (!IS_ERR(desc))
active_low = flags & OF_GPIO_ACTIVE_LOW;
} else if (is_acpi_node(fwnode)) {
struct acpi_gpio_info info;
- desc = acpi_get_gpiod_by_index(acpi_node(fwnode), propname, 0,
+ desc = acpi_get_gpiod_by_index(to_acpi_node(fwnode), propname, 0,
&info);
if (!IS_ERR(desc))
active_low = info.active_low;
return 0;
}
-static struct kernel_param_ops param_ops_ide_dev_mask = {
+static const struct kernel_param_ops param_ops_ide_dev_mask = {
.set = ide_set_dev_param_mask
};
MODULE_PARM_DESC(register_always,
"Use memory registration even for contiguous memory regions");
-static struct kernel_param_ops srp_tmo_ops;
+static const struct kernel_param_ops srp_tmo_ops;
static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
return res;
}
-static struct kernel_param_ops srp_tmo_ops = {
+static const struct kernel_param_ops srp_tmo_ops = {
.get = srp_tmo_get,
.set = srp_tmo_set,
};
static unsigned int channel_mask = ATI_REMOTE2_MAX_CHANNEL_MASK;
#define param_check_channel_mask(name, p) __param_check(name, p, unsigned int)
-static struct kernel_param_ops param_ops_channel_mask = {
+static const struct kernel_param_ops param_ops_channel_mask = {
.set = ati_remote2_set_channel_mask,
.get = ati_remote2_get_channel_mask,
};
static unsigned int mode_mask = ATI_REMOTE2_MAX_MODE_MASK;
#define param_check_mode_mask(name, p) __param_check(name, p, unsigned int)
-static struct kernel_param_ops param_ops_mode_mask = {
+static const struct kernel_param_ops param_ops_mode_mask = {
.set = ati_remote2_set_mode_mask,
.get = ati_remote2_get_mode_mask,
};
static unsigned int psmouse_max_proto = PSMOUSE_AUTO;
static int psmouse_set_maxproto(const char *val, const struct kernel_param *);
static int psmouse_get_maxproto(char *buffer, const struct kernel_param *kp);
-static struct kernel_param_ops param_ops_proto_abbrev = {
+static const struct kernel_param_ops param_ops_proto_abbrev = {
.set = psmouse_set_maxproto,
.get = psmouse_get_maxproto,
};
static void update_domain(struct protection_domain *domain);
static int alloc_passthrough_domain(void);
+static int protection_domain_init(struct protection_domain *domain);
/****************************************************************************
*
if (!dma_dom)
return NULL;
- spin_lock_init(&dma_dom->domain.lock);
-
- dma_dom->domain.id = domain_id_alloc();
- if (dma_dom->domain.id == 0)
+ if (protection_domain_init(&dma_dom->domain))
goto free_dma_dom;
- INIT_LIST_HEAD(&dma_dom->domain.dev_list);
+
dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
dma_dom->domain.flags = PD_DMA_OPS_MASK;
kfree(domain);
}
+static int protection_domain_init(struct protection_domain *domain)
+{
+ spin_lock_init(&domain->lock);
+ mutex_init(&domain->api_lock);
+ domain->id = domain_id_alloc();
+ if (!domain->id)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&domain->dev_list);
+
+ return 0;
+}
+
static struct protection_domain *protection_domain_alloc(void)
{
struct protection_domain *domain;
if (!domain)
return NULL;
- spin_lock_init(&domain->lock);
- mutex_init(&domain->api_lock);
- domain->id = domain_id_alloc();
- if (!domain->id)
+ if (protection_domain_init(domain))
goto out_err;
- INIT_LIST_HEAD(&domain->dev_list);
add_domain_to_list(domain);
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
- if (smmu_domain->pgtbl_ops)
- free_io_pgtable_ops(smmu_domain->pgtbl_ops);
+ free_io_pgtable_ops(smmu_domain->pgtbl_ops);
/* Free the CD and ASID, if we allocated them */
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
return -ENODEV;
}
- if ((id & ID0_S1TS) && ((smmu->version == 1) || (id & ID0_ATOSNS))) {
+ if ((id & ID0_S1TS) && ((smmu->version == 1) || !(id & ID0_ATOSNS))) {
smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
dev_notice(smmu->dev, "\taddress translation ops\n");
}
{
struct iommu_callback_data *cb = data;
const struct iommu_ops *ops = cb->ops;
+ int ret;
if (!ops->add_device)
return 0;
WARN_ON(dev->iommu_group);
- return ops->add_device(dev);
+ ret = ops->add_device(dev);
+
+ /*
+ * We ignore -ENODEV errors for now, as they just mean that the
+ * device is not translated by an IOMMU. We still care about
+ * other errors and fail to initialize when they happen.
+ */
+ if (ret == -ENODEV)
+ ret = 0;
+
+ return ret;
}
static int remove_iommu_group(struct device *dev, void *data)
goto err;
}
- np = of_node(child);
+ np = to_of_node(child);
if (fwnode_property_present(child, "label")) {
fwnode_property_read_string(child, "label", &led.name);
asic3_write_register(asic, ASIC3_OFFSET(INTR, INT_MASK),
ASIC3_INTMASK_GINTMASK);
- irq_set_chained_handler(asic->irq_nr, asic3_irq_demux);
+ irq_set_chained_handler_and_data(asic->irq_nr, asic3_irq_demux, asic);
irq_set_irq_type(asic->irq_nr, IRQ_TYPE_EDGE_RISING);
- irq_set_handler_data(asic->irq_nr, asic);
return 0;
}
return ret;
}
-static struct kernel_param_ops param_ops_axis = {
+static const struct kernel_param_ops param_ops_axis = {
.set = param_set_axis,
.get = param_get_int,
};
return 0;
}
-static struct kernel_param_ops ubiblock_param_ops = {
+static const struct kernel_param_ops ubiblock_param_ops = {
.set = ubiblock_set_param,
};
module_param_cb(block, &ubiblock_param_ops, NULL, 0);
int ret;
/* Try to obtain pages, decreasing order if necessary */
- gfp |= __GFP_COLD | __GFP_COMP;
+ gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
while (order >= 0) {
pages = alloc_pages(gfp, order);
if (pages)
struct resource *res;
void __iomem *base_addr;
u32 offset;
- int ret;
+ int ret = 0;
pdev = pdata->pdev;
dev = &pdev->dev;
struct bnx2x_alloc_pool {
struct page *page;
- dma_addr_t dma;
unsigned int offset;
};
AEU_INPUTS_ATTN_BITS_IGU_PARITY_ERROR | \
AEU_INPUTS_ATTN_BITS_MISC_PARITY_ERROR)
-#define HW_PRTY_ASSERT_SET_3 (AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY | \
- AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY | \
- AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY | \
- AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY)
+#define HW_PRTY_ASSERT_SET_3_WITHOUT_SCPAD \
+ (AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY | \
+ AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY | \
+ AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY)
+
+#define HW_PRTY_ASSERT_SET_3 (HW_PRTY_ASSERT_SET_3_WITHOUT_SCPAD | \
+ AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY)
#define HW_PRTY_ASSERT_SET_4 (AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR | \
AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR)
return -ENOMEM;
}
- pool->dma = dma_map_page(&bp->pdev->dev, pool->page, 0,
- PAGE_SIZE, DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(&bp->pdev->dev,
- pool->dma))) {
- __free_pages(pool->page, PAGES_PER_SGE_SHIFT);
- pool->page = NULL;
- BNX2X_ERR("Can't map sge\n");
- return -ENOMEM;
- }
pool->offset = 0;
}
+ mapping = dma_map_page(&bp->pdev->dev, pool->page,
+ pool->offset, SGE_PAGE_SIZE, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
+ BNX2X_ERR("Can't map sge\n");
+ return -ENOMEM;
+ }
+
get_page(pool->page);
sw_buf->page = pool->page;
sw_buf->offset = pool->offset;
- mapping = pool->dma + sw_buf->offset;
dma_unmap_addr_set(sw_buf, mapping, mapping);
sge->addr_hi = cpu_to_le32(U64_HI(mapping));
return err;
}
- dma_unmap_single(&bp->pdev->dev,
- dma_unmap_addr(&old_rx_pg, mapping),
- SGE_PAGE_SIZE, DMA_FROM_DEVICE);
+ dma_unmap_page(&bp->pdev->dev,
+ dma_unmap_addr(&old_rx_pg, mapping),
+ SGE_PAGE_SIZE, DMA_FROM_DEVICE);
/* Add one frag and update the appropriate fields in the skb */
if (fp->mode == TPA_MODE_LRO)
skb_fill_page_desc(skb, j, old_rx_pg.page,
u32 wnd_sum = 0;
/* Headers length */
- hlen = (int)(skb_transport_header(skb) - skb->data) +
- tcp_hdrlen(skb);
+ if (xmit_type & XMIT_GSO_ENC)
+ hlen = (int)(skb_inner_transport_header(skb) -
+ skb->data) +
+ inner_tcp_hdrlen(skb);
+ else
+ hlen = (int)(skb_transport_header(skb) -
+ skb->data) + tcp_hdrlen(skb);
/* Amount of data (w/o headers) on linear part of SKB*/
first_bd_sz = skb_headlen(skb) - hlen;
/* Since many fragments can share the same page, make sure to
* only unmap and free the page once.
*/
- dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(sw_buf, mapping),
- SGE_PAGE_SIZE, DMA_FROM_DEVICE);
+ dma_unmap_page(&bp->pdev->dev, dma_unmap_addr(sw_buf, mapping),
+ SGE_PAGE_SIZE, DMA_FROM_DEVICE);
put_page(page);
if (!pool->page)
return;
- /* Page was not fully fragmented. Unmap unused space */
- if (pool->offset < PAGE_SIZE) {
- dma_addr_t dma = pool->dma + pool->offset;
- int size = PAGE_SIZE - pool->offset;
-
- dma_unmap_single(&bp->pdev->dev, dma, size, DMA_FROM_DEVICE);
- }
-
put_page(pool->page);
pool->page = NULL;
{
struct bnx2x *bp = netdev_priv(dev);
int cfg_idx = bnx2x_get_link_cfg_idx(bp);
+ u32 media_type;
/* Dual Media boards present all available port types */
cmd->supported = bp->port.supported[cfg_idx] |
(bp->port.supported[cfg_idx ^ 1] &
(SUPPORTED_TP | SUPPORTED_FIBRE));
cmd->advertising = bp->port.advertising[cfg_idx];
- if (bp->link_params.phy[bnx2x_get_cur_phy_idx(bp)].media_type ==
- ETH_PHY_SFP_1G_FIBER) {
+ media_type = bp->link_params.phy[bnx2x_get_cur_phy_idx(bp)].media_type;
+ if (media_type == ETH_PHY_SFP_1G_FIBER) {
cmd->supported &= ~(SUPPORTED_10000baseT_Full);
cmd->advertising &= ~(ADVERTISED_10000baseT_Full);
}
cmd->lp_advertising |= ADVERTISED_100baseT_Full;
if (status & LINK_STATUS_LINK_PARTNER_1000THD_CAPABLE)
cmd->lp_advertising |= ADVERTISED_1000baseT_Half;
- if (status & LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE)
- cmd->lp_advertising |= ADVERTISED_1000baseT_Full;
+ if (status & LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE) {
+ if (media_type == ETH_PHY_KR) {
+ cmd->lp_advertising |=
+ ADVERTISED_1000baseKX_Full;
+ } else {
+ cmd->lp_advertising |=
+ ADVERTISED_1000baseT_Full;
+ }
+ }
if (status & LINK_STATUS_LINK_PARTNER_2500XFD_CAPABLE)
cmd->lp_advertising |= ADVERTISED_2500baseX_Full;
- if (status & LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE)
- cmd->lp_advertising |= ADVERTISED_10000baseT_Full;
+ if (status & LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE) {
+ if (media_type == ETH_PHY_KR) {
+ cmd->lp_advertising |=
+ ADVERTISED_10000baseKR_Full;
+ } else {
+ cmd->lp_advertising |=
+ ADVERTISED_10000baseT_Full;
+ }
+ }
if (status & LINK_STATUS_LINK_PARTNER_20GXFD_CAPABLE)
cmd->lp_advertising |= ADVERTISED_20000baseKR2_Full;
}
return -EINVAL;
}
- if (!(bp->port.supported[cfg_idx] &
- SUPPORTED_1000baseT_Full)) {
+ if (bp->port.supported[cfg_idx] &
+ SUPPORTED_1000baseT_Full) {
+ advertising = (ADVERTISED_1000baseT_Full |
+ ADVERTISED_TP);
+
+ } else if (bp->port.supported[cfg_idx] &
+ SUPPORTED_1000baseKX_Full) {
+ advertising = ADVERTISED_1000baseKX_Full;
+ } else {
DP(BNX2X_MSG_ETHTOOL,
"1G full not supported\n");
return -EINVAL;
}
- advertising = (ADVERTISED_1000baseT_Full |
- ADVERTISED_TP);
break;
case SPEED_2500:
return -EINVAL;
}
phy_idx = bnx2x_get_cur_phy_idx(bp);
- if (!(bp->port.supported[cfg_idx]
- & SUPPORTED_10000baseT_Full) ||
- (bp->link_params.phy[phy_idx].media_type ==
+ if ((bp->port.supported[cfg_idx] &
+ SUPPORTED_10000baseT_Full) &&
+ (bp->link_params.phy[phy_idx].media_type !=
ETH_PHY_SFP_1G_FIBER)) {
+ advertising = (ADVERTISED_10000baseT_Full |
+ ADVERTISED_FIBRE);
+ } else if (bp->port.supported[cfg_idx] &
+ SUPPORTED_10000baseKR_Full) {
+ advertising = (ADVERTISED_10000baseKR_Full |
+ ADVERTISED_FIBRE);
+ } else {
DP(BNX2X_MSG_ETHTOOL,
"10G full not supported\n");
return -EINVAL;
}
- advertising = (ADVERTISED_10000baseT_Full |
- ADVERTISED_FIBRE);
break;
default:
bp->link_params.multi_phy_config = new_multi_phy_config;
if (netif_running(dev)) {
bnx2x_stats_handle(bp, STATS_EVENT_STOP);
+ bnx2x_force_link_reset(bp);
bnx2x_link_set(bp);
}
if (!(val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port))) {
DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM,
"cannot get access to nvram interface\n");
+ bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_NVRAM);
return -EBUSY;
}
if (netif_running(dev)) {
bnx2x_stats_handle(bp, STATS_EVENT_STOP);
+ bnx2x_force_link_reset(bp);
bnx2x_link_set(bp);
}
case BNX2X_FLOW_CTRL_AUTO:
switch (params->req_fc_auto_adv) {
case BNX2X_FLOW_CTRL_BOTH:
+ case BNX2X_FLOW_CTRL_RX:
*ieee_fc |= MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH;
break;
- case BNX2X_FLOW_CTRL_RX:
case BNX2X_FLOW_CTRL_TX:
*ieee_fc |=
MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC;
bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_ADV_PAUSE, val);
}
-static void bnx2x_pause_resolve(struct link_vars *vars, u32 pause_result)
-{ /* LD LP */
+static void bnx2x_pause_resolve(struct bnx2x_phy *phy,
+ struct link_params *params,
+ struct link_vars *vars,
+ u32 pause_result)
+{
+ struct bnx2x *bp = params->bp;
+ /* LD LP */
switch (pause_result) { /* ASYM P ASYM P */
case 0xb: /* 1 0 1 1 */
+ DP(NETIF_MSG_LINK, "Flow Control: TX only\n");
vars->flow_ctrl = BNX2X_FLOW_CTRL_TX;
break;
case 0xe: /* 1 1 1 0 */
+ DP(NETIF_MSG_LINK, "Flow Control: RX only\n");
vars->flow_ctrl = BNX2X_FLOW_CTRL_RX;
break;
case 0x7: /* 0 1 1 1 */
case 0xd: /* 1 1 0 1 */
case 0xf: /* 1 1 1 1 */
- vars->flow_ctrl = BNX2X_FLOW_CTRL_BOTH;
+ /* If the user selected to advertise RX ONLY,
+ * although we advertised both, need to enable
+ * RX only.
+ */
+ if (params->req_fc_auto_adv == BNX2X_FLOW_CTRL_BOTH) {
+ DP(NETIF_MSG_LINK, "Flow Control: RX & TX\n");
+ vars->flow_ctrl = BNX2X_FLOW_CTRL_BOTH;
+ } else {
+ DP(NETIF_MSG_LINK, "Flow Control: RX only\n");
+ vars->flow_ctrl = BNX2X_FLOW_CTRL_RX;
+ }
break;
default:
+ DP(NETIF_MSG_LINK, "Flow Control: None\n");
+ vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE;
break;
}
if (pause_result & (1<<0))
pause_result |= (lp_pause &
MDIO_AN_REG_ADV_PAUSE_MASK) >> 10;
DP(NETIF_MSG_LINK, "Ext PHY pause result 0x%x\n", pause_result);
- bnx2x_pause_resolve(vars, pause_result);
+ bnx2x_pause_resolve(phy, params, vars, pause_result);
}
MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_MASK)>>7;
DP(NETIF_MSG_LINK, "pause_result CL37 0x%x\n", pause_result);
}
- bnx2x_pause_resolve(vars, pause_result);
+ bnx2x_pause_resolve(phy, params, vars, pause_result);
}
pause_result |= (lp_pause &
MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH) >> 7;
- bnx2x_pause_resolve(vars, pause_result);
+ bnx2x_pause_resolve(phy, params, vars, pause_result);
DP(NETIF_MSG_LINK, "Ext PHY CL37 pause result 0x%x\n",
pause_result);
}
SUPPORTED_100baseT_Half |
SUPPORTED_100baseT_Full |
SUPPORTED_1000baseT_Full |
+ SUPPORTED_1000baseKX_Full |
SUPPORTED_10000baseT_Full |
+ SUPPORTED_10000baseKR_Full |
SUPPORTED_20000baseKR2_Full |
SUPPORTED_20000baseMLD2_Full |
SUPPORTED_FIBRE |
break;
case PORT_HW_CFG_NET_SERDES_IF_KR:
phy->media_type = ETH_PHY_KR;
- phy->supported &= (SUPPORTED_1000baseT_Full |
- SUPPORTED_10000baseT_Full |
+ phy->supported &= (SUPPORTED_1000baseKX_Full |
+ SUPPORTED_10000baseKR_Full |
SUPPORTED_FIBRE |
SUPPORTED_Autoneg |
SUPPORTED_Pause |
phy->media_type = ETH_PHY_KR;
phy->flags |= FLAGS_WC_DUAL_MODE;
phy->supported &= (SUPPORTED_20000baseKR2_Full |
- SUPPORTED_10000baseT_Full |
- SUPPORTED_1000baseT_Full |
+ SUPPORTED_10000baseKR_Full |
+ SUPPORTED_1000baseKX_Full |
SUPPORTED_Autoneg |
SUPPORTED_FIBRE |
SUPPORTED_Pause |
void bnx2x_calc_fc_adv(struct bnx2x *bp)
{
u8 cfg_idx = bnx2x_get_link_cfg_idx(bp);
+
+ bp->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause |
+ ADVERTISED_Pause);
switch (bp->link_vars.ieee_fc &
MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_MASK) {
- case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_NONE:
- bp->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause |
- ADVERTISED_Pause);
- break;
-
case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH:
bp->port.advertising[cfg_idx] |= (ADVERTISED_Asym_Pause |
ADVERTISED_Pause);
break;
default:
- bp->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause |
- ADVERTISED_Pause);
break;
}
}
if (load_mode == LOAD_DIAG) {
struct link_params *lp = &bp->link_params;
lp->loopback_mode = LOOPBACK_XGXS;
- /* do PHY loopback at 10G speed, if possible */
- if (lp->req_line_speed[cfx_idx] < SPEED_10000) {
+ /* Prefer doing PHY loopback at highest speed */
+ if (lp->req_line_speed[cfx_idx] < SPEED_20000) {
if (lp->speed_cap_mask[cfx_idx] &
- PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)
+ PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)
lp->req_line_speed[cfx_idx] =
- SPEED_10000;
+ SPEED_20000;
+ else if (lp->speed_cap_mask[cfx_idx] &
+ PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)
+ lp->req_line_speed[cfx_idx] =
+ SPEED_10000;
else
lp->req_line_speed[cfx_idx] =
SPEED_1000;
res = true;
break;
case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY:
- if (print)
- _print_next_block((*par_num)++,
- "MCP SCPAD");
+ (*par_num)++;
/* clear latched SCPAD PATIRY from MCP */
REG_WR(bp, MISC_REG_AEU_CLR_LATCH_SIGNAL,
1UL << 10);
(sig[3] & HW_PRTY_ASSERT_SET_3) ||
(sig[4] & HW_PRTY_ASSERT_SET_4)) {
int par_num = 0;
+
DP(NETIF_MSG_HW, "Was parity error: HW block parity attention:\n"
"[0]:0x%08x [1]:0x%08x [2]:0x%08x [3]:0x%08x [4]:0x%08x\n",
sig[0] & HW_PRTY_ASSERT_SET_0,
sig[2] & HW_PRTY_ASSERT_SET_2,
sig[3] & HW_PRTY_ASSERT_SET_3,
sig[4] & HW_PRTY_ASSERT_SET_4);
- if (print)
- netdev_err(bp->dev,
- "Parity errors detected in blocks: ");
+ if (print) {
+ if (((sig[0] & HW_PRTY_ASSERT_SET_0) ||
+ (sig[1] & HW_PRTY_ASSERT_SET_1) ||
+ (sig[2] & HW_PRTY_ASSERT_SET_2) ||
+ (sig[4] & HW_PRTY_ASSERT_SET_4)) ||
+ (sig[3] & HW_PRTY_ASSERT_SET_3_WITHOUT_SCPAD)) {
+ netdev_err(bp->dev,
+ "Parity errors detected in blocks: ");
+ } else {
+ print = false;
+ }
+ }
res |= bnx2x_check_blocks_with_parity0(bp,
sig[0] & HW_PRTY_ASSERT_SET_0, &par_num, print);
res |= bnx2x_check_blocks_with_parity1(bp,
BNX2X_ETH_MAC, &ramrod_flags);
} else { /* vf */
return bnx2x_vfpf_config_mac(bp, bp->dev->dev_addr,
- bp->fp->index, true);
+ bp->fp->index, set);
}
}
* function stop ramrod is sent, since as part of this ramrod FW access
* PTP registers.
*/
- bnx2x_stop_ptp(bp);
+ if (bp->flags & PTP_SUPPORTED)
+ bnx2x_stop_ptp(bp);
/* Disable HW interrupts, NAPI */
bnx2x_netif_stop(bp, 1);
bp->port.advertising[idx] |=
(ADVERTISED_1000baseT_Full |
ADVERTISED_TP);
+ } else if (bp->port.supported[idx] &
+ SUPPORTED_1000baseKX_Full) {
+ bp->link_params.req_line_speed[idx] =
+ SPEED_1000;
+ bp->port.advertising[idx] |=
+ ADVERTISED_1000baseKX_Full;
} else {
BNX2X_ERR("NVRAM config error. Invalid link_config 0x%x speed_cap_mask 0x%x\n",
link_config,
bp->port.advertising[idx] |=
(ADVERTISED_10000baseT_Full |
ADVERTISED_FIBRE);
+ } else if (bp->port.supported[idx] &
+ SUPPORTED_10000baseKR_Full) {
+ bp->link_params.req_line_speed[idx] =
+ SPEED_10000;
+ bp->port.advertising[idx] |=
+ (ADVERTISED_10000baseKR_Full |
+ ADVERTISED_FIBRE);
} else {
BNX2X_ERR("NVRAM config error. Invalid link_config 0x%x speed_cap_mask 0x%x\n",
link_config,
o->head_exe_request = false;
o->saved_ramrod_flags = 0;
rc = bnx2x_exe_queue_step(bp, &o->exe_queue, &ramrod_flags);
- if (rc != 0) {
+ if ((rc != 0) && (rc != 1)) {
BNX2X_ERR("execution of pending commands failed with rc %d\n",
rc);
#ifdef BNX2X_STOP_ON_ERROR
wait_queue_head_t wq;
struct phy_device *phydev;
struct device_node *phy_dn;
+ struct device_node *mdio_dn;
struct mii_bus *mii_bus;
u16 gphy_rev;
struct clk *clk_eee;
return 0;
}
+/* Workaround for integrated BCM7xxx Gigabit PHYs which have a problem with
+ * their internal MDIO management controller making them fail to successfully
+ * be read from or written to for the first transaction. We insert a dummy
+ * BMSR read here to make sure that phy_get_device() and get_phy_id() can
+ * correctly read the PHY MII_PHYSID1/2 registers and successfully register a
+ * PHY device for this peripheral.
+ *
+ * Once the PHY driver is registered, we can workaround subsequent reads from
+ * there (e.g: during system-wide power management).
+ *
+ * bus->reset is invoked before mdiobus_scan during mdiobus_register and is
+ * therefore the right location to stick that workaround. Since we do not want
+ * to read from non-existing PHYs, we either use bus->phy_mask or do a manual
+ * Device Tree scan to limit the search area.
+ */
+static int bcmgenet_mii_bus_reset(struct mii_bus *bus)
+{
+ struct net_device *dev = bus->priv;
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+ struct device_node *np = priv->mdio_dn;
+ struct device_node *child = NULL;
+ u32 read_mask = 0;
+ int addr = 0;
+
+ if (!np) {
+ read_mask = 1 << priv->phy_addr;
+ } else {
+ for_each_available_child_of_node(np, child) {
+ addr = of_mdio_parse_addr(&dev->dev, child);
+ if (addr < 0)
+ continue;
+
+ read_mask |= 1 << addr;
+ }
+ }
+
+ for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
+ if (read_mask & 1 << addr) {
+ dev_dbg(&dev->dev, "Workaround for PHY @ %d\n", addr);
+ mdiobus_read(bus, addr, MII_BMSR);
+ }
+ }
+
+ return 0;
+}
+
static int bcmgenet_mii_alloc(struct bcmgenet_priv *priv)
{
struct mii_bus *bus;
bus->parent = &priv->pdev->dev;
bus->read = bcmgenet_mii_read;
bus->write = bcmgenet_mii_write;
+ bus->reset = bcmgenet_mii_bus_reset;
snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d",
priv->pdev->name, priv->pdev->id);
{
struct device_node *dn = priv->pdev->dev.of_node;
struct device *kdev = &priv->pdev->dev;
- struct device_node *mdio_dn;
char *compat;
int ret;
if (!compat)
return -ENOMEM;
- mdio_dn = of_find_compatible_node(dn, NULL, compat);
+ priv->mdio_dn = of_find_compatible_node(dn, NULL, compat);
kfree(compat);
- if (!mdio_dn) {
+ if (!priv->mdio_dn) {
dev_err(kdev, "unable to find MDIO bus node\n");
return -ENODEV;
}
- ret = of_mdiobus_register(priv->mii_bus, mdio_dn);
+ ret = of_mdiobus_register(priv->mii_bus, priv->mdio_dn);
if (ret) {
dev_err(kdev, "failed to register MDIO bus\n");
return ret;
if (ret)
return ret;
- octnet_mdio45_access(lio, 1, LIO68XX_LED_BEACON_ADDR,
- &lio->phy_beacon_val);
+ ret = octnet_mdio45_access(lio, 1,
+ LIO68XX_LED_BEACON_ADDR,
+ &lio->phy_beacon_val);
if (ret)
return ret;
for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES; i++) {
/* could check mask as well */
- if (oct->droq[i])
- vfree(oct->droq[i]);
+ vfree(oct->droq[i]);
}
for (i = 0; i < MAX_OCTEON_INSTR_QUEUES; i++) {
/* could check mask as well */
- if (oct->instr_queue[i])
- vfree(oct->instr_queue[i]);
+ vfree(oct->instr_queue[i]);
}
i = oct->octeon_id;
oct->dispatch.count--;
spin_unlock_bh(&oct->dispatch.lock);
-
- if (dfree)
- vfree(dfree);
-
+ vfree(dfree);
return retval;
}
dev_dbg(&oct->pci_dev->dev, "%s[%d]\n", __func__, q_no);
octeon_droq_destroy_ring_buffers(oct, droq);
-
- if (droq->recv_buf_list)
- vfree(droq->recv_buf_list);
+ vfree(droq->recv_buf_list);
if (droq->info_base_addr)
cnnic_free_aligned_dma(oct->pci_dev, droq->info_list,
desc_size =
CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn6xxx, conf));
- if (iq->request_list)
- vfree(iq->request_list);
+ vfree(iq->request_list);
if (iq->base_addr) {
q_size = iq->max_count * desc_size;
napi_complete(napi);
vnic_intr_unmask(&enic->intr[intr]);
}
- enic_poll_unlock_napi(&enic->rq[cq_rq]);
+ enic_poll_unlock_napi(&enic->rq[cq_rq], napi);
return rq_work_done;
}
*/
enic_calc_int_moderation(enic, &enic->rq[rq]);
- enic_poll_unlock_napi(&enic->rq[rq]);
+ enic_poll_unlock_napi(&enic->rq[rq], napi);
if (work_done < work_to_do) {
/* Some work done, but not enough to stay in polling,
#define _VNIC_RQ_H_
#include <linux/pci.h>
+#include <linux/netdevice.h>
#include "vnic_dev.h"
#include "vnic_cq.h"
uint64_t wr_id;
};
+enum enic_poll_state {
+ ENIC_POLL_STATE_IDLE,
+ ENIC_POLL_STATE_NAPI,
+ ENIC_POLL_STATE_POLL
+};
+
struct vnic_rq {
unsigned int index;
struct vnic_dev *vdev;
void *os_buf_head;
unsigned int pkts_outstanding;
#ifdef CONFIG_NET_RX_BUSY_POLL
-#define ENIC_POLL_STATE_IDLE 0
-#define ENIC_POLL_STATE_NAPI (1 << 0) /* NAPI owns this poll */
-#define ENIC_POLL_STATE_POLL (1 << 1) /* poll owns this poll */
-#define ENIC_POLL_STATE_NAPI_YIELD (1 << 2) /* NAPI yielded this poll */
-#define ENIC_POLL_STATE_POLL_YIELD (1 << 3) /* poll yielded this poll */
-#define ENIC_POLL_YIELD (ENIC_POLL_STATE_NAPI_YIELD | \
- ENIC_POLL_STATE_POLL_YIELD)
-#define ENIC_POLL_LOCKED (ENIC_POLL_STATE_NAPI | \
- ENIC_POLL_STATE_POLL)
-#define ENIC_POLL_USER_PEND (ENIC_POLL_STATE_POLL | \
- ENIC_POLL_STATE_POLL_YIELD)
- unsigned int bpoll_state;
- spinlock_t bpoll_lock;
+ atomic_t bpoll_state;
#endif /* CONFIG_NET_RX_BUSY_POLL */
};
#ifdef CONFIG_NET_RX_BUSY_POLL
static inline void enic_busy_poll_init_lock(struct vnic_rq *rq)
{
- spin_lock_init(&rq->bpoll_lock);
- rq->bpoll_state = ENIC_POLL_STATE_IDLE;
+ atomic_set(&rq->bpoll_state, ENIC_POLL_STATE_IDLE);
}
static inline bool enic_poll_lock_napi(struct vnic_rq *rq)
{
- bool rc = true;
-
- spin_lock(&rq->bpoll_lock);
- if (rq->bpoll_state & ENIC_POLL_LOCKED) {
- WARN_ON(rq->bpoll_state & ENIC_POLL_STATE_NAPI);
- rq->bpoll_state |= ENIC_POLL_STATE_NAPI_YIELD;
- rc = false;
- } else {
- rq->bpoll_state = ENIC_POLL_STATE_NAPI;
- }
- spin_unlock(&rq->bpoll_lock);
+ int rc = atomic_cmpxchg(&rq->bpoll_state, ENIC_POLL_STATE_IDLE,
+ ENIC_POLL_STATE_NAPI);
- return rc;
+ return (rc == ENIC_POLL_STATE_IDLE);
}
-static inline bool enic_poll_unlock_napi(struct vnic_rq *rq)
+static inline void enic_poll_unlock_napi(struct vnic_rq *rq,
+ struct napi_struct *napi)
{
- bool rc = false;
-
- spin_lock(&rq->bpoll_lock);
- WARN_ON(rq->bpoll_state &
- (ENIC_POLL_STATE_POLL | ENIC_POLL_STATE_NAPI_YIELD));
- if (rq->bpoll_state & ENIC_POLL_STATE_POLL_YIELD)
- rc = true;
- rq->bpoll_state = ENIC_POLL_STATE_IDLE;
- spin_unlock(&rq->bpoll_lock);
-
- return rc;
+ WARN_ON(atomic_read(&rq->bpoll_state) != ENIC_POLL_STATE_NAPI);
+ napi_gro_flush(napi, false);
+ atomic_set(&rq->bpoll_state, ENIC_POLL_STATE_IDLE);
}
static inline bool enic_poll_lock_poll(struct vnic_rq *rq)
{
- bool rc = true;
-
- spin_lock_bh(&rq->bpoll_lock);
- if (rq->bpoll_state & ENIC_POLL_LOCKED) {
- rq->bpoll_state |= ENIC_POLL_STATE_POLL_YIELD;
- rc = false;
- } else {
- rq->bpoll_state |= ENIC_POLL_STATE_POLL;
- }
- spin_unlock_bh(&rq->bpoll_lock);
+ int rc = atomic_cmpxchg(&rq->bpoll_state, ENIC_POLL_STATE_IDLE,
+ ENIC_POLL_STATE_POLL);
- return rc;
+ return (rc == ENIC_POLL_STATE_IDLE);
}
-static inline bool enic_poll_unlock_poll(struct vnic_rq *rq)
-{
- bool rc = false;
- spin_lock_bh(&rq->bpoll_lock);
- WARN_ON(rq->bpoll_state & ENIC_POLL_STATE_NAPI);
- if (rq->bpoll_state & ENIC_POLL_STATE_POLL_YIELD)
- rc = true;
- rq->bpoll_state = ENIC_POLL_STATE_IDLE;
- spin_unlock_bh(&rq->bpoll_lock);
-
- return rc;
+static inline void enic_poll_unlock_poll(struct vnic_rq *rq)
+{
+ WARN_ON(atomic_read(&rq->bpoll_state) != ENIC_POLL_STATE_POLL);
+ atomic_set(&rq->bpoll_state, ENIC_POLL_STATE_IDLE);
}
static inline bool enic_poll_busy_polling(struct vnic_rq *rq)
{
- WARN_ON(!(rq->bpoll_state & ENIC_POLL_LOCKED));
- return rq->bpoll_state & ENIC_POLL_USER_PEND;
+ return atomic_read(&rq->bpoll_state) & ENIC_POLL_STATE_POLL;
}
#else
return true;
}
-static inline bool enic_poll_unlock_napi(struct vnic_rq *rq)
+static inline bool enic_poll_unlock_napi(struct vnic_rq *rq,
+ struct napi_struct *napi)
{
return false;
}
config GIANFAR
tristate "Gianfar Ethernet"
- depends on FSL_SOC
select FSL_PQ_MDIO
select PHYLIB
select CRC32
---help---
This driver supports the Gigabit TSEC on the MPC83xx, MPC85xx,
- and MPC86xx family of chips, and the FEC on the 8540.
+ and MPC86xx family of chips, the eTSEC on LS1021A and the FEC
+ on the 8540.
endif # NET_VENDOR_FREESCALE
#define FEC_QUIRK_BUG_CAPTURE (1 << 10)
/* Controller has only one MDIO bus */
#define FEC_QUIRK_SINGLE_MDIO (1 << 11)
+/* Controller supports RACC register */
+#define FEC_QUIRK_HAS_RACC (1 << 12)
struct fec_enet_priv_tx_q {
int index;
.driver_data = 0,
}, {
.name = "imx25-fec",
- .driver_data = FEC_QUIRK_USE_GASKET,
+ .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_HAS_RACC,
}, {
.name = "imx27-fec",
- .driver_data = 0,
+ .driver_data = FEC_QUIRK_HAS_RACC,
}, {
.name = "imx28-fec",
.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME |
- FEC_QUIRK_SINGLE_MDIO,
+ FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC,
}, {
.name = "imx6q-fec",
.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
- FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358,
+ FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358 |
+ FEC_QUIRK_HAS_RACC,
}, {
.name = "mvf600-fec",
- .driver_data = FEC_QUIRK_ENET_MAC,
+ .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_RACC,
}, {
.name = "imx6sx-fec",
.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB |
- FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE,
+ FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
+ FEC_QUIRK_HAS_RACC,
}, {
/* sentinel */
}
writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
#if !defined(CONFIG_M5272)
- /* set RX checksum */
- val = readl(fep->hwp + FEC_RACC);
- if (fep->csum_flags & FLAG_RX_CSUM_ENABLED)
- val |= FEC_RACC_OPTIONS;
- else
- val &= ~FEC_RACC_OPTIONS;
- writel(val, fep->hwp + FEC_RACC);
+ if (fep->quirks & FEC_QUIRK_HAS_RACC) {
+ /* set RX checksum */
+ val = readl(fep->hwp + FEC_RACC);
+ if (fep->csum_flags & FLAG_RX_CSUM_ENABLED)
+ val |= FEC_RACC_OPTIONS;
+ else
+ val &= ~FEC_RACC_OPTIONS;
+ writel(val, fep->hwp + FEC_RACC);
+ }
#endif
/*
/* detailed rx_errors */
sp->stats.rx_length_errors += ipg_r16(IPG_INRANGELENGTHERRORS) +
- ipg_r16(IPG_FRAMETOOLONGERRRORS);
+ ipg_r16(IPG_FRAMETOOLONGERRORS);
sp->stats.rx_crc_errors += ipg_r16(IPG_FRAMECHECKSEQERRORS);
/* Unutilized IPG statistic registers. */
#define IPG_MCSTFRAMESRCVDOK 0xB8
#define IPG_BCSTFRAMESRCVDOK 0xBE
#define IPG_MACCONTROLFRAMESRCVD 0xC6
-#define IPG_FRAMETOOLONGERRRORS 0xC8
+#define IPG_FRAMETOOLONGERRORS 0xC8
#define IPG_INRANGELENGTHERRORS 0xCA
#define IPG_FRAMECHECKSEQERRORS 0xCC
#define IPG_FRAMESLOSTRXERRORS 0xCE
if (ret_val)
return false;
out:
- if ((hw->mac.type == e1000_pch_lpt) ||
- (hw->mac.type == e1000_pch_spt)) {
- /* Unforce SMBus mode in PHY */
- e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg);
- phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
- e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg);
+ if ((hw->mac.type == e1000_pch_lpt) || (hw->mac.type == e1000_pch_spt)) {
+ /* Only unforce SMBus if ME is not active */
+ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
+ /* Unforce SMBus mode in PHY */
+ e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg);
+ phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
+ e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg);
- /* Unforce SMBus mode in MAC */
- mac_reg = er32(CTRL_EXT);
- mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
- ew32(CTRL_EXT, mac_reg);
+ /* Unforce SMBus mode in MAC */
+ mac_reg = er32(CTRL_EXT);
+ mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
+ ew32(CTRL_EXT, mac_reg);
+ }
}
return true;
u32 mac_reg;
s32 ret_val = 0;
u16 phy_reg;
+ u16 oem_reg = 0;
if ((hw->mac.type < e1000_pch_lpt) ||
(hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPT_I217_LM) ||
if (ret_val)
goto out;
+ /* Force SMBus mode in PHY */
+ ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
+ if (ret_val)
+ goto release;
+ phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
+ e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
+
+ /* Force SMBus mode in MAC */
+ mac_reg = er32(CTRL_EXT);
+ mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
+ ew32(CTRL_EXT, mac_reg);
+
/* Si workaround for ULP entry flow on i127/rev6 h/w. Enable
* LPLU and disable Gig speed when entering ULP
*/
if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6)) {
ret_val = e1000_read_phy_reg_hv_locked(hw, HV_OEM_BITS,
- &phy_reg);
+ &oem_reg);
if (ret_val)
goto release;
+
+ phy_reg = oem_reg;
phy_reg |= HV_OEM_BITS_LPLU | HV_OEM_BITS_GBE_DIS;
+
ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS,
phy_reg);
+
if (ret_val)
goto release;
}
- /* Force SMBus mode in PHY */
- ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
- if (ret_val)
- goto release;
- phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
- e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
-
- /* Force SMBus mode in MAC */
- mac_reg = er32(CTRL_EXT);
- mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
- ew32(CTRL_EXT, mac_reg);
-
/* Set Inband ULP Exit, Reset to SMBus mode and
* Disable SMBus Release on PERST# in PHY
*/
if (to_sx) {
if (er32(WUFC) & E1000_WUFC_LNKC)
phy_reg |= I218_ULP_CONFIG1_WOL_HOST;
+ else
+ phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST;
phy_reg |= I218_ULP_CONFIG1_STICKY_ULP;
+ phy_reg &= ~I218_ULP_CONFIG1_INBAND_EXIT;
} else {
phy_reg |= I218_ULP_CONFIG1_INBAND_EXIT;
+ phy_reg &= ~I218_ULP_CONFIG1_STICKY_ULP;
+ phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST;
}
e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg);
/* Commit ULP changes in PHY by starting auto ULP configuration */
phy_reg |= I218_ULP_CONFIG1_START;
e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg);
+
+ if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6) &&
+ to_sx && (er32(STATUS) & E1000_STATUS_LU)) {
+ ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS,
+ oem_reg);
+ if (ret_val)
+ goto release;
+ }
+
release:
hw->phy.ops.release(hw);
out:
if (((hw->mac.type == e1000_pch2lan) ||
(hw->mac.type == e1000_pch_lpt) ||
(hw->mac.type == e1000_pch_spt)) && link) {
- u32 reg;
+ u16 speed, duplex;
- reg = er32(STATUS);
+ e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex);
tipg_reg = er32(TIPG);
tipg_reg &= ~E1000_TIPG_IPGT_MASK;
- if (!(reg & (E1000_STATUS_FD | E1000_STATUS_SPEED_MASK))) {
+ if (duplex == HALF_DUPLEX && speed == SPEED_10) {
tipg_reg |= 0xFF;
/* Reduce Rx latency in analog PHY */
emi_val = 0;
+ } else if (hw->mac.type == e1000_pch_spt &&
+ duplex == FULL_DUPLEX && speed != SPEED_1000) {
+ tipg_reg |= 0xC;
+ emi_val = 1;
} else {
/* Roll back the default values */
if (ret_val)
return ret_val;
+
+ if (hw->mac.type == e1000_pch_spt) {
+ u16 data;
+ u16 ptr_gap;
+
+ if (speed == SPEED_1000) {
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = e1e_rphy_locked(hw,
+ PHY_REG(776, 20),
+ &data);
+ if (ret_val) {
+ hw->phy.ops.release(hw);
+ return ret_val;
+ }
+
+ ptr_gap = (data & (0x3FF << 2)) >> 2;
+ if (ptr_gap < 0x18) {
+ data &= ~(0x3FF << 2);
+ data |= (0x18 << 2);
+ ret_val =
+ e1e_wphy_locked(hw,
+ PHY_REG(776, 20),
+ data);
+ }
+ hw->phy.ops.release(hw);
+ if (ret_val)
+ return ret_val;
+ }
+ }
+ }
+
+ /* I217 Packet Loss issue:
+ * ensure that FEXTNVM4 Beacon Duration is set correctly
+ * on power up.
+ * Set the Beacon Duration for I217 to 8 usec
+ */
+ if ((hw->mac.type == e1000_pch_lpt) || (hw->mac.type == e1000_pch_spt)) {
+ u32 mac_reg;
+
+ mac_reg = er32(FEXTNVM4);
+ mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK;
+ mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC;
+ ew32(FEXTNVM4, mac_reg);
}
/* Work-around I218 hang issue */
if ((hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_LM) ||
(hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_V) ||
(hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_LM3) ||
- (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V3) ||
- (hw->mac.type == e1000_pch_spt)) {
+ (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V3)) {
ret_val = e1000_k1_workaround_lpt_lp(hw, link);
if (ret_val)
return ret_val;
}
/**
- * e1000e_disable_aspm - Disable ASPM states
+ * __e1000e_disable_aspm - Disable ASPM states
* @pdev: pointer to PCI device struct
* @state: bit-mask of ASPM states to disable
+ * @locked: indication if this context holds pci_bus_sem locked.
*
* Some devices *must* have certain ASPM states disabled per hardware errata.
**/
-static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
+static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state, int locked)
{
struct pci_dev *parent = pdev->bus->self;
u16 aspm_dis_mask = 0;
"L1" : "");
#ifdef CONFIG_PCIEASPM
- pci_disable_link_state_locked(pdev, state);
+ if (locked)
+ pci_disable_link_state_locked(pdev, state);
+ else
+ pci_disable_link_state(pdev, state);
/* Double-check ASPM control. If not disabled by the above, the
* BIOS is preventing that from happening (or CONFIG_PCIEASPM is
aspm_dis_mask);
}
+/**
+ * e1000e_disable_aspm - Disable ASPM states.
+ * @pdev: pointer to PCI device struct
+ * @state: bit-mask of ASPM states to disable
+ *
+ * This function acquires the pci_bus_sem!
+ * Some devices *must* have certain ASPM states disabled per hardware errata.
+ **/
+static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
+{
+ __e1000e_disable_aspm(pdev, state, 0);
+}
+
+/**
+ * e1000e_disable_aspm_locked Disable ASPM states.
+ * @pdev: pointer to PCI device struct
+ * @state: bit-mask of ASPM states to disable
+ *
+ * This function must be called with pci_bus_sem acquired!
+ * Some devices *must* have certain ASPM states disabled per hardware errata.
+ **/
+static void e1000e_disable_aspm_locked(struct pci_dev *pdev, u16 state)
+{
+ __e1000e_disable_aspm(pdev, state, 1);
+}
+
#ifdef CONFIG_PM
static int __e1000_resume(struct pci_dev *pdev)
{
if (adapter->flags2 & FLAG2_DISABLE_ASPM_L1)
aspm_disable_flag |= PCIE_LINK_STATE_L1;
if (aspm_disable_flag)
- e1000e_disable_aspm(pdev, aspm_disable_flag);
+ e1000e_disable_aspm_locked(pdev, aspm_disable_flag);
pci_set_master(pdev);
if (!dev)
return -ENOMEM;
+ /* warn if we are about to overwrite the pointer */
+ WARN_ON(tx_ring->tx_bi);
bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
if (!tx_ring->tx_bi)
struct device *dev = rx_ring->dev;
int bi_size;
+ /* warn if we are about to overwrite the pointer */
+ WARN_ON(rx_ring->rx_bi);
bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
if (!rx_ring->rx_bi)
int i40evf_up(struct i40evf_adapter *adapter);
void i40evf_down(struct i40evf_adapter *adapter);
-void i40evf_reinit_locked(struct i40evf_adapter *adapter);
void i40evf_reset(struct i40evf_adapter *adapter);
void i40evf_set_ethtool_ops(struct net_device *netdev);
void i40evf_update_stats(struct i40evf_adapter *adapter);
adapter->tx_desc_count = new_tx_count;
adapter->rx_desc_count = new_rx_count;
- if (netif_running(netdev))
- i40evf_reinit_locked(adapter);
+ if (netif_running(netdev)) {
+ adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
+ schedule_work(&adapter->reset_task);
+ }
return 0;
}
struct i40evf_adapter *adapter = netdev_priv(netdev);
adapter->tx_timeout_count++;
- if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) {
+ if (!(adapter->flags & (I40EVF_FLAG_RESET_PENDING |
+ I40EVF_FLAG_RESET_NEEDED))) {
adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
schedule_work(&adapter->reset_task);
}
for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) {
lut = 0;
for (j = 0; j < 4; j++) {
- if (cqueue == adapter->vsi_res->num_queue_pairs)
+ if (cqueue == adapter->num_active_queues)
cqueue = 0;
lut |= ((cqueue) << (8 * j));
cqueue++;
i40e_flush(hw);
}
-#define I40EVF_RESET_WAIT_MS 100
-#define I40EVF_RESET_WAIT_COUNT 200
+#define I40EVF_RESET_WAIT_MS 10
+#define I40EVF_RESET_WAIT_COUNT 500
/**
* i40evf_reset_task - Call-back task to handle hardware reset
* @work: pointer to work_struct
&adapter->crit_section))
usleep_range(500, 1000);
+ i40evf_misc_irq_disable(adapter);
if (adapter->flags & I40EVF_FLAG_RESET_NEEDED) {
- dev_info(&adapter->pdev->dev, "Requesting reset from PF\n");
+ adapter->flags &= ~I40EVF_FLAG_RESET_NEEDED;
+ /* Restart the AQ here. If we have been reset but didn't
+ * detect it, or if the PF had to reinit, our AQ will be hosed.
+ */
+ i40evf_shutdown_adminq(hw);
+ i40evf_init_adminq(hw);
i40evf_request_reset(adapter);
}
+ adapter->flags |= I40EVF_FLAG_RESET_PENDING;
/* poll until we see the reset actually happen */
for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
if ((rstat_val != I40E_VFR_VFACTIVE) &&
(rstat_val != I40E_VFR_COMPLETED))
break;
- msleep(I40EVF_RESET_WAIT_MS);
+ usleep_range(500, 1000);
}
if (i == I40EVF_RESET_WAIT_COUNT) {
- adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+ dev_info(&adapter->pdev->dev, "Never saw reset\n");
goto continue_reset; /* act like the reset happened */
}
for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
rstat_val = rd32(hw, I40E_VFGEN_RSTAT) &
I40E_VFGEN_RSTAT_VFR_STATE_MASK;
- if ((rstat_val == I40E_VFR_VFACTIVE) ||
- (rstat_val == I40E_VFR_COMPLETED))
+ if (rstat_val == I40E_VFR_VFACTIVE)
break;
msleep(I40EVF_RESET_WAIT_MS);
}
+ /* extra wait to make sure minimum wait is met */
+ msleep(I40EVF_RESET_WAIT_MS);
if (i == I40EVF_RESET_WAIT_COUNT) {
struct i40evf_mac_filter *f, *ftmp;
struct i40evf_vlan_filter *fv, *fvtmp;
if (netif_running(adapter->netdev)) {
set_bit(__I40E_DOWN, &adapter->vsi.state);
- i40evf_irq_disable(adapter);
- i40evf_napi_disable_all(adapter);
- netif_tx_disable(netdev);
- netif_tx_stop_all_queues(netdev);
netif_carrier_off(netdev);
+ netif_tx_disable(netdev);
+ i40evf_napi_disable_all(adapter);
+ i40evf_irq_disable(adapter);
i40evf_free_traffic_irqs(adapter);
i40evf_free_all_tx_resources(adapter);
i40evf_free_all_rx_resources(adapter);
list_del(&f->list);
kfree(f);
}
+
list_for_each_entry_safe(fv, fvtmp, &adapter->vlan_filter_list,
list) {
list_del(&fv->list);
i40evf_shutdown_adminq(hw);
adapter->netdev->flags &= ~IFF_UP;
clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+ dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n");
return; /* Do not attempt to reinit. It's dead, Jim. */
}
continue_reset:
- adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
-
- i40evf_irq_disable(adapter);
-
if (netif_running(adapter->netdev)) {
- i40evf_napi_disable_all(adapter);
- netif_tx_disable(netdev);
- netif_tx_stop_all_queues(netdev);
netif_carrier_off(netdev);
+ netif_tx_stop_all_queues(netdev);
+ i40evf_napi_disable_all(adapter);
}
+ i40evf_irq_disable(adapter);
adapter->state = __I40EVF_RESETTING;
+ adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+
+ /* free the Tx/Rx rings and descriptors, might be better to just
+ * re-use them sometime in the future
+ */
+ i40evf_free_all_rx_resources(adapter);
+ i40evf_free_all_tx_resources(adapter);
/* kill and reinit the admin queue */
if (i40evf_shutdown_adminq(hw))
adapter->aq_required = I40EVF_FLAG_AQ_ADD_MAC_FILTER;
adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ i40evf_misc_irq_enable(adapter);
mod_timer(&adapter->watchdog_timer, jiffies + 2);
goto reset_err;
i40evf_irq_enable(adapter, true);
+ } else {
+ adapter->state = __I40EVF_DOWN;
}
+
return;
reset_err:
dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
memset(event.msg_buf, 0, I40EVF_MAX_AQ_BUF_SIZE);
} while (pending);
+ if ((adapter->flags &
+ (I40EVF_FLAG_RESET_PENDING | I40EVF_FLAG_RESET_NEEDED)) ||
+ adapter->state == __I40EVF_RESETTING)
+ goto freedom;
+
/* check for error indications */
val = rd32(hw, hw->aq.arq.len);
oldval = val;
if (oldval != val)
wr32(hw, hw->aq.asq.len, val);
+freedom:
kfree(event.msg_buf);
out:
/* re-enable Admin queue interrupt cause */
return &adapter->net_stats;
}
-/**
- * i40evf_reinit_locked - Software reinit
- * @adapter: board private structure
- *
- * Reinititalizes the ring structures in response to a software configuration
- * change. Roughly the same as close followed by open, but skips releasing
- * and reallocating the interrupts.
- **/
-void i40evf_reinit_locked(struct i40evf_adapter *adapter)
-{
- struct net_device *netdev = adapter->netdev;
- int err;
-
- WARN_ON(in_interrupt());
-
- i40evf_down(adapter);
-
- /* allocate transmit descriptors */
- err = i40evf_setup_all_tx_resources(adapter);
- if (err)
- goto err_reinit;
-
- /* allocate receive descriptors */
- err = i40evf_setup_all_rx_resources(adapter);
- if (err)
- goto err_reinit;
-
- i40evf_configure(adapter);
-
- err = i40evf_up_complete(adapter);
- if (err)
- goto err_reinit;
-
- i40evf_irq_enable(adapter, true);
- return;
-
-err_reinit:
- dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
- i40evf_close(netdev);
-}
-
/**
* i40evf_change_mtu - Change the Maximum Transfer Unit
* @netdev: network interface device structure
if ((new_mtu < 68) || (max_frame > I40E_MAX_RXBUFFER))
return -EINVAL;
- /* must set new MTU before calling down or up */
netdev->mtu = new_mtu;
- i40evf_reinit_locked(adapter);
+ adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
+ schedule_work(&adapter->reset_task);
+
return 0;
}
/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
+ * Copyright(c) 2007-2015 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* igb_rx_fifo_flush_82575 - Clean rx fifo after RX enable
* @hw: pointer to the HW structure
*
- * After rx enable if managability is enabled then there is likely some
- * bad data at the start of the fifo and possibly in the DMA fifo. This
+ * After rx enable if manageability is enabled then there is likely some
+ * bad data at the start of the fifo and possibly in the DMA fifo. This
* function clears the fifos and flushes any packets that came in as rx was
* being enabled.
**/
u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled;
int i, ms_wait;
+ /* disable IPv6 options as per hardware errata */
+ rfctl = rd32(E1000_RFCTL);
+ rfctl |= E1000_RFCTL_IPV6_EX_DIS;
+ wr32(E1000_RFCTL, rfctl);
+
if (hw->mac.type != e1000_82575 ||
!(rd32(E1000_MANC) & E1000_MANC_RCV_TCO_EN))
return;
* incoming packets are rejected. Set enable and wait 2ms so that
* any packet that was coming in as RCTL.EN was set is flushed
*/
- rfctl = rd32(E1000_RFCTL);
wr32(E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF);
rlpml = rd32(E1000_RLPML);
#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */
/* Header split receive */
-#define E1000_RFCTL_LEF 0x00040000
+#define E1000_RFCTL_IPV6_EX_DIS 0x00010000
+#define E1000_RFCTL_LEF 0x00040000
/* Collision related configuration parameters */
#define E1000_COLLISION_THRESHOLD 15
#define MAJ 5
#define MIN 2
-#define BUILD 15
+#define BUILD 18
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
__stringify(BUILD) "-k"
char igb_driver_name[] = "igb";
unsigned int link;
unsigned int duplex;
unsigned int speed;
+ unsigned int tx_csum_limit;
int use_inband_status:1;
};
dev->mtu = mtu;
- if (!netif_running(dev))
+ if (!netif_running(dev)) {
+ netdev_update_features(dev);
return 0;
+ }
/* The interface is running, so we have to force a
* reallocation of the queues
mvneta_start_dev(pp);
mvneta_port_up(pp);
+ netdev_update_features(dev);
+
return 0;
}
+static netdev_features_t mvneta_fix_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ struct mvneta_port *pp = netdev_priv(dev);
+
+ if (pp->tx_csum_limit && dev->mtu > pp->tx_csum_limit) {
+ features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+ netdev_info(dev,
+ "Disable IP checksum for MTU greater than %dB\n",
+ pp->tx_csum_limit);
+ }
+
+ return features;
+}
+
/* Get mac address */
static void mvneta_get_mac_addr(struct mvneta_port *pp, unsigned char *addr)
{
.ndo_set_rx_mode = mvneta_set_rx_mode,
.ndo_set_mac_address = mvneta_set_mac_addr,
.ndo_change_mtu = mvneta_change_mtu,
+ .ndo_fix_features = mvneta_fix_features,
.ndo_get_stats64 = mvneta_get_stats64,
.ndo_do_ioctl = mvneta_ioctl,
};
}
}
+ if (of_device_is_compatible(dn, "marvell,armada-370-neta"))
+ pp->tx_csum_limit = 1600;
+
pp->tx_ring_size = MVNETA_MAX_TXD;
pp->rx_ring_size = MVNETA_MAX_RXD;
static const struct of_device_id mvneta_match[] = {
{ .compatible = "marvell,armada-370-neta" },
+ { .compatible = "marvell,armada-xp-neta" },
{ }
};
MODULE_DEVICE_TABLE(of, mvneta_match);
mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
}
- if (priv->base_tx_qpn) {
- mlx4_qp_release_range(priv->mdev->dev, priv->base_tx_qpn, priv->tx_ring_num);
- priv->base_tx_qpn = 0;
- }
}
int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
}
#endif
static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
- int hwtstamp_rx_filter)
+ netdev_features_t dev_features)
{
__wsum hw_checksum = 0;
hw_checksum = csum_unfold((__force __sum16)cqe->checksum);
- if (((struct ethhdr *)va)->h_proto == htons(ETH_P_8021Q) &&
- hwtstamp_rx_filter != HWTSTAMP_FILTER_NONE) {
- /* next protocol non IPv4 or IPv6 */
- if (((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto
- != htons(ETH_P_IP) &&
- ((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto
- != htons(ETH_P_IPV6))
- return -1;
+ if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK) &&
+ !(dev_features & NETIF_F_HW_VLAN_CTAG_RX)) {
hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr);
hdr += sizeof(struct vlan_hdr);
}
if (ip_summed == CHECKSUM_COMPLETE) {
void *va = skb_frag_address(skb_shinfo(gro_skb)->frags);
- if (check_csum(cqe, gro_skb, va, ring->hwtstamp_rx_filter)) {
+ if (check_csum(cqe, gro_skb, va,
+ dev->features)) {
ip_summed = CHECKSUM_NONE;
ring->csum_none++;
ring->csum_complete--;
}
if (ip_summed == CHECKSUM_COMPLETE) {
- if (check_csum(cqe, skb, skb->data, ring->hwtstamp_rx_filter)) {
+ if (check_csum(cqe, skb, skb->data, dev->features)) {
ip_summed = CHECKSUM_NONE;
ring->csum_complete--;
ring->csum_none++;
ring->size = size;
ring->size_mask = size - 1;
ring->stride = stride;
+ ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS;
tmp = size * sizeof(struct mlx4_en_tx_info);
ring->tx_info = kmalloc_node(tmp, GFP_KERNEL | __GFP_NOWARN, node);
mlx4_bf_free(mdev->dev, &ring->bf);
mlx4_qp_remove(mdev->dev, &ring->qp);
mlx4_qp_free(mdev->dev, &ring->qp);
+ mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
mlx4_en_unmap_buffer(&ring->wqres.buf);
mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
kfree(ring->bounce_buf);
MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp);
}
+static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring)
+{
+ return ring->prod - ring->cons > ring->full_size;
+}
+
static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring *ring, int index,
u8 owner)
netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
- /*
- * Wakeup Tx queue if this stopped, and at least 1 packet
- * was completed
+ /* Wakeup Tx queue if this stopped, and ring is not full.
*/
- if (netif_tx_queue_stopped(ring->tx_queue) && txbbs_skipped > 0) {
+ if (netif_tx_queue_stopped(ring->tx_queue) &&
+ !mlx4_en_is_tx_ring_full(ring)) {
netif_tx_wake_queue(ring->tx_queue);
ring->wake_queue++;
}
skb_tx_timestamp(skb);
/* Check available TXBBs And 2K spare for prefetch */
- stop_queue = (int)(ring->prod - ring_cons) >
- ring->size - HEADROOM - MAX_DESC_TXBBS;
+ stop_queue = mlx4_en_is_tx_ring_full(ring);
if (unlikely(stop_queue)) {
netif_tx_stop_queue(ring->tx_queue);
ring->queue_stopped++;
smp_rmb();
ring_cons = ACCESS_ONCE(ring->cons);
- if (unlikely(((int)(ring->prod - ring_cons)) <=
- ring->size - HEADROOM - MAX_DESC_TXBBS)) {
+ if (unlikely(!mlx4_en_is_tx_ring_full(ring))) {
netif_tx_wake_queue(ring->tx_queue);
ring->wake_queue++;
}
mutex_lock(&intf_mutex);
list_add_tail(&intf->list, &intf_list);
- list_for_each_entry(priv, &dev_list, dev_list)
+ list_for_each_entry(priv, &dev_list, dev_list) {
+ if (mlx4_is_mfunc(&priv->dev) && (intf->flags & MLX4_INTFF_BONDING)) {
+ mlx4_dbg(&priv->dev,
+ "SRIOV, disabling HA mode for intf proto %d\n", intf->protocol);
+ intf->flags &= ~MLX4_INTFF_BONDING;
+ }
mlx4_add_device(intf, priv);
+ }
mutex_unlock(&intf_mutex);
u32 size; /* number of TXBBs */
u32 size_mask;
u16 stride;
+ u32 full_size;
u16 cqn; /* index of port CQ associated with this ring */
u32 buf_size;
__be32 doorbell_qpn;
int vids[128];
bool wol;
struct device *ddev;
- int base_tx_qpn;
struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE];
struct hwtstamp_config hwtstamp_config;
u32 counter_index;
MODULE_FIRMWARE("myri10ge_rss_ethp_z8e.dat");
MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat");
-/* Careful: must be accessed under kparam_block_sysfs_write */
+/* Careful: must be accessed under kernel_param_lock() */
static char *myri10ge_fw_name = NULL;
module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name");
}
}
- kparam_block_sysfs_write(myri10ge_fw_name);
+ kernel_param_lock(THIS_MODULE);
if (myri10ge_fw_name != NULL) {
char *fw_name = kstrdup(myri10ge_fw_name, GFP_KERNEL);
if (fw_name) {
set_fw_name(mgp, fw_name, true);
}
}
- kparam_unblock_sysfs_write(myri10ge_fw_name);
+ kernel_param_unlock(THIS_MODULE);
if (mgp->board_number < MYRI10GE_MAX_BOARDS &&
myri10ge_fw_names[mgp->board_number] != NULL &&
priv->ptp.current_addend = addend;
gccr = ravb_read(ndev, GCCR);
- if (gccr & GCCR_LTI)
+ if (gccr & GCCR_LTI) {
+ spin_unlock_irqrestore(&priv->lock, flags);
return -EBUSY;
+ }
ravb_write(ndev, addend & GTI_TIV, GTI);
ravb_write(ndev, gccr | GCCR_LTI, GCCR);
EDB_MASTER_EN = 0x00002000
};
-enum sis900_eeprom_access_reigster_bits {
+enum sis900_eeprom_access_register_bits {
MDC = 0x00000040, MDDIR = 0x00000020, MDIO = 0x00000010, /* 7016 specific */
EECS = 0x00000008, EECLK = 0x00000004, EEDO = 0x00000002,
EEDI = 0x00000001
RxERR = 0x00000004, RxDESC = 0x00000002, RxOK = 0x00000001
};
-enum sis900_interrupt_enable_reigster_bits {
+enum sis900_interrupt_enable_register_bits {
IE = 0x00000001
};
#define MMC_RX_OCTETCOUNT_G 0x00000188
#define MMC_RX_BROADCASTFRAME_G 0x0000018c
#define MMC_RX_MULTICASTFRAME_G 0x00000190
-#define MMC_RX_CRC_ERRROR 0x00000194
+#define MMC_RX_CRC_ERROR 0x00000194
#define MMC_RX_ALIGN_ERROR 0x00000198
#define MMC_RX_RUN_ERROR 0x0000019C
#define MMC_RX_JABBER_ERROR 0x000001A0
mmc->mmc_rx_octetcount_g += readl(ioaddr + MMC_RX_OCTETCOUNT_G);
mmc->mmc_rx_broadcastframe_g += readl(ioaddr + MMC_RX_BROADCASTFRAME_G);
mmc->mmc_rx_multicastframe_g += readl(ioaddr + MMC_RX_MULTICASTFRAME_G);
- mmc->mmc_rx_crc_error += readl(ioaddr + MMC_RX_CRC_ERRROR);
+ mmc->mmc_rx_crc_error += readl(ioaddr + MMC_RX_CRC_ERROR);
mmc->mmc_rx_align_error += readl(ioaddr + MMC_RX_ALIGN_ERROR);
mmc->mmc_rx_run_error += readl(ioaddr + MMC_RX_RUN_ERROR);
mmc->mmc_rx_jabber_error += readl(ioaddr + MMC_RX_JABBER_ERROR);
config VIA_RHINE
tristate "VIA Rhine support"
depends on (PCI || OF_IRQ)
+ depends on HAS_DMA
select CRC32
select MII
---help---
config VIA_VELOCITY
tristate "VIA Velocity support"
depends on (PCI || (OF_ADDRESS && OF_IRQ))
+ depends on HAS_DMA
select CRC32
select CRC_CCITT
select MII
pr_info_once("%s: %s PHY revision: 0x%02x, patch: %d\n",
dev_name(&phydev->dev), phydev->drv->name, rev, patch);
+ /* Dummy read to a register to workaround an issue upon reset where the
+ * internal inverter may not allow the first MDIO transaction to pass
+ * the MDIO management controller and make us return 0xffff for such
+ * reads.
+ */
+ phy_read(phydev, MII_BMSR);
+
switch (rev) {
case 0xb0:
ret = bcm7xxx_28nm_b0_afe_config_init(phydev);
return 0;
}
+/* Workaround for integrated BCM7xxx Gigabit PHYs which have a problem with
+ * their internal MDIO management controller making them fail to successfully
+ * be read from or written to for the first transaction. We insert a dummy
+ * BMSR read here to make sure that phy_get_device() and get_phy_id() can
+ * correctly read the PHY MII_PHYSID1/2 registers and successfully register a
+ * PHY device for this peripheral.
+ *
+ * Once the PHY driver is registered, we can workaround subsequent reads from
+ * there (e.g: during system-wide power management).
+ *
+ * bus->reset is invoked before mdiobus_scan during mdiobus_register and is
+ * therefore the right location to stick that workaround. Since we do not want
+ * to read from non-existing PHYs, we either use bus->phy_mask or do a manual
+ * Device Tree scan to limit the search area.
+ */
+static int unimac_mdio_reset(struct mii_bus *bus)
+{
+ struct device_node *np = bus->dev.of_node;
+ struct device_node *child;
+ u32 read_mask = 0;
+ int addr;
+
+ if (!np) {
+ read_mask = ~bus->phy_mask;
+ } else {
+ for_each_available_child_of_node(np, child) {
+ addr = of_mdio_parse_addr(&bus->dev, child);
+ if (addr < 0)
+ continue;
+
+ read_mask |= 1 << addr;
+ }
+ }
+
+ for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
+ if (read_mask & 1 << addr)
+ mdiobus_read(bus, addr, MII_BMSR);
+ }
+
+ return 0;
+}
+
static int unimac_mdio_probe(struct platform_device *pdev)
{
struct unimac_mdio_priv *priv;
bus->parent = &pdev->dev;
bus->read = unimac_mdio_read;
bus->write = unimac_mdio_write;
+ bus->reset = unimac_mdio_reset;
snprintf(bus->id, MII_BUS_ID_SIZE, "%s", pdev->name);
bus->irq = kcalloc(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL);
for (i = 1;
i < num_ids && c45_ids->devices_in_package == 0;
i++) {
- reg_addr = MII_ADDR_C45 | i << 16 | MDIO_DEVS2;
+retry: reg_addr = MII_ADDR_C45 | i << 16 | MDIO_DEVS2;
phy_reg = mdiobus_read(bus, addr, reg_addr);
if (phy_reg < 0)
return -EIO;
return -EIO;
c45_ids->devices_in_package |= (phy_reg & 0xffff);
- /* If mostly Fs, there is no device there,
- * let's get out of here.
- */
if ((c45_ids->devices_in_package & 0x1fffffff) == 0x1fffffff) {
- *phy_id = 0xffffffff;
- return 0;
+ if (i) {
+ /* If mostly Fs, there is no device there,
+ * then let's continue to probe more, as some
+ * 10G PHYs have zero Devices In package,
+ * e.g. Cortina CS4315/CS4340 PHY.
+ */
+ i = 0;
+ goto retry;
+ } else {
+ /* no device there, let's get out of here */
+ *phy_id = 0xffffffff;
+ return 0;
+ }
}
}
if (phydev->supported & (SUPPORTED_1000baseT_Half |
SUPPORTED_1000baseT_Full)) {
adv |= ethtool_adv_to_mii_ctrl1000_t(advertise);
- if (adv != oldadv)
- changed = 1;
}
+ if (adv != oldadv)
+ changed = 1;
+
err = phy_write(phydev, MII_CTRL1000, adv);
if (err < 0)
return err;
#define PHY_ID_VSC8244 0x000fc6c0
#define PHY_ID_VSC8514 0x00070670
#define PHY_ID_VSC8574 0x000704a0
+#define PHY_ID_VSC8641 0x00070431
#define PHY_ID_VSC8662 0x00070660
#define PHY_ID_VSC8221 0x000fc550
#define PHY_ID_VSC8211 0x000fc4b0
.ack_interrupt = &vsc824x_ack_interrupt,
.config_intr = &vsc82xx_config_intr,
.driver = { .owner = THIS_MODULE,},
+}, {
+ .phy_id = PHY_ID_VSC8641,
+ .name = "Vitesse VSC8641",
+ .phy_id_mask = 0x000ffff0,
+ .features = PHY_GBIT_FEATURES,
+ .flags = PHY_HAS_INTERRUPT,
+ .config_init = &vsc824x_config_init,
+ .config_aneg = &vsc82x4_config_aneg,
+ .read_status = &genphy_read_status,
+ .ack_interrupt = &vsc824x_ack_interrupt,
+ .config_intr = &vsc82xx_config_intr,
+ .driver = { .owner = THIS_MODULE,},
}, {
.phy_id = PHY_ID_VSC8662,
.name = "Vitesse VSC8662",
{ PHY_ID_VSC8244, 0x000fffc0 },
{ PHY_ID_VSC8514, 0x000ffff0 },
{ PHY_ID_VSC8574, 0x000ffff0 },
+ { PHY_ID_VSC8641, 0x000ffff0 },
{ PHY_ID_VSC8662, 0x000ffff0 },
{ PHY_ID_VSC8221, 0x000ffff0 },
{ PHY_ID_VSC8211, 0x000ffff0 },
/*
* Version numbers
*/
-#define VMXNET3_DRIVER_VERSION_STRING "1.3.5.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING "1.4.2.0-k"
/* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM 0x01030500
+#define VMXNET3_DRIVER_VERSION_NUM 0x01040200
#if defined(CONFIG_PCI_MSI)
/* RSS only makes sense if MSI-X is supported. */
return ret;
}
-static struct kernel_param_ops mtu_max_ops = {
+static const struct kernel_param_ops mtu_max_ops = {
.set = mtu_max_set,
.get = param_get_uint,
};
return 0;
}
-static struct kernel_param_ops ring_order_ops = {
+static const struct kernel_param_ops ring_order_ops = {
.set = ring_order_set,
.get = param_get_uint,
};
lbtf_deb_enter(LBTF_DEB_USB);
- kparam_block_sysfs_write(fw_name);
+ kernel_param_lock(THIS_MODULE);
ret = request_firmware(&cardp->fw, lbtf_fw_name, &cardp->udev->dev);
if (ret < 0) {
pr_err("request_firmware() failed with %#x\n", ret);
pr_err("firmware %s not found\n", lbtf_fw_name);
- kparam_unblock_sysfs_write(fw_name);
+ kernel_param_unlock(THIS_MODULE);
goto done;
}
- kparam_unblock_sysfs_write(fw_name);
+ kernel_param_unlock(THIS_MODULE);
if (check_fwfile_format(cardp->fw->data, cardp->fw->size))
goto release_fw;
#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/memory.h>
+#include <xen/page.h>
#include <asm/xen/hypercall.h>
-#include <asm/xen/page.h>
/* Provide an option to disable split event channels at load time as
* event channels are limited resource. Split event channels are
#include <linux/slab.h>
#include <net/ip.h>
-#include <asm/xen/page.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
#include <xen/events.h>
np = netdev_priv(netdev);
np->xbdev = dev;
- /* No need to use rtnl_lock() before the call below as it
- * happens before register_netdev().
- */
- netif_set_real_num_tx_queues(netdev, 0);
np->queues = NULL;
err = -ENOMEM;
xennet_disconnect_backend(info);
kfree(info->queues);
info->queues = NULL;
- rtnl_lock();
- netif_set_real_num_tx_queues(info->netdev, 0);
- rtnl_unlock();
out:
return err;
}
/* MSI IRQ */
if (IS_ENABLED(CONFIG_PCI_MSI)) {
for (i = 0; i < ks_pcie->num_msi_host_irqs; i++) {
- irq_set_chained_handler(ks_pcie->msi_host_irqs[i],
- ks_pcie_msi_irq_handler);
- irq_set_handler_data(ks_pcie->msi_host_irqs[i],
- ks_pcie);
+ irq_set_chained_handler_and_data(ks_pcie->msi_host_irqs[i],
+ ks_pcie_msi_irq_handler,
+ ks_pcie);
}
}
}
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/time.h>
+#include <linux/ktime.h>
#include <xen/platform_pci.h>
#include <asm/xen/swiotlb-xen.h>
evtchn_port_t port = pdev->evtchn;
unsigned irq = pdev->irq;
s64 ns, ns_timeout;
- struct timeval tv;
spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
* (in the latter case we end up continually re-executing poll() with a
* timeout in the past). 1s difference gives plenty of slack for error.
*/
- do_gettimeofday(&tv);
- ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
+ ns_timeout = ktime_get_ns() + 2 * (s64)NSEC_PER_SEC;
xen_clear_irq_pending(irq);
(unsigned long *)&pdev->sh_info->flags)) {
xen_poll_irq_timeout(irq, jiffies + 3*HZ);
xen_clear_irq_pending(irq);
- do_gettimeofday(&tv);
- ns = timeval_to_ns(&tv);
+ ns = ktime_get_ns();
if (ns > ns_timeout) {
dev_err(&pdev->xdev->dev,
"pciback not responding!!!\n");
set_irq_flags(virq, IRQF_VALID);
};
- irq_set_chained_handler(irq, mtk_eint_irq_handler);
- irq_set_handler_data(irq, pctl);
+ irq_set_chained_handler_and_data(irq, mtk_eint_irq_handler, pctl);
set_irq_flags(irq, IRQF_VALID);
return 0;
pint->pint_map_port = adi_pint_map_port;
platform_set_drvdata(pdev, pint);
- irq_set_chained_handler(pint->irq, adi_gpio_handle_pint_irq);
- irq_set_handler_data(pint->irq, pint);
+ irq_set_chained_handler_and_data(pint->irq, adi_gpio_handle_pint_irq,
+ pint);
list_add_tail(&pint->node, &adi_pint_list);
if (IS_ERR(info->irqmux_base))
return PTR_ERR(info->irqmux_base);
- irq_set_chained_handler(irq, st_gpio_irqmux_handler);
- irq_set_handler_data(irq, info);
+ irq_set_chained_handler_and_data(irq, st_gpio_irqmux_handler,
+ info);
}
return -ENOMEM;
}
- irq_set_chained_handler(irq, exynos_irq_demux_eint16_31);
- irq_set_handler_data(irq, muxed_data);
+ irq_set_chained_handler_and_data(irq, exynos_irq_demux_eint16_31,
+ muxed_data);
bank = d->pin_banks;
idx = 0;
}
eint_data->parents[i] = irq;
- irq_set_chained_handler(irq, handlers[i]);
- irq_set_handler_data(irq, eint_data);
+ irq_set_chained_handler_and_data(irq, handlers[i], eint_data);
}
bank = d->pin_banks;
data->domains[nr_domains++] = bank->irq_domain;
}
- irq_set_chained_handler(d->irq, s3c64xx_eint_gpio_irq);
- irq_set_handler_data(d->irq, data);
+ irq_set_chained_handler_and_data(d->irq, s3c64xx_eint_gpio_irq, data);
return 0;
}
return -ENXIO;
}
- irq_set_chained_handler(irq, s3c64xx_eint0_handlers[i]);
- irq_set_handler_data(irq, data);
+ irq_set_chained_handler_and_data(irq,
+ s3c64xx_eint0_handlers[i],
+ data);
}
bank = d->pin_banks;
writel(0xffffffff,
pctl->membase + sunxi_irq_status_reg_from_bank(i));
- irq_set_chained_handler(pctl->irq[i],
- sunxi_pinctrl_irq_handler);
- irq_set_handler_data(pctl->irq[i], pctl);
+ irq_set_chained_handler_and_data(pctl->irq[i],
+ sunxi_pinctrl_irq_handler,
+ pctl);
}
dev_info(&pdev->dev, "initialized sunXi PIO driver\n");
#define param_get_battery_voltage param_get_int
-static struct kernel_param_ops param_ops_ac_online = {
+static const struct kernel_param_ops param_ops_ac_online = {
.set = param_set_ac_online,
.get = param_get_ac_online,
};
-static struct kernel_param_ops param_ops_usb_online = {
+static const struct kernel_param_ops param_ops_usb_online = {
.set = param_set_usb_online,
.get = param_get_usb_online,
};
-static struct kernel_param_ops param_ops_battery_status = {
+static const struct kernel_param_ops param_ops_battery_status = {
.set = param_set_battery_status,
.get = param_get_battery_status,
};
-static struct kernel_param_ops param_ops_battery_present = {
+static const struct kernel_param_ops param_ops_battery_present = {
.set = param_set_battery_present,
.get = param_get_battery_present,
};
-static struct kernel_param_ops param_ops_battery_technology = {
+static const struct kernel_param_ops param_ops_battery_technology = {
.set = param_set_battery_technology,
.get = param_get_battery_technology,
};
-static struct kernel_param_ops param_ops_battery_health = {
+static const struct kernel_param_ops param_ops_battery_health = {
.set = param_set_battery_health,
.get = param_get_battery_health,
};
-static struct kernel_param_ops param_ops_battery_capacity = {
+static const struct kernel_param_ops param_ops_battery_capacity = {
.set = param_set_battery_capacity,
.get = param_get_battery_capacity,
};
-static struct kernel_param_ops param_ops_battery_voltage = {
+static const struct kernel_param_ops param_ops_battery_voltage = {
.set = param_set_battery_voltage,
.get = param_get_battery_voltage,
};
u8 reserved[4096 - 16];
} __attribute__((packed, aligned(PAGE_SIZE)));
-static void sclp_fill_cpu_info(struct sclp_cpu_info *info,
- struct read_cpu_info_sccb *sccb)
+static void sclp_fill_core_info(struct sclp_core_info *info,
+ struct read_cpu_info_sccb *sccb)
{
char *page = (char *) sccb;
info->configured = sccb->nr_configured;
info->standby = sccb->nr_standby;
info->combined = sccb->nr_configured + sccb->nr_standby;
- info->has_cpu_type = sclp.has_cpu_type;
- memcpy(&info->cpu, page + sccb->offset_configured,
- info->combined * sizeof(struct sclp_cpu_entry));
+ memcpy(&info->core, page + sccb->offset_configured,
+ info->combined * sizeof(struct sclp_core_entry));
}
-int sclp_get_cpu_info(struct sclp_cpu_info *info)
+int sclp_get_core_info(struct sclp_core_info *info)
{
int rc;
struct read_cpu_info_sccb *sccb;
rc = -EIO;
goto out;
}
- sclp_fill_cpu_info(info, sccb);
+ sclp_fill_core_info(info, sccb);
out:
free_page((unsigned long) sccb);
return rc;
struct sccb_header header;
} __attribute__((packed, aligned(8)));
-static int do_cpu_configure(sclp_cmdw_t cmd)
+static int do_core_configure(sclp_cmdw_t cmd)
{
struct cpu_configure_sccb *sccb;
int rc;
return rc;
}
-int sclp_cpu_configure(u8 cpu)
+int sclp_core_configure(u8 core)
{
- return do_cpu_configure(SCLP_CMDW_CONFIGURE_CPU | cpu << 8);
+ return do_core_configure(SCLP_CMDW_CONFIGURE_CPU | core << 8);
}
-int sclp_cpu_deconfigure(u8 cpu)
+int sclp_core_deconfigure(u8 core)
{
- return do_cpu_configure(SCLP_CMDW_DECONFIGURE_CPU | cpu << 8);
+ return do_core_configure(SCLP_CMDW_DECONFIGURE_CPU | core << 8);
}
#ifdef CONFIG_MEMORY_HOTPLUG
static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
{
- struct sclp_cpu_entry *cpue;
+ struct sclp_core_entry *cpue;
u16 boot_cpu_address, cpu;
if (sclp_read_info_early(sccb))
sclp.facilities = sccb->facilities;
sclp.has_sprp = !!(sccb->fac84 & 0x02);
- sclp.has_cpu_type = !!(sccb->fac84 & 0x01);
+ sclp.has_core_type = !!(sccb->fac84 & 0x01);
if (sccb->fac85 & 0x02)
S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
if (!sccb->hcpua) {
if (MACHINE_IS_VM)
- sclp.max_cpu = 64;
+ sclp.max_cores = 64;
else
- sclp.max_cpu = sccb->ncpurl;
+ sclp.max_cores = sccb->ncpurl;
} else {
- sclp.max_cpu = sccb->hcpua + 1;
+ sclp.max_cores = sccb->hcpua + 1;
}
boot_cpu_address = stap();
/* get info for boot cpu from lowcore, stored in the HSA */
- sa_ext = dump_save_area_create(0);
+ sa_ext = dump_save_areas.areas[0];
if (!sa_ext)
return -ENOMEM;
if (memcpy_hsa_kernel(&sa_ext->sa, sys_info.sa_base,
static int ap_device_remove(struct device *dev);
static int ap_device_probe(struct device *dev);
static void ap_interrupt_handler(struct airq_struct *airq);
-static void ap_reset(struct ap_device *ap_dev);
+static void ap_reset(struct ap_device *ap_dev, unsigned long *flags);
static void ap_config_timeout(unsigned long ptr);
static int ap_select_domain(void);
static void ap_query_configuration(void);
static int ap_query_functions(ap_qid_t qid, unsigned int *functions)
{
struct ap_queue_status status;
- int i;
+
status = __ap_query_functions(qid, functions);
- for (i = 0; i < AP_MAX_RESET; i++) {
- if (ap_queue_status_invalid_test(&status))
- return -ENODEV;
+ if (ap_queue_status_invalid_test(&status))
+ return -ENODEV;
- switch (status.response_code) {
- case AP_RESPONSE_NORMAL:
- return 0;
- case AP_RESPONSE_RESET_IN_PROGRESS:
- case AP_RESPONSE_BUSY:
- break;
- case AP_RESPONSE_Q_NOT_AVAIL:
- case AP_RESPONSE_DECONFIGURED:
- case AP_RESPONSE_CHECKSTOPPED:
- case AP_RESPONSE_INVALID_ADDRESS:
- return -ENODEV;
- case AP_RESPONSE_OTHERWISE_CHANGED:
- break;
- default:
- break;
- }
- if (i < AP_MAX_RESET - 1) {
- udelay(5);
- status = __ap_query_functions(qid, functions);
- }
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ return 0;
+ case AP_RESPONSE_Q_NOT_AVAIL:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ case AP_RESPONSE_INVALID_ADDRESS:
+ return -ENODEV;
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ case AP_RESPONSE_BUSY:
+ case AP_RESPONSE_OTHERWISE_CHANGED:
+ default:
+ return -EBUSY;
}
- return -EBUSY;
}
/**
* on the return value it waits a while and tests the AP queue if interrupts
* have been switched on using ap_test_queue().
*/
-static int ap_queue_enable_interruption(ap_qid_t qid, void *ind)
+static int ap_queue_enable_interruption(struct ap_device *ap_dev, void *ind)
{
struct ap_queue_status status;
- int t_depth, t_device_type, rc, i;
- rc = -EBUSY;
- status = ap_queue_interruption_control(qid, ind);
-
- for (i = 0; i < AP_MAX_RESET; i++) {
- switch (status.response_code) {
- case AP_RESPONSE_NORMAL:
- if (status.int_enabled)
- return 0;
- break;
- case AP_RESPONSE_RESET_IN_PROGRESS:
- case AP_RESPONSE_BUSY:
- if (i < AP_MAX_RESET - 1) {
- udelay(5);
- status = ap_queue_interruption_control(qid,
- ind);
- continue;
- }
- break;
- case AP_RESPONSE_Q_NOT_AVAIL:
- case AP_RESPONSE_DECONFIGURED:
- case AP_RESPONSE_CHECKSTOPPED:
- case AP_RESPONSE_INVALID_ADDRESS:
- return -ENODEV;
- case AP_RESPONSE_OTHERWISE_CHANGED:
- if (status.int_enabled)
- return 0;
- break;
- default:
- break;
- }
- if (i < AP_MAX_RESET - 1) {
- udelay(5);
- status = ap_test_queue(qid, &t_depth, &t_device_type);
- }
+ status = ap_queue_interruption_control(ap_dev->qid, ind);
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ case AP_RESPONSE_OTHERWISE_CHANGED:
+ return 0;
+ case AP_RESPONSE_Q_NOT_AVAIL:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ case AP_RESPONSE_INVALID_ADDRESS:
+ return -ENODEV;
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ case AP_RESPONSE_BUSY:
+ default:
+ return -EBUSY;
}
- return rc;
}
/**
}
EXPORT_SYMBOL(ap_recv);
+/**
+ * __ap_schedule_poll_timer(): Schedule poll timer.
+ *
+ * Set up the timer to run the poll tasklet
+ */
+static inline void __ap_schedule_poll_timer(void)
+{
+ ktime_t hr_time;
+
+ spin_lock_bh(&ap_poll_timer_lock);
+ if (!hrtimer_is_queued(&ap_poll_timer) && !ap_suspend_flag) {
+ hr_time = ktime_set(0, poll_timeout);
+ hrtimer_forward_now(&ap_poll_timer, hr_time);
+ hrtimer_restart(&ap_poll_timer);
+ }
+ spin_unlock_bh(&ap_poll_timer_lock);
+}
+
+/**
+ * ap_schedule_poll_timer(): Schedule poll timer.
+ *
+ * Set up the timer to run the poll tasklet
+ */
+static inline void ap_schedule_poll_timer(void)
+{
+ if (ap_using_interrupts())
+ return;
+ __ap_schedule_poll_timer();
+}
+
+
/**
* ap_query_queue(): Check if an AP queue is available.
* @qid: The AP queue number
* @queue_depth: Pointer to queue depth value
* @device_type: Pointer to device type value
- *
- * The test is repeated for AP_MAX_RESET times.
*/
static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type)
{
struct ap_queue_status status;
- int t_depth, t_device_type, rc, i;
+ int t_depth, t_device_type;
- rc = -EBUSY;
- for (i = 0; i < AP_MAX_RESET; i++) {
- status = ap_test_queue(qid, &t_depth, &t_device_type);
- switch (status.response_code) {
- case AP_RESPONSE_NORMAL:
- *queue_depth = t_depth + 1;
- *device_type = t_device_type;
- rc = 0;
- break;
- case AP_RESPONSE_Q_NOT_AVAIL:
- rc = -ENODEV;
- break;
- case AP_RESPONSE_RESET_IN_PROGRESS:
- break;
- case AP_RESPONSE_DECONFIGURED:
- rc = -ENODEV;
- break;
- case AP_RESPONSE_CHECKSTOPPED:
- rc = -ENODEV;
- break;
- case AP_RESPONSE_INVALID_ADDRESS:
- rc = -ENODEV;
- break;
- case AP_RESPONSE_OTHERWISE_CHANGED:
- break;
- case AP_RESPONSE_BUSY:
- break;
- default:
- BUG();
- }
- if (rc != -EBUSY)
- break;
- if (i < AP_MAX_RESET - 1)
- udelay(5);
+ status = ap_test_queue(qid, &t_depth, &t_device_type);
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ *queue_depth = t_depth + 1;
+ *device_type = t_device_type;
+ return 0;
+ case AP_RESPONSE_Q_NOT_AVAIL:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ case AP_RESPONSE_INVALID_ADDRESS:
+ return -ENODEV;
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ case AP_RESPONSE_OTHERWISE_CHANGED:
+ case AP_RESPONSE_BUSY:
+ return -EBUSY;
+ default:
+ BUG();
}
- return rc;
}
/**
* ap_init_queue(): Reset an AP queue.
* @qid: The AP queue number
*
- * Reset an AP queue and wait for it to become available again.
+ * Submit the Reset command to an AP queue.
+ * Since the reset is asynchron set the state to 'RESET_IN_PROGRESS'
+ * and check later via ap_poll_queue() if the reset is done.
*/
-static int ap_init_queue(ap_qid_t qid)
+static int ap_init_queue(struct ap_device *ap_dev)
{
struct ap_queue_status status;
- int rc, dummy, i;
- rc = -ENODEV;
- status = ap_reset_queue(qid);
- for (i = 0; i < AP_MAX_RESET; i++) {
- switch (status.response_code) {
- case AP_RESPONSE_NORMAL:
- if (status.queue_empty)
- rc = 0;
- break;
- case AP_RESPONSE_Q_NOT_AVAIL:
- case AP_RESPONSE_DECONFIGURED:
- case AP_RESPONSE_CHECKSTOPPED:
- i = AP_MAX_RESET; /* return with -ENODEV */
- break;
- case AP_RESPONSE_RESET_IN_PROGRESS:
- rc = -EBUSY;
- case AP_RESPONSE_BUSY:
- default:
- break;
- }
- if (rc != -ENODEV && rc != -EBUSY)
- break;
- if (i < AP_MAX_RESET - 1) {
- /* Time we are waiting until we give up (0.7sec * 90).
- * Since the actual request (in progress) will not
- * interrupted immediately for the reset command,
- * we have to be patient. In worst case we have to
- * wait 60sec + reset time (some msec).
- */
- schedule_timeout(AP_RESET_TIMEOUT);
- status = ap_test_queue(qid, &dummy, &dummy);
- }
- }
- if (rc == 0 && ap_using_interrupts()) {
- rc = ap_queue_enable_interruption(qid, ap_airq.lsi_ptr);
- /* If interruption mode is supported by the machine,
- * but an AP can not be enabled for interruption then
- * the AP will be discarded. */
- if (rc)
- pr_err("Registering adapter interrupts for "
- "AP %d failed\n", AP_QID_DEVICE(qid));
+ status = ap_reset_queue(ap_dev->qid);
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ ap_dev->interrupt = AP_INTR_DISABLED;
+ ap_dev->reset = AP_RESET_IN_PROGRESS;
+ return 0;
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ case AP_RESPONSE_BUSY:
+ return -EBUSY;
+ case AP_RESPONSE_Q_NOT_AVAIL:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ default:
+ return -ENODEV;
}
- return rc;
}
/**
static DEVICE_ATTR(pendingq_count, 0444, ap_pendingq_count_show, NULL);
+static ssize_t ap_reset_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ap_device *ap_dev = to_ap_dev(dev);
+ int rc = 0;
+
+ spin_lock_bh(&ap_dev->lock);
+ switch (ap_dev->reset) {
+ case AP_RESET_IGNORE:
+ rc = snprintf(buf, PAGE_SIZE, "No Reset Timer set.\n");
+ break;
+ case AP_RESET_ARMED:
+ rc = snprintf(buf, PAGE_SIZE, "Reset Timer armed.\n");
+ break;
+ case AP_RESET_DO:
+ rc = snprintf(buf, PAGE_SIZE, "Reset Timer expired.\n");
+ break;
+ case AP_RESET_IN_PROGRESS:
+ rc = snprintf(buf, PAGE_SIZE, "Reset in progress.\n");
+ break;
+ default:
+ break;
+ }
+ spin_unlock_bh(&ap_dev->lock);
+ return rc;
+}
+
+static DEVICE_ATTR(reset, 0444, ap_reset_show, NULL);
+
+static ssize_t ap_interrupt_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ap_device *ap_dev = to_ap_dev(dev);
+ int rc = 0;
+
+ spin_lock_bh(&ap_dev->lock);
+ switch (ap_dev->interrupt) {
+ case AP_INTR_DISABLED:
+ rc = snprintf(buf, PAGE_SIZE, "Interrupts disabled.\n");
+ break;
+ case AP_INTR_ENABLED:
+ rc = snprintf(buf, PAGE_SIZE, "Interrupts enabled.\n");
+ break;
+ case AP_INTR_IN_PROGRESS:
+ rc = snprintf(buf, PAGE_SIZE, "Enable Interrupt pending.\n");
+ break;
+ }
+ spin_unlock_bh(&ap_dev->lock);
+ return rc;
+}
+
+static DEVICE_ATTR(interrupt, 0444, ap_interrupt_show, NULL);
+
static ssize_t ap_modalias_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "ap:t%02X", to_ap_dev(dev)->device_type);
+ return sprintf(buf, "ap:t%02X\n", to_ap_dev(dev)->device_type);
}
static DEVICE_ATTR(modalias, 0444, ap_modalias_show, NULL);
&dev_attr_request_count.attr,
&dev_attr_requestq_count.attr,
&dev_attr_pendingq_count.attr,
+ &dev_attr_reset.attr,
+ &dev_attr_interrupt.attr,
&dev_attr_modalias.attr,
&dev_attr_ap_functions.attr,
NULL
spin_lock_bh(&ap_device_list_lock);
list_del_init(&ap_dev->list);
spin_unlock_bh(&ap_device_list_lock);
+ } else {
+ if (ap_dev->reset == AP_RESET_IN_PROGRESS ||
+ ap_dev->interrupt == AP_INTR_IN_PROGRESS)
+ __ap_schedule_poll_timer();
}
return rc;
}
struct ap_device *ap_dev;
struct device *dev;
ap_qid_t qid;
- int queue_depth, device_type;
+ int queue_depth = 0, device_type = 0;
unsigned int device_functions;
int rc, i;
else
rc = -ENODEV;
if (dev) {
- if (rc == -EBUSY) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(AP_RESET_TIMEOUT);
- rc = ap_query_queue(qid, &queue_depth,
- &device_type);
- }
ap_dev = to_ap_dev(dev);
spin_lock_bh(&ap_dev->lock);
- if (rc || ap_dev->unregistered) {
+ if (rc == -ENODEV || ap_dev->unregistered) {
spin_unlock_bh(&ap_dev->lock);
if (ap_dev->unregistered)
i--;
put_device(dev);
continue;
}
- if (rc)
- continue;
- rc = ap_init_queue(qid);
if (rc)
continue;
ap_dev = kzalloc(sizeof(*ap_dev), GFP_KERNEL);
if (!ap_dev)
break;
ap_dev->qid = qid;
+ rc = ap_init_queue(ap_dev);
+ if ((rc != 0) && (rc != -EBUSY)) {
+ kfree(ap_dev);
+ continue;
+ }
ap_dev->queue_depth = queue_depth;
ap_dev->unregistered = 1;
spin_lock_init(&ap_dev->lock);
add_timer(&ap_config_timer);
}
-/**
- * __ap_schedule_poll_timer(): Schedule poll timer.
- *
- * Set up the timer to run the poll tasklet
- */
-static inline void __ap_schedule_poll_timer(void)
-{
- ktime_t hr_time;
-
- spin_lock_bh(&ap_poll_timer_lock);
- if (!hrtimer_is_queued(&ap_poll_timer) && !ap_suspend_flag) {
- hr_time = ktime_set(0, poll_timeout);
- hrtimer_forward_now(&ap_poll_timer, hr_time);
- hrtimer_restart(&ap_poll_timer);
- }
- spin_unlock_bh(&ap_poll_timer_lock);
-}
-
-/**
- * ap_schedule_poll_timer(): Schedule poll timer.
- *
- * Set up the timer to run the poll tasklet
- */
-static inline void ap_schedule_poll_timer(void)
-{
- if (ap_using_interrupts())
- return;
- __ap_schedule_poll_timer();
-}
-
/**
* ap_poll_read(): Receive pending reply messages from an AP device.
* @ap_dev: pointer to the AP device
ap_dev->reply->message, ap_dev->reply->length);
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
+ ap_dev->interrupt = status.int_enabled;
atomic_dec(&ap_poll_requests);
ap_decrease_queue_count(ap_dev);
list_for_each_entry(ap_msg, &ap_dev->pendingq, list) {
*flags |= 1;
break;
case AP_RESPONSE_NO_PENDING_REPLY:
+ ap_dev->interrupt = status.int_enabled;
if (status.queue_empty) {
/* The card shouldn't forget requests but who knows. */
atomic_sub(ap_dev->queue_count, &ap_poll_requests);
struct ap_message *ap_msg;
if (ap_dev->requestq_count <= 0 ||
- ap_dev->queue_count >= ap_dev->queue_depth)
+ (ap_dev->queue_count >= ap_dev->queue_depth) ||
+ (ap_dev->reset == AP_RESET_IN_PROGRESS))
return 0;
/* Start the next request on the queue. */
ap_msg = list_entry(ap_dev->requestq.next, struct ap_message, list);
/**
* ap_poll_queue(): Poll AP device for pending replies and send new messages.
+ * Check if the queue has a pending reset. In case it's done re-enable
+ * interrupts, otherwise reschedule the poll_timer for another attempt.
* @ap_dev: pointer to the bus device
* @flags: pointer to control flags, bit 2^0 is set if another poll is
* required, bit 2^1 is set if the poll timer needs to get armed
*/
static inline int ap_poll_queue(struct ap_device *ap_dev, unsigned long *flags)
{
- int rc;
+ int rc, depth, type;
+ struct ap_queue_status status;
+
+
+ if (ap_dev->reset == AP_RESET_IN_PROGRESS) {
+ status = ap_test_queue(ap_dev->qid, &depth, &type);
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ ap_dev->reset = AP_RESET_IGNORE;
+ if (ap_using_interrupts()) {
+ rc = ap_queue_enable_interruption(
+ ap_dev, ap_airq.lsi_ptr);
+ if (!rc)
+ ap_dev->interrupt = AP_INTR_IN_PROGRESS;
+ else if (rc == -ENODEV) {
+ pr_err("Registering adapter interrupts for "
+ "AP %d failed\n", AP_QID_DEVICE(ap_dev->qid));
+ return rc;
+ }
+ }
+ /* fall through */
+ case AP_RESPONSE_BUSY:
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ *flags |= AP_POLL_AFTER_TIMEOUT;
+ break;
+ case AP_RESPONSE_Q_NOT_AVAIL:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ return -ENODEV;
+ default:
+ break;
+ }
+ }
+
+ if ((ap_dev->reset != AP_RESET_IN_PROGRESS) &&
+ (ap_dev->interrupt == AP_INTR_IN_PROGRESS)) {
+ status = ap_test_queue(ap_dev->qid, &depth, &type);
+ if (ap_using_interrupts()) {
+ if (status.int_enabled == 1)
+ ap_dev->interrupt = AP_INTR_ENABLED;
+ else
+ *flags |= AP_POLL_AFTER_TIMEOUT;
+ } else
+ ap_dev->interrupt = AP_INTR_DISABLED;
+ }
rc = ap_poll_read(ap_dev, flags);
if (rc)
struct ap_queue_status status;
if (list_empty(&ap_dev->requestq) &&
- ap_dev->queue_count < ap_dev->queue_depth) {
+ (ap_dev->queue_count < ap_dev->queue_depth) &&
+ (ap_dev->reset != AP_RESET_IN_PROGRESS)) {
status = __ap_send(ap_dev->qid, ap_msg->psmid,
ap_msg->message, ap_msg->length,
ap_msg->special);
* Reset a not responding AP device and move all requests from the
* pending queue to the request queue.
*/
-static void ap_reset(struct ap_device *ap_dev)
+static void ap_reset(struct ap_device *ap_dev, unsigned long *flags)
{
int rc;
- ap_dev->reset = AP_RESET_IGNORE;
atomic_sub(ap_dev->queue_count, &ap_poll_requests);
ap_dev->queue_count = 0;
list_splice_init(&ap_dev->pendingq, &ap_dev->requestq);
ap_dev->requestq_count += ap_dev->pendingq_count;
ap_dev->pendingq_count = 0;
- rc = ap_init_queue(ap_dev->qid);
+ rc = ap_init_queue(ap_dev);
if (rc == -ENODEV)
ap_dev->unregistered = 1;
else
- __ap_schedule_poll_timer();
+ *flags |= AP_POLL_AFTER_TIMEOUT;
}
static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags)
if (ap_poll_queue(ap_dev, flags))
ap_dev->unregistered = 1;
if (ap_dev->reset == AP_RESET_DO)
- ap_reset(ap_dev);
+ ap_reset(ap_dev, flags);
}
return 0;
}
spin_unlock(&ap_dev->lock);
}
spin_unlock(&ap_device_list_lock);
- } while (flags & 1);
- if (flags & 2)
- ap_schedule_poll_timer();
+ } while (flags & AP_POLL_IMMEDIATELY);
+ if (flags & AP_POLL_AFTER_TIMEOUT)
+ __ap_schedule_poll_timer();
}
/**
#define AP_DEVICES 64 /* Number of AP devices. */
#define AP_DOMAINS 256 /* Number of AP domains. */
-#define AP_MAX_RESET 90 /* Maximum number of resets. */
#define AP_RESET_TIMEOUT (HZ*0.7) /* Time in ticks for reset timeouts. */
#define AP_CONFIG_TIME 30 /* Time in seconds between AP bus rescans. */
#define AP_POLL_TIME 1 /* Time in ticks between receive polls. */
+#define AP_POLL_IMMEDIATELY 1 /* continue running poll tasklet */
+#define AP_POLL_AFTER_TIMEOUT 2 /* run poll tasklet again after timout */
+
extern int ap_domain_index;
/**
#define AP_RESET_IGNORE 0 /* request timeout will be ignored */
#define AP_RESET_ARMED 1 /* request timeout timer is active */
#define AP_RESET_DO 2 /* AP reset required */
+#define AP_RESET_IN_PROGRESS 3 /* AP reset in progress */
+
+/*
+ * AP interrupt states
+ */
+#define AP_INTR_DISABLED 0 /* AP interrupt disabled */
+#define AP_INTR_ENABLED 1 /* AP interrupt enabled */
+#define AP_INTR_IN_PROGRESS 3 /* AP interrupt in progress */
struct ap_device;
struct ap_message;
struct timer_list timeout; /* Timer for request timeouts. */
int reset; /* Reset required after req. timeout. */
+ int interrupt; /* indicate if interrupts are enabled */
int queue_count; /* # messages currently on AP queue. */
struct list_head pendingq; /* List of message sent to AP queue. */
* But the maximum time limit managed by the stomper code is set to 60sec.
* Hence we have to wait at least that time period.
*/
-#define CEX4_CLEANUP_TIME (61*HZ)
+#define CEX4_CLEANUP_TIME (900*HZ)
static struct ap_device_id zcrypt_cex4_ids[] = {
{ AP_DEVICE(AP_DEVICE_TYPE_CEX4) },
/* redirect this interrupts to the first one */
irq_set_chip(irq2, &dummy_irq_chip);
- irq_set_chained_handler(irq2, intc_redirect_irq);
- irq_set_handler_data(irq2, (void *)irq);
+ irq_set_chained_handler_and_data(irq2,
+ intc_redirect_irq,
+ (void *)irq);
}
}
*/
irq_set_nothread(irq);
- irq_set_chained_handler(entry->pirq, intc_virq_handler);
+ /* Set handler data before installing the handler */
add_virq_to_pirq(entry->pirq, irq);
+ irq_set_chained_handler(entry->pirq, intc_virq_handler);
radix_tree_tag_clear(&d->tree, entry->enum_id,
INTC_TAG_VIRQ_NEEDS_ALLOC);
return ret;
}
-static struct kernel_param_ops duration_ops = {
+static const struct kernel_param_ops duration_ops = {
.set = duration_set,
.get = param_get_int,
};
return ret;
}
-static struct kernel_param_ops window_size_ops = {
+static const struct kernel_param_ops window_size_ops = {
.set = window_size_set,
.get = param_get_int,
};
#define param_check_vmidfilter(name, p) __param_check(name, p, void)
-static struct kernel_param_ops param_ops_vmidfilter = {
+static const struct kernel_param_ops param_ops_vmidfilter = {
.set = param_set_vmidfilter,
.get = param_get_vmidfilter,
};
_SIM_CONTROL_OPERATOR_BITS));
return 0;
} else {
- return hv_console_write((HV_VirtAddr)buf, count);
+ /* Translate 0 bytes written to EAGAIN for hvc_console_print. */
+ return hv_console_write((HV_VirtAddr)buf, count) ?: -EAGAIN;
}
}
static void xen_console_update_evtchn(struct xencons_info *info)
{
if (xen_hvm_domain()) {
- uint64_t v;
+ uint64_t v = 0;
int err;
err = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v);
return 0;
}
-static struct kernel_param_ops param_ops_sysrq_reset_seq = {
+static const struct kernel_param_ops param_ops_sysrq_reset_seq = {
.get = param_get_ushort,
.set = sysrq_reset_seq_param_set,
};
char file_arr[] = "CMVxy.bin";
char *file;
- kparam_block_sysfs_write(cmv_file);
+ kernel_param_lock(THIS_MODULE);
/* set proper name corresponding modem version and line type */
if (cmv_file[sc->modem_index] == NULL) {
if (UEA_CHIP_VERSION(sc) == ADI930)
strlcat(cmv_name, file, UEA_FW_NAME_MAX);
if (ver == 2)
strlcat(cmv_name, ".v2", UEA_FW_NAME_MAX);
- kparam_unblock_sysfs_write(cmv_file);
+ kernel_param_unlock(THIS_MODULE);
}
static int request_cmvs_old(struct uea_softc *sc,
return 0;
}
-static struct kernel_param_ops param_ops_scroll = {
+static const struct kernel_param_ops param_ops_scroll = {
.set = param_set_scroll,
};
#define param_check_scroll(name, p) __param_check(name, p, void)
/* Prepare startup mode */
- kparam_block_sysfs_write(mode_option);
+ kernel_param_lock(THIS_MODULE);
rc = fb_find_mode(&(info->var), info, mode_option, NULL, 0, NULL, 8);
- kparam_unblock_sysfs_write(mode_option);
+ kernel_param_unlock(THIS_MODULE);
if (! ((rc == 1) || (rc == 2))) {
rc = -EINVAL;
dev_err(info->device, "mode %s not found\n", mode_option);
return strlen(buffer) + 1;
}
-static struct kernel_param_ops vm_cmdline_param_ops = {
+static const struct kernel_param_ops vm_cmdline_param_ops = {
.set = vm_cmdline_set,
.get = vm_cmdline_get,
};
#include <asm/irq.h>
#include <asm/idle.h>
#include <asm/io_apic.h>
-#include <asm/xen/page.h>
#include <asm/xen/pci.h>
+#include <xen/page.h>
#endif
#include <asm/sync_bitops.h>
#include <asm/xen/hypercall.h>
#include <asm/sync_bitops.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
-#include <asm/xen/page.h>
#include <xen/xen.h>
#include <xen/xen-ops.h>
#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
+#include <xen/page.h>
#include "events_internal.h"
#include <xen/balloon.h>
#include <xen/gntdev.h>
#include <xen/events.h>
+#include <xen/page.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
-#include <asm/xen/page.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, "
static int gnttab_expand(unsigned int req_entries);
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
-#define SPP (PAGE_SIZE / sizeof(grant_status_t))
static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
{
#include <xen/grant_table.h>
#include <xen/events.h>
#include <xen/hvc-console.h>
+#include <xen/page.h>
#include <xen/xen-ops.h>
#include <asm/xen/hypercall.h>
-#include <asm/xen/page.h>
#include <asm/xen/hypervisor.h>
enum shutdown_state {
#include <xen/xen.h>
#include <xen/interface/xen.h>
+#include <xen/page.h>
#include <asm/xen/hypercall.h>
-#include <asm/xen/page.h>
#include <asm/xen/hypervisor.h>
#include <xen/tmem.h>
}
#endif
#ifdef CONFIG_CLEANCACHE
- BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
+ BUILD_BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
if (tmem_enabled && cleancache) {
int err;
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <asm/xen/hypervisor.h>
-#include <asm/xen/page.h>
+#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
#include <xen/balloon.h>
int i, j;
for (i = 0; i < nr_pages; i++) {
- unsigned long addr = (unsigned long)vaddr +
- (PAGE_SIZE * i);
err = gnttab_grant_foreign_access(dev->otherend_id,
- virt_to_mfn(addr), 0);
+ virt_to_mfn(vaddr), 0);
if (err < 0) {
xenbus_dev_fatal(dev, err,
"granting access to ring page");
goto fail;
}
grefs[i] = err;
+
+ vaddr = vaddr + PAGE_SIZE;
}
return 0;
int err = 0;
if (xen_hvm_domain()) {
- uint64_t v;
+ uint64_t v = 0;
err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
if (!err && v)
return container_of(inode, struct bdev_inode, vfs_inode);
}
-inline struct block_device *I_BDEV(struct inode *inode)
+struct block_device *I_BDEV(struct inode *inode)
{
return &BDEV_I(inode)->bdev;
}
BTRFS_WORK_HELPER(scrub_helper);
BTRFS_WORK_HELPER(scrubwrc_helper);
BTRFS_WORK_HELPER(scrubnc_helper);
+BTRFS_WORK_HELPER(scrubparity_helper);
static struct __btrfs_workqueue *
__btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active,
BTRFS_WORK_HELPER_PROTO(scrub_helper);
BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
+BTRFS_WORK_HELPER_PROTO(scrubparity_helper);
+
struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
unsigned int flags,
* the first item to check. But sometimes, we may enter it with
* slot==nritems. In that case, go to the next leaf before we continue.
*/
- if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
- ret = btrfs_next_old_leaf(root, path, time_seq);
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+ if (time_seq == (u64)-1)
+ ret = btrfs_next_leaf(root, path);
+ else
+ ret = btrfs_next_old_leaf(root, path, time_seq);
+ }
while (!ret && count < total_refs) {
eb = path->nodes[0];
eie = NULL;
}
next:
- ret = btrfs_next_old_item(root, path, time_seq);
+ if (time_seq == (u64)-1)
+ ret = btrfs_next_item(root, path);
+ else
+ ret = btrfs_next_old_item(root, path, time_seq);
}
if (ret > 0)
if (path->search_commit_root)
root_level = btrfs_header_level(root->commit_root);
+ else if (time_seq == (u64)-1)
+ root_level = btrfs_header_level(root->node);
else
root_level = btrfs_old_root_level(root, time_seq);
}
path->lowest_level = level;
- ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
+ if (time_seq == (u64)-1)
+ ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path,
+ 0, 0);
+ else
+ ret = btrfs_search_old_slot(root, &ref->key_for_search, path,
+ time_seq);
/* root node has been locked, we can release @subvol_srcu safely here */
srcu_read_unlock(&fs_info->subvol_srcu, index);
BUG_ON(!ref->wanted_disk_byte);
eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
0);
- if (!eb || !extent_buffer_uptodate(eb)) {
+ if (IS_ERR(eb)) {
+ return PTR_ERR(eb);
+ } else if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
return -EIO;
}
}
/*
- * merge two lists of backrefs and adjust counts accordingly
+ * merge backrefs and adjust counts accordingly
*
* mode = 1: merge identical keys, if key is set
* FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
ref2 = list_entry(pos2, struct __prelim_ref, list);
+ if (!ref_for_same_block(ref1, ref2))
+ continue;
if (mode == 1) {
- if (!ref_for_same_block(ref1, ref2))
- continue;
if (!ref1->parent && ref2->parent) {
xchg = ref1;
ref1 = ref2;
struct list_head *prefs, u64 *total_refs,
u64 inum)
{
+ struct btrfs_delayed_ref_node *node;
struct btrfs_delayed_extent_op *extent_op = head->extent_op;
- struct rb_node *n = &head->node.rb_node;
struct btrfs_key key;
struct btrfs_key op_key = {0};
int sgn;
btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
spin_lock(&head->lock);
- n = rb_first(&head->ref_root);
- while (n) {
- struct btrfs_delayed_ref_node *node;
- node = rb_entry(n, struct btrfs_delayed_ref_node,
- rb_node);
- n = rb_next(n);
+ list_for_each_entry(node, &head->ref_list, list) {
if (node->seq > seq)
continue;
*
* NOTE: This can return values > 0
*
+ * If time_seq is set to (u64)-1, it will not search delayed_refs, and behave
+ * much like trans == NULL case, the difference only lies in it will not
+ * commit root.
+ * The special case is for qgroup to search roots in commit_transaction().
+ *
* FIXME some caching might speed things up
*/
static int find_parent_nodes(struct btrfs_trans_handle *trans,
path->skip_locking = 1;
}
+ if (time_seq == (u64)-1)
+ path->skip_locking = 1;
+
/*
* grab both a lock on the path and a lock on the delayed ref head.
* We need both to get a consistent picture of how the refs look
BUG_ON(ret == 0);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
- if (trans && likely(trans->type != __TRANS_DUMMY)) {
+ if (trans && likely(trans->type != __TRANS_DUMMY) &&
+ time_seq != (u64)-1) {
#else
- if (trans) {
+ if (trans && time_seq != (u64)-1) {
#endif
/*
* look if there are updates for this ref queued and lock the
eb = read_tree_block(fs_info->extent_root,
ref->parent, 0);
- if (!eb || !extent_buffer_uptodate(eb)) {
+ if (IS_ERR(eb)) {
+ ret = PTR_ERR(eb);
+ goto out;
+ } else if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
ret = -EIO;
goto out;
btrfs_tree_read_unlock(eb_root);
free_extent_buffer(eb_root);
old = read_tree_block(root, logical, 0);
- if (WARN_ON(!old || !extent_buffer_uptodate(old))) {
- free_extent_buffer(old);
+ if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
+ if (!IS_ERR(old))
+ free_extent_buffer(old);
btrfs_warn(root->fs_info,
"failed to read tree block %llu from get_old_root", logical);
} else {
if (!cur || !uptodate) {
if (!cur) {
cur = read_tree_block(root, blocknr, gen);
- if (!cur || !extent_buffer_uptodate(cur)) {
+ if (IS_ERR(cur)) {
+ return PTR_ERR(cur);
+ } else if (!extent_buffer_uptodate(cur)) {
free_extent_buffer(cur);
return -EIO;
}
eb = read_tree_block(root, btrfs_node_blockptr(parent, slot),
btrfs_node_ptr_generation(parent, slot));
- if (eb && !extent_buffer_uptodate(eb)) {
- free_extent_buffer(eb);
+ if (IS_ERR(eb) || !extent_buffer_uptodate(eb)) {
+ if (!IS_ERR(eb))
+ free_extent_buffer(eb);
eb = NULL;
}
ret = -EAGAIN;
tmp = read_tree_block(root, blocknr, 0);
- if (tmp) {
+ if (!IS_ERR(tmp)) {
/*
* If the read above didn't mark this buffer up to date,
* it will never end up being up to date. Set ret to EIO now
/* csum types */
#define BTRFS_CSUM_TYPE_CRC32 0
-static int btrfs_csum_sizes[] = { 4, 0 };
+static int btrfs_csum_sizes[] = { 4 };
/* four bytes for CRC32 */
#define BTRFS_EMPTY_DIR_SIZE 0
struct task_struct *cleaner_kthread;
int thread_pool_size;
- struct kobject super_kobj;
struct kobject *space_info_kobj;
- struct kobject *device_dir_kobj;
- struct completion kobj_unregister;
int do_barriers;
int closing;
int log_root_recovering;
struct btrfs_workqueue *scrub_workers;
struct btrfs_workqueue *scrub_wr_completion_workers;
struct btrfs_workqueue *scrub_nocow_workers;
+ struct btrfs_workqueue *scrub_parity_workers;
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
u32 check_integrity_print_mask;
/* list of dirty qgroups to be written at next commit */
struct list_head dirty_qgroups;
- /* used by btrfs_qgroup_record_ref for an efficient tree traversal */
+ /* used by qgroup for an efficient tree traversal */
u64 qgroup_seq;
/* qgroup rescan items */
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
struct inode *inode);
void btrfs_orphan_release_metadata(struct inode *inode);
int __get_raid_index(u64 flags);
int btrfs_start_write_no_snapshoting(struct btrfs_root *root);
void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
+void check_system_chunk(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ const u64 type);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
#ifdef CONFIG_BTRFS_ASSERT
+__cold
static inline void assfail(char *expr, char *file, int line)
{
pr_err("BTRFS: assertion failed: %s, file: %s, line: %d",
#define btrfs_assert()
__printf(5, 6)
+__cold
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...);
+__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const char *function,
unsigned int line, int errno);
* Call btrfs_abort_transaction as early as possible when an error condition is
* detected, that way the exact line number is reported.
*/
-
#define btrfs_abort_transaction(trans, root, errno) \
do { \
- __btrfs_abort_transaction(trans, root, __func__, \
- __LINE__, errno); \
+ /* Report first abort since mount */ \
+ if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \
+ &((root)->fs_info->fs_state))) { \
+ WARN(1, KERN_DEBUG \
+ "BTRFS: Transaction aborted (error %d)\n", \
+ (errno)); \
+ } \
+ __btrfs_abort_transaction((trans), (root), __func__, \
+ __LINE__, (errno)); \
} while (0)
#define btrfs_std_error(fs_info, errno) \
} while (0)
__printf(5, 6)
+__cold
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...);
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
+#include "qgroup.h"
struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
return 0;
}
-/*
- * entries in the rb tree are ordered by the byte number of the extent,
- * type of the delayed backrefs and content of delayed backrefs.
- */
-static int comp_entry(struct btrfs_delayed_ref_node *ref2,
- struct btrfs_delayed_ref_node *ref1,
- bool compare_seq)
-{
- if (ref1->bytenr < ref2->bytenr)
- return -1;
- if (ref1->bytenr > ref2->bytenr)
- return 1;
- if (ref1->is_head && ref2->is_head)
- return 0;
- if (ref2->is_head)
- return -1;
- if (ref1->is_head)
- return 1;
- if (ref1->type < ref2->type)
- return -1;
- if (ref1->type > ref2->type)
- return 1;
- if (ref1->no_quota > ref2->no_quota)
- return 1;
- if (ref1->no_quota < ref2->no_quota)
- return -1;
- /* merging of sequenced refs is not allowed */
- if (compare_seq) {
- if (ref1->seq < ref2->seq)
- return -1;
- if (ref1->seq > ref2->seq)
- return 1;
- }
- if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
- ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
- return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
- btrfs_delayed_node_to_tree_ref(ref1),
- ref1->type);
- } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
- ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
- return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
- btrfs_delayed_node_to_data_ref(ref1));
- }
- BUG();
- return 0;
-}
-
-/*
- * insert a new ref into the rbtree. This returns any existing refs
- * for the same (bytenr,parent) tuple, or NULL if the new node was properly
- * inserted.
- */
-static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
- struct rb_node *node)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent_node = NULL;
- struct btrfs_delayed_ref_node *entry;
- struct btrfs_delayed_ref_node *ins;
- int cmp;
-
- ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- while (*p) {
- parent_node = *p;
- entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
- rb_node);
-
- cmp = comp_entry(entry, ins, 1);
- if (cmp < 0)
- p = &(*p)->rb_left;
- else if (cmp > 0)
- p = &(*p)->rb_right;
- else
- return entry;
- }
-
- rb_link_node(node, parent_node, p);
- rb_insert_color(node, root);
- return NULL;
-}
-
/* insert a new ref to head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
struct rb_node *node)
rb_erase(&head->href_node, &delayed_refs->href_root);
} else {
assert_spin_locked(&head->lock);
- rb_erase(&ref->rb_node, &head->ref_root);
+ list_del(&ref->list);
}
ref->in_tree = 0;
btrfs_put_delayed_ref(ref);
trans->delayed_ref_updates--;
}
-static int merge_ref(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_root *delayed_refs,
- struct btrfs_delayed_ref_head *head,
- struct btrfs_delayed_ref_node *ref, u64 seq)
-{
- struct rb_node *node;
- int mod = 0;
- int done = 0;
-
- node = rb_next(&ref->rb_node);
- while (!done && node) {
- struct btrfs_delayed_ref_node *next;
-
- next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- node = rb_next(node);
- if (seq && next->seq >= seq)
- break;
- if (comp_entry(ref, next, 0))
- continue;
-
- if (ref->action == next->action) {
- mod = next->ref_mod;
- } else {
- if (ref->ref_mod < next->ref_mod) {
- struct btrfs_delayed_ref_node *tmp;
-
- tmp = ref;
- ref = next;
- next = tmp;
- done = 1;
- }
- mod = -next->ref_mod;
- }
-
- drop_delayed_ref(trans, delayed_refs, head, next);
- ref->ref_mod += mod;
- if (ref->ref_mod == 0) {
- drop_delayed_ref(trans, delayed_refs, head, ref);
- done = 1;
- } else {
- /*
- * You can't have multiples of the same ref on a tree
- * block.
- */
- WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
- ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
- }
- }
- return done;
-}
-
-void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_delayed_ref_root *delayed_refs,
- struct btrfs_delayed_ref_head *head)
-{
- struct rb_node *node;
- u64 seq = 0;
-
- assert_spin_locked(&head->lock);
- /*
- * We don't have too much refs to merge in the case of delayed data
- * refs.
- */
- if (head->is_data)
- return;
-
- spin_lock(&fs_info->tree_mod_seq_lock);
- if (!list_empty(&fs_info->tree_mod_seq_list)) {
- struct seq_list *elem;
-
- elem = list_first_entry(&fs_info->tree_mod_seq_list,
- struct seq_list, list);
- seq = elem->seq;
- }
- spin_unlock(&fs_info->tree_mod_seq_lock);
-
- node = rb_first(&head->ref_root);
- while (node) {
- struct btrfs_delayed_ref_node *ref;
-
- ref = rb_entry(node, struct btrfs_delayed_ref_node,
- rb_node);
- /* We can't merge refs that are outside of our seq count */
- if (seq && ref->seq >= seq)
- break;
- if (merge_ref(trans, delayed_refs, head, ref, seq))
- node = rb_first(&head->ref_root);
- else
- node = rb_next(&ref->rb_node);
- }
-}
-
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq)
}
/*
- * helper function to update an extent delayed ref in the
- * rbtree. existing and update must both have the same
- * bytenr and parent
+ * Helper to insert the ref_node to the tail or merge with tail.
*
- * This may free existing if the update cancels out whatever
- * operation it was doing.
+ * Return 0 for insert.
+ * Return >0 for merge.
*/
-static noinline void
-update_existing_ref(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_root *delayed_refs,
- struct btrfs_delayed_ref_head *head,
- struct btrfs_delayed_ref_node *existing,
- struct btrfs_delayed_ref_node *update)
+static int
+add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_root *root,
+ struct btrfs_delayed_ref_head *href,
+ struct btrfs_delayed_ref_node *ref)
{
- if (update->action != existing->action) {
- /*
- * this is effectively undoing either an add or a
- * drop. We decrement the ref_mod, and if it goes
- * down to zero we just delete the entry without
- * every changing the extent allocation tree.
- */
- existing->ref_mod--;
- if (existing->ref_mod == 0)
- drop_delayed_ref(trans, delayed_refs, head, existing);
- else
- WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
- existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
+ struct btrfs_delayed_ref_node *exist;
+ int mod;
+ int ret = 0;
+
+ spin_lock(&href->lock);
+ /* Check whether we can merge the tail node with ref */
+ if (list_empty(&href->ref_list))
+ goto add_tail;
+ exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
+ list);
+ /* No need to compare bytenr nor is_head */
+ if (exist->type != ref->type || exist->no_quota != ref->no_quota ||
+ exist->seq != ref->seq)
+ goto add_tail;
+
+ if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
+ exist->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
+ comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist),
+ btrfs_delayed_node_to_tree_ref(ref),
+ ref->type))
+ goto add_tail;
+ if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY ||
+ exist->type == BTRFS_SHARED_DATA_REF_KEY) &&
+ comp_data_refs(btrfs_delayed_node_to_data_ref(exist),
+ btrfs_delayed_node_to_data_ref(ref)))
+ goto add_tail;
+
+ /* Now we are sure we can merge */
+ ret = 1;
+ if (exist->action == ref->action) {
+ mod = ref->ref_mod;
} else {
- WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
- existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
- /*
- * the action on the existing ref matches
- * the action on the ref we're trying to add.
- * Bump the ref_mod by one so the backref that
- * is eventually added/removed has the correct
- * reference count
- */
- existing->ref_mod += update->ref_mod;
+ /* Need to change action */
+ if (exist->ref_mod < ref->ref_mod) {
+ exist->action = ref->action;
+ mod = -exist->ref_mod;
+ exist->ref_mod = ref->ref_mod;
+ } else
+ mod = -ref->ref_mod;
}
+ exist->ref_mod += mod;
+
+ /* remove existing tail if its ref_mod is zero */
+ if (exist->ref_mod == 0)
+ drop_delayed_ref(trans, root, href, exist);
+ spin_unlock(&href->lock);
+ return ret;
+
+add_tail:
+ list_add_tail(&ref->list, &href->ref_list);
+ atomic_inc(&root->num_entries);
+ trans->delayed_ref_updates++;
+ spin_unlock(&href->lock);
+ return ret;
}
/*
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_node *ref, u64 bytenr,
- u64 num_bytes, int action, int is_data)
+ struct btrfs_delayed_ref_node *ref,
+ struct btrfs_qgroup_extent_record *qrecord,
+ u64 bytenr, u64 num_bytes, int action, int is_data)
{
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_head *head_ref = NULL;
struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_qgroup_extent_record *qexisting;
int count_mod = 1;
int must_insert_reserved = 0;
head_ref = btrfs_delayed_node_to_head(ref);
head_ref->must_insert_reserved = must_insert_reserved;
head_ref->is_data = is_data;
- head_ref->ref_root = RB_ROOT;
+ INIT_LIST_HEAD(&head_ref->ref_list);
head_ref->processing = 0;
head_ref->total_ref_mod = count_mod;
+ /* Record qgroup extent info if provided */
+ if (qrecord) {
+ qrecord->bytenr = bytenr;
+ qrecord->num_bytes = num_bytes;
+ qrecord->old_roots = NULL;
+
+ qexisting = btrfs_qgroup_insert_dirty_extent(delayed_refs,
+ qrecord);
+ if (qexisting)
+ kfree(qrecord);
+ }
+
spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex);
u64 num_bytes, u64 parent, u64 ref_root, int level,
int action, int no_quota)
{
- struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_tree_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
u64 seq = 0;
+ int ret;
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
trace_add_delayed_tree_ref(ref, full_ref, action);
- spin_lock(&head_ref->lock);
- existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
- if (existing) {
- update_existing_ref(trans, delayed_refs, head_ref, existing,
- ref);
- /*
- * we've updated the existing ref, free the newly
- * allocated ref
- */
+ ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
+
+ /*
+ * XXX: memory should be freed at the same level allocated.
+ * But bad practice is anywhere... Follow it now. Need cleanup.
+ */
+ if (ret > 0)
kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
- } else {
- atomic_inc(&delayed_refs->num_entries);
- trans->delayed_ref_updates++;
- }
- spin_unlock(&head_ref->lock);
}
/*
u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
u64 offset, int action, int no_quota)
{
- struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_data_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
u64 seq = 0;
+ int ret;
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
trace_add_delayed_data_ref(ref, full_ref, action);
- spin_lock(&head_ref->lock);
- existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
- if (existing) {
- update_existing_ref(trans, delayed_refs, head_ref, existing,
- ref);
- /*
- * we've updated the existing ref, free the newly
- * allocated ref
- */
+ ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
+
+ if (ret > 0)
kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
- } else {
- atomic_inc(&delayed_refs->num_entries);
- trans->delayed_ref_updates++;
- }
- spin_unlock(&head_ref->lock);
}
/*
struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_qgroup_extent_record *record = NULL;
if (!is_fstree(ref_root) || !fs_info->quota_enabled)
no_quota = 0;
return -ENOMEM;
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
- if (!head_ref) {
- kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
- return -ENOMEM;
+ if (!head_ref)
+ goto free_ref;
+
+ if (fs_info->quota_enabled && is_fstree(ref_root)) {
+ record = kmalloc(sizeof(*record), GFP_NOFS);
+ if (!record)
+ goto free_head_ref;
}
head_ref->extent_op = extent_op;
* insert both the head node and the new ref without dropping
* the spin lock
*/
- head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+ head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
bytenr, num_bytes, action, 0);
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
spin_unlock(&delayed_refs->lock);
return 0;
+
+free_head_ref:
+ kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
+free_ref:
+ kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+
+ return -ENOMEM;
}
/*
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_qgroup_extent_record *record = NULL;
if (!is_fstree(ref_root) || !fs_info->quota_enabled)
no_quota = 0;
return -ENOMEM;
}
+ if (fs_info->quota_enabled && is_fstree(ref_root)) {
+ record = kmalloc(sizeof(*record), GFP_NOFS);
+ if (!record) {
+ kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
+ kmem_cache_free(btrfs_delayed_ref_head_cachep,
+ head_ref);
+ return -ENOMEM;
+ }
+ }
+
head_ref->extent_op = extent_op;
delayed_refs = &trans->transaction->delayed_refs;
* insert both the head node and the new ref without dropping
* the spin lock
*/
- head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+ head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
bytenr, num_bytes, action, 1);
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
- num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
- extent_op->is_data);
+ add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
+ num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
+ extent_op->is_data);
spin_unlock(&delayed_refs->lock);
return 0;
#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
+/*
+ * XXX: Qu: I really hate the design that ref_head and tree/data ref shares the
+ * same ref_node structure.
+ * Ref_head is in a higher logic level than tree/data ref, and duplicated
+ * bytenr/num_bytes in ref_node is really a waste or memory, they should be
+ * referred from ref_head.
+ * This gets more disgusting after we use list to store tree/data ref in
+ * ref_head. Must clean this mess up later.
+ */
struct btrfs_delayed_ref_node {
+ /*
+ * ref_head use rb tree, stored in ref_root->href.
+ * indexed by bytenr
+ */
struct rb_node rb_node;
+ /*data/tree ref use list, stored in ref_head->ref_list. */
+ struct list_head list;
+
/* the starting bytenr of the extent */
u64 bytenr;
struct mutex mutex;
spinlock_t lock;
- struct rb_root ref_root;
+ struct list_head ref_list;
struct rb_node href_node;
/* head ref rbtree */
struct rb_root href_root;
+ /* dirty extent records */
+ struct rb_root dirty_extent_root;
+
/* this spin lock protects the rbtree and the entries inside */
spinlock_t lock;
int flushing;
u64 run_delayed_start;
+
+ /*
+ * To make qgroup to skip given root.
+ * This is for snapshot, as btrfs_qgroup_inherit() will manully
+ * modify counters for snapshot and its source, so we should skip
+ * the snapshot in new_root/old_roots or it will get calculated twice
+ */
+ u64 qgroup_to_skip;
};
extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
WARN_ON(!tgt_device);
dev_replace->tgtdev = tgt_device;
+ ret = btrfs_kobj_add_device(tgt_device->fs_devices, tgt_device);
+ if (ret)
+ btrfs_error(root->fs_info, ret, "kobj add dev failed");
+
printk_in_rcu(KERN_INFO
"BTRFS: dev_replace from %s (devid %llu) to %s started\n",
src_device->missing ? "<missing disk>" :
mutex_unlock(&uuid_mutex);
/* replace the sysfs entry */
- btrfs_kobj_rm_device(fs_info, src_device);
- btrfs_kobj_add_device(fs_info, tgt_device);
+ btrfs_kobj_rm_device(fs_info->fs_devices, src_device);
btrfs_rm_dev_replace_free_srcdev(fs_info, src_device);
/* write back the superblocks */
buf = btrfs_find_create_tree_block(root, bytenr);
if (!buf)
- return NULL;
+ return ERR_PTR(-ENOMEM);
ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
if (ret) {
free_extent_buffer(buf);
- return NULL;
+ return ERR_PTR(ret);
}
return buf;
generation = btrfs_root_generation(&root->root_item);
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
generation);
- if (!root->node) {
- ret = -ENOMEM;
+ if (IS_ERR(root->node)) {
+ ret = PTR_ERR(root->node);
goto find_fail;
} else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
ret = -EIO;
- goto read_fail;
+ free_extent_buffer(root->node);
+ goto find_fail;
}
root->commit_root = btrfs_root_node(root);
out:
btrfs_free_path(path);
return root;
-read_fail:
- free_extent_buffer(root->node);
find_fail:
kfree(root);
alloc_fail:
log_tree_root->node = read_tree_block(tree_root, bytenr,
fs_info->generation + 1);
- if (!log_tree_root->node ||
- !extent_buffer_uptodate(log_tree_root->node)) {
+ if (IS_ERR(log_tree_root->node)) {
+ printk(KERN_ERR "BTRFS: failed to read log tree\n");
+ ret = PTR_ERR(log_tree_root->node);
+ kfree(log_tree_root);
+ return ret;
+ } else if (!extent_buffer_uptodate(log_tree_root->node)) {
printk(KERN_ERR "BTRFS: failed to read log tree\n");
free_extent_buffer(log_tree_root->node);
kfree(log_tree_root);
seqlock_init(&fs_info->profiles_lock);
init_rwsem(&fs_info->delayed_iput_sem);
- init_completion(&fs_info->kobj_unregister);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
INIT_LIST_HEAD(&fs_info->space_info);
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
chunk_root->node = read_tree_block(chunk_root,
btrfs_super_chunk_root(disk_super),
generation);
- if (!chunk_root->node ||
- !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
+ if (IS_ERR(chunk_root->node) ||
+ !extent_buffer_uptodate(chunk_root->node)) {
printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
sb->s_id);
goto fail_tree_roots;
tree_root->node = read_tree_block(tree_root,
btrfs_super_root(disk_super),
generation);
- if (!tree_root->node ||
- !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
+ if (IS_ERR(tree_root->node) ||
+ !extent_buffer_uptodate(tree_root->node)) {
printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
sb->s_id);
btrfs_close_extra_devices(fs_devices, 1);
+ ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
+ if (ret) {
+ pr_err("BTRFS: failed to init sysfs fsid interface: %d\n", ret);
+ goto fail_block_groups;
+ }
+
+ ret = btrfs_sysfs_add_device(fs_devices);
+ if (ret) {
+ pr_err("BTRFS: failed to init sysfs device interface: %d\n", ret);
+ goto fail_fsdev_sysfs;
+ }
+
ret = btrfs_sysfs_add_one(fs_info);
if (ret) {
pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
- goto fail_block_groups;
+ goto fail_fsdev_sysfs;
}
ret = btrfs_init_space_info(fs_info);
fail_sysfs:
btrfs_sysfs_remove_one(fs_info);
+fail_fsdev_sysfs:
+ btrfs_sysfs_remove_fsid(fs_info->fs_devices);
+
fail_block_groups:
btrfs_put_block_group_cache(fs_info);
btrfs_free_block_groups(fs_info);
}
btrfs_sysfs_remove_one(fs_info);
+ btrfs_sysfs_remove_fsid(fs_info->fs_devices);
btrfs_free_fs_roots(fs_info);
while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
struct btrfs_delayed_ref_head *head;
+ struct btrfs_delayed_ref_node *tmp;
bool pin_bytes = false;
head = rb_entry(node, struct btrfs_delayed_ref_head,
continue;
}
spin_lock(&head->lock);
- while ((node = rb_first(&head->ref_root)) != NULL) {
- ref = rb_entry(node, struct btrfs_delayed_ref_node,
- rb_node);
+ list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list,
+ list) {
ref->in_tree = 0;
- rb_erase(&ref->rb_node, &head->ref_root);
+ list_del(&ref->list);
atomic_dec(&delayed_refs->num_entries);
btrfs_put_delayed_ref(ref);
}
u64 num_bytes, int alloc);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent,
+ struct btrfs_delayed_ref_node *node, u64 parent,
u64 root_objectid, u64 owner_objectid,
u64 owner_offset, int refs_to_drop,
- struct btrfs_delayed_extent_op *extra_op,
- int no_quota);
+ struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
struct extent_buffer *leaf,
struct btrfs_extent_item *ei);
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u64 num_bytes,
+ struct btrfs_delayed_ref_node *node,
u64 parent, u64 root_objectid,
u64 owner, u64 offset, int refs_to_add,
- int no_quota,
struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *leaf;
struct btrfs_extent_item *item;
struct btrfs_key key;
+ u64 bytenr = node->bytenr;
+ u64 num_bytes = node->num_bytes;
u64 refs;
int ret;
- enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL;
+ int no_quota = node->no_quota;
path = btrfs_alloc_path();
if (!path)
bytenr, num_bytes, parent,
root_objectid, owner, offset,
refs_to_add, extent_op);
- if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota))
+ if ((ret < 0 && ret != -EAGAIN) || !ret)
goto out;
- /*
- * Ok we were able to insert an inline extent and it appears to be a new
- * reference, deal with the qgroup accounting.
- */
- if (!ret && !no_quota) {
- ASSERT(root->fs_info->quota_enabled);
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- item = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_item);
- if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add)
- type = BTRFS_QGROUP_OPER_ADD_SHARED;
- btrfs_release_path(path);
-
- ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
- bytenr, num_bytes, type, 0);
- goto out;
- }
/*
* Ok we had -EAGAIN which means we didn't have space to insert and
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, item);
- if (refs)
- type = BTRFS_QGROUP_OPER_ADD_SHARED;
btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
if (extent_op)
__run_delayed_extent_op(extent_op, leaf, item);
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
- if (!no_quota) {
- ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
- bytenr, num_bytes, type, 0);
- if (ret)
- goto out;
- }
-
path->reada = 1;
path->leave_spinning = 1;
/* now insert the actual backref */
ref->objectid, ref->offset,
&ins, node->ref_mod);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
- ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
- node->num_bytes, parent,
+ ret = __btrfs_inc_extent_ref(trans, root, node, parent,
ref_root, ref->objectid,
ref->offset, node->ref_mod,
- node->no_quota, extent_op);
+ extent_op);
} else if (node->action == BTRFS_DROP_DELAYED_REF) {
- ret = __btrfs_free_extent(trans, root, node->bytenr,
- node->num_bytes, parent,
+ ret = __btrfs_free_extent(trans, root, node, parent,
ref_root, ref->objectid,
ref->offset, node->ref_mod,
- extent_op, node->no_quota);
+ extent_op);
} else {
BUG();
}
ref->level, &ins,
node->no_quota);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
- ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
- node->num_bytes, parent, ref_root,
- ref->level, 0, 1, node->no_quota,
+ ret = __btrfs_inc_extent_ref(trans, root, node,
+ parent, ref_root,
+ ref->level, 0, 1,
extent_op);
} else if (node->action == BTRFS_DROP_DELAYED_REF) {
- ret = __btrfs_free_extent(trans, root, node->bytenr,
- node->num_bytes, parent, ref_root,
- ref->level, 0, 1, extent_op,
- node->no_quota);
+ ret = __btrfs_free_extent(trans, root, node,
+ parent, ref_root,
+ ref->level, 0, 1, extent_op);
} else {
BUG();
}
return ret;
}
-static noinline struct btrfs_delayed_ref_node *
+static inline struct btrfs_delayed_ref_node *
select_delayed_ref(struct btrfs_delayed_ref_head *head)
{
- struct rb_node *node;
- struct btrfs_delayed_ref_node *ref, *last = NULL;;
+ if (list_empty(&head->ref_list))
+ return NULL;
- /*
- * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
- * this prevents ref count from going down to zero when
- * there still are pending delayed ref.
- */
- node = rb_first(&head->ref_root);
- while (node) {
- ref = rb_entry(node, struct btrfs_delayed_ref_node,
- rb_node);
- if (ref->action == BTRFS_ADD_DELAYED_REF)
- return ref;
- else if (last == NULL)
- last = ref;
- node = rb_next(node);
- }
- return last;
+ return list_entry(head->ref_list.next, struct btrfs_delayed_ref_node,
+ list);
}
/*
}
}
- /*
- * We need to try and merge add/drops of the same ref since we
- * can run into issues with relocate dropping the implicit ref
- * and then it being added back again before the drop can
- * finish. If we merged anything we need to re-loop so we can
- * get a good ref.
- */
spin_lock(&locked_ref->lock);
- btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
- locked_ref);
/*
* locked_ref is the head node, so we have to go one
spin_unlock(&locked_ref->lock);
spin_lock(&delayed_refs->lock);
spin_lock(&locked_ref->lock);
- if (rb_first(&locked_ref->ref_root) ||
+ if (!list_empty(&locked_ref->ref_list) ||
locked_ref->extent_op) {
spin_unlock(&locked_ref->lock);
spin_unlock(&delayed_refs->lock);
} else {
actual_count++;
ref->in_tree = 0;
- rb_erase(&ref->rb_node, &locked_ref->ref_root);
+ list_del(&ref->list);
}
atomic_dec(&delayed_refs->num_entries);
goto again;
}
out:
- ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info);
- if (ret)
- return ret;
assert_qgroups_uptodate(trans);
return 0;
}
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_data_ref *data_ref;
struct btrfs_delayed_ref_root *delayed_refs;
- struct rb_node *node;
int ret = 0;
delayed_refs = &trans->transaction->delayed_refs;
spin_unlock(&delayed_refs->lock);
spin_lock(&head->lock);
- node = rb_first(&head->ref_root);
- while (node) {
- ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- node = rb_next(node);
-
+ list_for_each_entry(ref, &head->ref_list, list) {
/* If it's a shared ref we know a cross reference exists */
if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
ret = 1;
found->disk_total += total_bytes * factor;
found->bytes_used += bytes_used;
found->disk_used += bytes_used * factor;
- found->full = 0;
+ if (total_bytes > 0)
+ found->full = 0;
spin_unlock(&found->lock);
*space_info = found;
return 0;
found->bytes_reserved = 0;
found->bytes_readonly = 0;
found->bytes_may_use = 0;
- found->full = 0;
+ if (total_bytes > 0)
+ found->full = 0;
+ else
+ found->full = 1;
found->force_alloc = CHUNK_ALLOC_NO_FORCE;
found->chunk_alloc = 0;
found->flush = 0;
!atomic_read(&root->fs_info->open_ioctl_trans)) {
need_commit--;
+ if (need_commit > 0)
+ btrfs_wait_ordered_roots(fs_info, -1);
+
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
return 1;
}
-static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
+static u64 get_profile_num_devs(struct btrfs_root *root, u64 type)
{
u64 num_dev;
else
num_dev = 1; /* DUP or single */
- /* metadata for updaing devices and chunk tree */
- return btrfs_calc_trans_metadata_size(root, num_dev + 1);
+ return num_dev;
}
-static void check_system_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 type)
+/*
+ * If @is_allocation is true, reserve space in the system space info necessary
+ * for allocating a chunk, otherwise if it's false, reserve space necessary for
+ * removing a chunk.
+ */
+void check_system_chunk(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 type)
{
struct btrfs_space_info *info;
u64 left;
u64 thresh;
+ int ret = 0;
+ u64 num_devs;
+
+ /*
+ * Needed because we can end up allocating a system chunk and for an
+ * atomic and race free space reservation in the chunk block reserve.
+ */
+ ASSERT(mutex_is_locked(&root->fs_info->chunk_mutex));
info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
spin_lock(&info->lock);
left = info->total_bytes - info->bytes_used - info->bytes_pinned -
- info->bytes_reserved - info->bytes_readonly;
+ info->bytes_reserved - info->bytes_readonly -
+ info->bytes_may_use;
spin_unlock(&info->lock);
- thresh = get_system_chunk_thresh(root, type);
+ num_devs = get_profile_num_devs(root, type);
+
+ /* num_devs device items to update and 1 chunk item to add or remove */
+ thresh = btrfs_calc_trunc_metadata_size(root, num_devs) +
+ btrfs_calc_trans_metadata_size(root, 1);
+
if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
left, thresh, type);
u64 flags;
flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
- btrfs_alloc_chunk(trans, root, flags);
+ /*
+ * Ignore failure to create system chunk. We might end up not
+ * needing it, as we might not need to COW all nodes/leafs from
+ * the paths we visit in the chunk tree (they were already COWed
+ * or created in the current transaction for example).
+ */
+ ret = btrfs_alloc_chunk(trans, root, flags);
+ }
+
+ if (!ret) {
+ ret = btrfs_block_rsv_add(root->fs_info->chunk_root,
+ &root->fs_info->chunk_block_rsv,
+ thresh, BTRFS_RESERVE_NO_FLUSH);
+ if (!ret)
+ trans->chunk_bytes_reserved += thresh;
}
}
trans->bytes_reserved = 0;
}
+/*
+ * To be called after all the new block groups attached to the transaction
+ * handle have been created (btrfs_create_pending_block_groups()).
+ */
+void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
+{
+ struct btrfs_fs_info *fs_info = trans->root->fs_info;
+
+ if (!trans->chunk_bytes_reserved)
+ return;
+
+ WARN_ON_ONCE(!list_empty(&trans->new_bgs));
+
+ block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
+ trans->chunk_bytes_reserved);
+ trans->chunk_bytes_reserved = 0;
+}
+
/* Can only return 0 or -ENOSPC */
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
struct inode *inode)
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent,
+ struct btrfs_delayed_ref_node *node, u64 parent,
u64 root_objectid, u64 owner_objectid,
u64 owner_offset, int refs_to_drop,
- struct btrfs_delayed_extent_op *extent_op,
- int no_quota)
+ struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_key key;
struct btrfs_path *path;
int extent_slot = 0;
int found_extent = 0;
int num_to_del = 1;
+ int no_quota = node->no_quota;
u32 item_size;
u64 refs;
+ u64 bytenr = node->bytenr;
+ u64 num_bytes = node->num_bytes;
int last_ref = 0;
- enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL;
bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
SKINNY_METADATA);
refs -= refs_to_drop;
if (refs > 0) {
- type = BTRFS_QGROUP_OPER_SUB_SHARED;
if (extent_op)
__run_delayed_extent_op(extent_op, leaf, ei);
/*
}
btrfs_release_path(path);
- /* Deal with the quota accounting */
- if (!ret && last_ref && !no_quota) {
- int mod_seq = 0;
-
- if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
- type == BTRFS_QGROUP_OPER_SUB_SHARED)
- mod_seq = 1;
-
- ret = btrfs_qgroup_record_ref(trans, info, root_objectid,
- bytenr, num_bytes, type,
- mod_seq);
- }
out:
btrfs_free_path(path);
return ret;
goto out_delayed_unlock;
spin_lock(&head->lock);
- if (rb_first(&head->ref_root))
+ if (!list_empty(&head->ref_list))
goto out;
if (head->extent_op) {
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
- /* Always set parent to 0 here since its exclusive anyway. */
- ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
- ins->objectid, ins->offset,
- BTRFS_QGROUP_OPER_ADD_EXCL, 0);
- if (ret)
- return ret;
-
ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
btrfs_mark_buffer_dirty(leaf);
btrfs_free_path(path);
- if (!no_quota) {
- ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
- ins->objectid, num_bytes,
- BTRFS_QGROUP_OPER_ADD_EXCL, 0);
- if (ret)
- return ret;
- }
-
ret = update_block_group(trans, root, ins->objectid, root->nodesize,
1);
if (ret) { /* -ENOENT, logic error */
wc->reada_slot = slot;
}
+/*
+ * TODO: Modify related function to add related node/leaf to dirty_extent_root,
+ * for later qgroup accounting.
+ *
+ * Current, this function does nothing.
+ */
static int account_leaf_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *eb)
{
int nr = btrfs_header_nritems(eb);
- int i, extent_type, ret;
+ int i, extent_type;
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
u64 bytenr, num_bytes;
continue;
num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
-
- ret = btrfs_qgroup_record_ref(trans, root->fs_info,
- root->objectid,
- bytenr, num_bytes,
- BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
- if (ret)
- return ret;
}
return 0;
}
/*
* root_eb is the subtree root and is locked before this function is called.
+ * TODO: Modify this function to mark all (including complete shared node)
+ * to dirty_extent_root to allow it get accounted in qgroup.
*/
static int account_shared_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
child_gen = btrfs_node_ptr_generation(eb, parent_slot);
eb = read_tree_block(root, child_bytenr, child_gen);
- if (!eb || !extent_buffer_uptodate(eb)) {
+ if (IS_ERR(eb)) {
+ ret = PTR_ERR(eb);
+ goto out;
+ } else if (!extent_buffer_uptodate(eb)) {
+ free_extent_buffer(eb);
ret = -EIO;
goto out;
}
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
-
- ret = btrfs_qgroup_record_ref(trans, root->fs_info,
- root->objectid,
- child_bytenr,
- root->nodesize,
- BTRFS_QGROUP_OPER_SUB_SUBTREE,
- 0);
- if (ret)
- goto out;
-
}
if (level == 0) {
if (reada && level == 1)
reada_walk_down(trans, root, wc, path);
next = read_tree_block(root, bytenr, generation);
- if (!next || !extent_buffer_uptodate(next)) {
+ if (IS_ERR(next)) {
+ return PTR_ERR(next);
+ } else if (!extent_buffer_uptodate(next)) {
free_extent_buffer(next);
return -EIO;
}
goto out_end_trans;
}
- /*
- * Qgroup update accounting is run from
- * delayed ref handling. This usually works
- * out because delayed refs are normally the
- * only way qgroup updates are added. However,
- * we may have added updates during our tree
- * walk so run qgroups here to make sure we
- * don't lose any updates.
- */
- ret = btrfs_delayed_qgroup_accounting(trans,
- root->fs_info);
- if (ret)
- printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
- "running qgroup updates "
- "during snapshot delete. "
- "Quota is out of sync, "
- "rescan required.\n", ret);
-
btrfs_end_transaction_throttle(trans, tree_root);
if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
pr_debug("BTRFS: drop snapshot early exit\n");
}
root_dropped = true;
out_end_trans:
- ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
- if (ret)
- printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
- "running qgroup updates "
- "during snapshot delete. "
- "Quota is out of sync, "
- "rescan required.\n", ret);
-
btrfs_end_transaction_throttle(trans, tree_root);
out_free:
kfree(wc);
free_excluded_extents(root, cache);
+ /*
+ * Call to ensure the corresponding space_info object is created and
+ * assigned to our block group, but don't update its counters just yet.
+ * We want our bg to be added to the rbtree with its ->space_info set.
+ */
+ ret = update_space_info(root->fs_info, cache->flags, 0, 0,
+ &cache->space_info);
+ if (ret) {
+ btrfs_remove_free_space_cache(cache);
+ btrfs_put_block_group(cache);
+ return ret;
+ }
+
ret = btrfs_add_block_group_cache(root->fs_info, cache);
if (ret) {
btrfs_remove_free_space_cache(cache);
return ret;
}
+ /*
+ * Now that our block group has its ->space_info set and is inserted in
+ * the rbtree, update the space info's counters.
+ */
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
&cache->space_info);
if (ret) {
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, gfp_t mask)
{
- return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
+ int wake = 0;
+
+ if (bits & EXTENT_LOCKED)
+ wake = 1;
+
+ return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask);
}
int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
}
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ flags |= FIEMAP_EXTENT_UNWRITTEN;
free_extent_map(em);
em = NULL;
struct btrfs_log_ctx ctx;
int ret = 0;
bool full_sync = 0;
+ const u64 len = end - start + 1;
trace_btrfs_sync_file(file, datasync);
* all extents are persisted and the respective file extent
* items are in the fs/subvol btree.
*/
- ret = btrfs_wait_ordered_range(inode, start, end - start + 1);
+ ret = btrfs_wait_ordered_range(inode, start, len);
} else {
/*
* Start any new ordered operations before starting to log the
*/
smp_mb();
if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
- (full_sync && BTRFS_I(inode)->last_trans <=
- root->fs_info->last_trans_committed)) {
+ (BTRFS_I(inode)->last_trans <=
+ root->fs_info->last_trans_committed &&
+ (full_sync ||
+ !btrfs_have_ordered_extents_in_range(inode, start, len)))) {
/*
* We'v had everything committed since the last time we were
* modified so clear this flag in case it was set for whatever
{
int ret = 0;
struct btrfs_path *path = btrfs_alloc_path();
+ bool locked = false;
if (!path) {
ret = -ENOMEM;
}
if (block_group) {
+ locked = true;
mutex_lock(&trans->transaction->cache_write_mutex);
if (!list_empty(&block_group->io_list)) {
list_del_init(&block_group->io_list);
*/
ret = btrfs_truncate_inode_items(trans, root, inode,
0, BTRFS_EXTENT_DATA_KEY);
- if (ret) {
- mutex_unlock(&trans->transaction->cache_write_mutex);
- btrfs_abort_transaction(trans, root, ret);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = btrfs_update_inode(trans, root, inode);
- if (block_group)
- mutex_unlock(&trans->transaction->cache_write_mutex);
-
fail:
+ if (locked)
+ mutex_unlock(&trans->transaction->cache_write_mutex);
if (ret)
btrfs_abort_transaction(trans, root, ret);
}
write_unlock(&map_tree->lock);
+ /*
+ * Keep looping until we have no more ranges in the io tree.
+ * We can have ongoing bios started by readpages (called from readahead)
+ * that didn't get their end io callbacks called yet or they are still
+ * in progress ((extent_io.c:end_bio_extent_readpage()). This means some
+ * ranges can still be locked and eviction started because before
+ * submitting those bios, which are executed by a separate task (work
+ * queue kthread), inode references (inode->i_count) were not taken
+ * (which would be dropped in the end io callback of each bio).
+ * Therefore here we effectively end up waiting for those bios and
+ * anyone else holding locked ranges without having bumped the inode's
+ * reference count - if we don't do it, when they access the inode's
+ * io_tree to unlock a range it may be too late, leading to an
+ * use-after-free issue.
+ */
spin_lock(&io_tree->lock);
while (!RB_EMPTY_ROOT(&io_tree->state)) {
struct extent_state *state;
struct extent_state *cached_state = NULL;
+ u64 start;
+ u64 end;
node = rb_first(&io_tree->state);
state = rb_entry(node, struct extent_state, rb_node);
- atomic_inc(&state->refs);
+ start = state->start;
+ end = state->end;
spin_unlock(&io_tree->lock);
- lock_extent_bits(io_tree, state->start, state->end,
- 0, &cached_state);
- clear_extent_bit(io_tree, state->start, state->end,
+ lock_extent_bits(io_tree, start, end, 0, &cached_state);
+ clear_extent_bit(io_tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY |
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 1, 1,
&cached_state, GFP_NOFS);
- free_extent_state(state);
cond_resched();
spin_lock(&io_tree->lock);
key.offset = (u64)-1;
new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
if (IS_ERR(new_root)) {
- btrfs_abort_transaction(trans, root, PTR_ERR(new_root));
ret = PTR_ERR(new_root);
+ btrfs_abort_transaction(trans, root, ret);
goto fail;
}
i = range->start >> PAGE_CACHE_SHIFT;
}
if (!max_to_defrag)
- max_to_defrag = last_index + 1;
+ max_to_defrag = last_index - i + 1;
/*
* make writeback starts from i, so the defrag range can be
ra_index = max(i, ra_index);
btrfs_force_ra(inode->i_mapping, ra, file, ra_index,
cluster);
- ra_index += max_cluster;
+ ra_index += cluster;
}
mutex_lock(&inode->i_mutex);
{
struct btrfs_ioctl_ino_lookup_args *args;
struct inode *inode;
- int ret;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
+ int ret = 0;
args = memdup_user(argp, sizeof(*args));
if (IS_ERR(args))
inode = file_inode(file);
+ /*
+ * Unprivileged query to obtain the containing subvolume root id. The
+ * path is reset so it's consistent with btrfs_search_path_in_tree.
+ */
if (args->treeid == 0)
args->treeid = BTRFS_I(inode)->root->root_key.objectid;
+ if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
+ args->name[0] = 0;
+ goto out;
+ }
+
+ if (!capable(CAP_SYS_ADMIN)) {
+ ret = -EPERM;
+ goto out;
+ }
+
ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
args->treeid, args->objectid,
args->name);
+out:
if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
ret = -EFAULT;
goto out_unlock_inode;
}
- d_invalidate(dentry);
-
down_write(&root->fs_info->subvol_sem);
err = may_destroy_subvol(dest);
out_unlock_inode:
mutex_unlock(&inode->i_mutex);
if (!err) {
- shrink_dcache_sb(root->fs_info->sb);
+ d_invalidate(dentry);
btrfs_invalidate_inodes(dest);
d_delete(dentry);
ASSERT(dest->send_in_progress == 0);
return ret;
}
-static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len)
+static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen,
+ u64 olen)
{
+ u64 len = *plen;
u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize;
- if (off + len > inode->i_size || off + len < off)
+ if (off + olen > inode->i_size || off + olen < off)
return -EINVAL;
+
+ /* if we extend to eof, continue to block boundary */
+ if (off + len == inode->i_size)
+ *plen = len = ALIGN(inode->i_size, bs) - off;
+
/* Check that we are block aligned - btrfs_clone() requires this */
if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs))
return -EINVAL;
return 0;
}
-static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
+static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
struct inode *dst, u64 dst_loff)
{
int ret;
+ u64 len = olen;
/*
* btrfs_clone() can't handle extents in the same file
btrfs_double_lock(src, loff, dst, dst_loff, len);
- ret = extent_same_check_offsets(src, loff, len);
+ ret = extent_same_check_offsets(src, loff, &len, olen);
if (ret)
goto out_unlock;
- ret = extent_same_check_offsets(dst, dst_loff, len);
+ ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
if (ret)
goto out_unlock;
ret = btrfs_cmp_data(src, loff, dst, dst_loff, len);
if (ret == 0)
- ret = btrfs_clone(src, dst, loff, len, len, dst_loff);
+ ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
out_unlock:
btrfs_double_unlock(src, loff, dst, dst_loff, len);
entry->file_offset = file_offset;
entry->start = start;
entry->len = len;
- if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) &&
- !(type == BTRFS_ORDERED_NOCOW))
- entry->csum_bytes_left = disk_len;
entry->disk_len = disk_len;
entry->bytes_left = len;
entry->inode = igrab(inode);
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock_irq(&tree->lock);
list_add_tail(&sum->list, &entry->list);
- WARN_ON(entry->csum_bytes_left < sum->len);
- entry->csum_bytes_left -= sum->len;
- if (entry->csum_bytes_left == 0)
- wake_up(&entry->wait);
spin_unlock_irq(&tree->lock);
}
wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
&ordered->flags));
- list_add_tail(&ordered->trans_list, &trans->ordered);
+ /*
+ * If our ordered extent completed it means it updated the
+ * fs/subvol and csum trees already, so no need to make the
+ * current transaction's commit wait for it, as we end up
+ * holding memory unnecessarily and delaying the inode's iput
+ * until the transaction commit (we schedule an iput for the
+ * inode when the ordered extent's refcount drops to 0), which
+ * prevents it from being evictable until the transaction
+ * commits.
+ */
+ if (test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags))
+ btrfs_put_ordered_extent(ordered);
+ else
+ list_add_tail(&ordered->trans_list, &trans->ordered);
+
spin_lock_irq(&log->log_extents_lock[index]);
}
spin_unlock_irq(&log->log_extents_lock[index]);
return entry;
}
+bool btrfs_have_ordered_extents_in_range(struct inode *inode,
+ u64 file_offset,
+ u64 len)
+{
+ struct btrfs_ordered_extent *oe;
+
+ oe = btrfs_lookup_ordered_range(inode, file_offset, len);
+ if (oe) {
+ btrfs_put_ordered_extent(oe);
+ return true;
+ }
+ return false;
+}
+
/*
* lookup and return any extent before 'file_offset'. NULL is returned
* if none is found
/* number of bytes that still need writing */
u64 bytes_left;
- /* number of bytes that still need csumming */
- u64 csum_bytes_left;
-
/*
* the end of the ordered extent which is behind it but
* didn't update disk_i_size. Please see the comment of
struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
u64 file_offset,
u64 len);
+bool btrfs_have_ordered_extents_in_range(struct inode *inode,
+ u64 file_offset,
+ u64 len);
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
#include "extent_io.h"
#include "qgroup.h"
+
/* TODO XXX FIXME
* - subvol delete -> delete when ref goes to 0? delete limits also?
* - reorganize keys
/*
* temp variables for accounting operations
+ * Refer to qgroup_shared_accouting() for details.
*/
u64 old_refcnt;
u64 new_refcnt;
};
+static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
+ int mod)
+{
+ if (qg->old_refcnt < seq)
+ qg->old_refcnt = seq;
+ qg->old_refcnt += mod;
+}
+
+static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
+ int mod)
+{
+ if (qg->new_refcnt < seq)
+ qg->new_refcnt = seq;
+ qg->new_refcnt += mod;
+}
+
+static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
+{
+ if (qg->old_refcnt < seq)
+ return 0;
+ return qg->old_refcnt - seq;
+}
+
+static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
+{
+ if (qg->new_refcnt < seq)
+ return 0;
+ return qg->new_refcnt - seq;
+}
+
/*
* glue structure to represent the relations between qgroups.
*/
struct ulist *tmp;
int ret = 0;
- tmp = ulist_alloc(GFP_NOFS);
- if (!tmp)
- return -ENOMEM;
-
/* Check the level of src and dst first */
if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
return -EINVAL;
+ tmp = ulist_alloc(GFP_NOFS);
+ if (!tmp)
+ return -ENOMEM;
+
mutex_lock(&fs_info->qgroup_ioctl_lock);
quota_root = fs_info->quota_root;
if (!quota_root) {
return ret;
}
-static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
- struct btrfs_qgroup_operation *oper2)
+int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
{
- /*
- * Ignore seq and type here, we're looking for any operation
- * at all related to this extent on that root.
- */
- if (oper1->bytenr < oper2->bytenr)
- return -1;
- if (oper1->bytenr > oper2->bytenr)
- return 1;
- if (oper1->ref_root < oper2->ref_root)
- return -1;
- if (oper1->ref_root > oper2->ref_root)
- return 1;
- return 0;
-}
+ struct btrfs_qgroup_extent_record *record;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct rb_node *node;
+ u64 qgroup_to_skip;
+ int ret = 0;
-static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
-{
- struct rb_node *n;
- struct btrfs_qgroup_operation *cur;
- int cmp;
+ delayed_refs = &trans->transaction->delayed_refs;
+ qgroup_to_skip = delayed_refs->qgroup_to_skip;
- spin_lock(&fs_info->qgroup_op_lock);
- n = fs_info->qgroup_op_tree.rb_node;
- while (n) {
- cur = rb_entry(n, struct btrfs_qgroup_operation, n);
- cmp = comp_oper_exist(cur, oper);
- if (cmp < 0) {
- n = n->rb_right;
- } else if (cmp) {
- n = n->rb_left;
- } else {
- spin_unlock(&fs_info->qgroup_op_lock);
- return -EEXIST;
- }
+ /*
+ * No need to do lock, since this function will only be called in
+ * btrfs_commmit_transaction().
+ */
+ node = rb_first(&delayed_refs->dirty_extent_root);
+ while (node) {
+ record = rb_entry(node, struct btrfs_qgroup_extent_record,
+ node);
+ ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0,
+ &record->old_roots);
+ if (ret < 0)
+ break;
+ if (qgroup_to_skip)
+ ulist_del(record->old_roots, qgroup_to_skip, 0);
+ node = rb_next(node);
}
- spin_unlock(&fs_info->qgroup_op_lock);
- return 0;
-}
-
-static int comp_oper(struct btrfs_qgroup_operation *oper1,
- struct btrfs_qgroup_operation *oper2)
-{
- if (oper1->bytenr < oper2->bytenr)
- return -1;
- if (oper1->bytenr > oper2->bytenr)
- return 1;
- if (oper1->ref_root < oper2->ref_root)
- return -1;
- if (oper1->ref_root > oper2->ref_root)
- return 1;
- if (oper1->seq < oper2->seq)
- return -1;
- if (oper1->seq > oper2->seq)
- return 1;
- if (oper1->type < oper2->type)
- return -1;
- if (oper1->type > oper2->type)
- return 1;
- return 0;
+ return ret;
}
-static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
+struct btrfs_qgroup_extent_record
+*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_qgroup_extent_record *record)
{
- struct rb_node **p;
- struct rb_node *parent = NULL;
- struct btrfs_qgroup_operation *cur;
- int cmp;
+ struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
+ struct rb_node *parent_node = NULL;
+ struct btrfs_qgroup_extent_record *entry;
+ u64 bytenr = record->bytenr;
- spin_lock(&fs_info->qgroup_op_lock);
- p = &fs_info->qgroup_op_tree.rb_node;
while (*p) {
- parent = *p;
- cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
- cmp = comp_oper(cur, oper);
- if (cmp < 0) {
- p = &(*p)->rb_right;
- } else if (cmp) {
+ parent_node = *p;
+ entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
+ node);
+ if (bytenr < entry->bytenr)
p = &(*p)->rb_left;
- } else {
- spin_unlock(&fs_info->qgroup_op_lock);
- return -EEXIST;
- }
- }
- rb_link_node(&oper->n, parent, p);
- rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
- spin_unlock(&fs_info->qgroup_op_lock);
- return 0;
-}
-
-/*
- * Record a quota operation for processing later on.
- * @trans: the transaction we are adding the delayed op to.
- * @fs_info: the fs_info for this fs.
- * @ref_root: the root of the reference we are acting on,
- * @bytenr: the bytenr we are acting on.
- * @num_bytes: the number of bytes in the reference.
- * @type: the type of operation this is.
- * @mod_seq: do we need to get a sequence number for looking up roots.
- *
- * We just add it to our trans qgroup_ref_list and carry on and process these
- * operations in order at some later point. If the reference root isn't a fs
- * root then we don't bother with doing anything.
- *
- * MUST BE HOLDING THE REF LOCK.
- */
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 ref_root,
- u64 bytenr, u64 num_bytes,
- enum btrfs_qgroup_operation_type type, int mod_seq)
-{
- struct btrfs_qgroup_operation *oper;
- int ret;
-
- if (!is_fstree(ref_root) || !fs_info->quota_enabled)
- return 0;
-
- oper = kmalloc(sizeof(*oper), GFP_NOFS);
- if (!oper)
- return -ENOMEM;
-
- oper->ref_root = ref_root;
- oper->bytenr = bytenr;
- oper->num_bytes = num_bytes;
- oper->type = type;
- oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
- INIT_LIST_HEAD(&oper->elem.list);
- oper->elem.seq = 0;
-
- trace_btrfs_qgroup_record_ref(oper);
-
- if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
- /*
- * If any operation for this bytenr/ref_root combo
- * exists, then we know it's not exclusively owned and
- * shouldn't be queued up.
- *
- * This also catches the case where we have a cloned
- * extent that gets queued up multiple times during
- * drop snapshot.
- */
- if (qgroup_oper_exists(fs_info, oper)) {
- kfree(oper);
- return 0;
- }
- }
-
- ret = insert_qgroup_oper(fs_info, oper);
- if (ret) {
- /* Shouldn't happen so have an assert for developers */
- ASSERT(0);
- kfree(oper);
- return ret;
+ else if (bytenr > entry->bytenr)
+ p = &(*p)->rb_right;
+ else
+ return entry;
}
- list_add_tail(&oper->list, &trans->qgroup_ref_list);
- if (mod_seq)
- btrfs_get_tree_mod_seq(fs_info, &oper->elem);
-
- return 0;
-}
-
-static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
-{
- struct ulist *tmp;
- int sign = 0;
- int ret = 0;
-
- tmp = ulist_alloc(GFP_NOFS);
- if (!tmp)
- return -ENOMEM;
-
- spin_lock(&fs_info->qgroup_lock);
- if (!fs_info->quota_root)
- goto out;
-
- switch (oper->type) {
- case BTRFS_QGROUP_OPER_ADD_EXCL:
- sign = 1;
- break;
- case BTRFS_QGROUP_OPER_SUB_EXCL:
- sign = -1;
- break;
- default:
- ASSERT(0);
- }
- ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
- oper->num_bytes, sign);
-out:
- spin_unlock(&fs_info->qgroup_lock);
- ulist_free(tmp);
- return ret;
+ rb_link_node(&record->node, parent_node, p);
+ rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
+ return NULL;
}
+#define UPDATE_NEW 0
+#define UPDATE_OLD 1
/*
- * Walk all of the roots that pointed to our bytenr and adjust their refcnts as
- * properly.
+ * Walk all of the roots that points to the bytenr and adjust their refcnts.
*/
-static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
- u64 root_to_skip, struct ulist *tmp,
- struct ulist *roots, struct ulist *qgroups,
- u64 seq, int *old_roots, int rescan)
+static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
+ struct ulist *roots, struct ulist *tmp,
+ struct ulist *qgroups, u64 seq, int update_old)
{
struct ulist_node *unode;
struct ulist_iterator uiter;
struct ulist_node *tmp_unode;
struct ulist_iterator tmp_uiter;
struct btrfs_qgroup *qg;
- int ret;
+ int ret = 0;
+ if (!roots)
+ return 0;
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(roots, &uiter))) {
- /* We don't count our current root here */
- if (unode->val == root_to_skip)
- continue;
qg = find_qgroup_rb(fs_info, unode->val);
if (!qg)
continue;
- /*
- * We could have a pending removal of this same ref so we may
- * not have actually found our ref root when doing
- * btrfs_find_all_roots, so we need to keep track of how many
- * old roots we find in case we removed ours and added a
- * different one at the same time. I don't think this could
- * happen in practice but that sort of thinking leads to pain
- * and suffering and to the dark side.
- */
- (*old_roots)++;
ulist_reinit(tmp);
ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
struct btrfs_qgroup_list *glist;
qg = u64_to_ptr(tmp_unode->aux);
- /*
- * We use this sequence number to keep from having to
- * run the whole list and 0 out the refcnt every time.
- * We basically use sequnce as the known 0 count and
- * then add 1 everytime we see a qgroup. This is how we
- * get how many of the roots actually point up to the
- * upper level qgroups in order to determine exclusive
- * counts.
- *
- * For rescan we want to set old_refcnt to seq so our
- * exclusive calculations end up correct.
- */
- if (rescan)
- qg->old_refcnt = seq;
- else if (qg->old_refcnt < seq)
- qg->old_refcnt = seq + 1;
+ if (update_old)
+ btrfs_qgroup_update_old_refcnt(qg, seq, 1);
else
- qg->old_refcnt++;
-
- if (qg->new_refcnt < seq)
- qg->new_refcnt = seq + 1;
- else
- qg->new_refcnt++;
+ btrfs_qgroup_update_new_refcnt(qg, seq, 1);
list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(qgroups, glist->group->qgroupid,
ptr_to_u64(glist->group),
}
/*
- * We need to walk forward in our operation tree and account for any roots that
- * were deleted after we made this operation.
- */
-static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper,
- struct ulist *tmp,
- struct ulist *qgroups, u64 seq,
- int *old_roots)
-{
- struct ulist_node *unode;
- struct ulist_iterator uiter;
- struct btrfs_qgroup *qg;
- struct btrfs_qgroup_operation *tmp_oper;
- struct rb_node *n;
- int ret;
-
- ulist_reinit(tmp);
-
- /*
- * We only walk forward in the tree since we're only interested in
- * removals that happened _after_ our operation.
- */
- spin_lock(&fs_info->qgroup_op_lock);
- n = rb_next(&oper->n);
- spin_unlock(&fs_info->qgroup_op_lock);
- if (!n)
- return 0;
- tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
- while (tmp_oper->bytenr == oper->bytenr) {
- /*
- * If it's not a removal we don't care, additions work out
- * properly with our refcnt tracking.
- */
- if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
- tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
- goto next;
- qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
- if (!qg)
- goto next;
- ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
- GFP_ATOMIC);
- if (ret) {
- if (ret < 0)
- return ret;
- /*
- * We only want to increase old_roots if this qgroup is
- * not already in the list of qgroups. If it is already
- * there then that means it must have been re-added or
- * the delete will be discarded because we had an
- * existing ref that we haven't looked up yet. In this
- * case we don't want to increase old_roots. So if ret
- * == 1 then we know that this is the first time we've
- * seen this qgroup and we can bump the old_roots.
- */
- (*old_roots)++;
- ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
- GFP_ATOMIC);
- if (ret < 0)
- return ret;
- }
-next:
- spin_lock(&fs_info->qgroup_op_lock);
- n = rb_next(&tmp_oper->n);
- spin_unlock(&fs_info->qgroup_op_lock);
- if (!n)
- break;
- tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
- }
-
- /* Ok now process the qgroups we found */
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(tmp, &uiter))) {
- struct btrfs_qgroup_list *glist;
-
- qg = u64_to_ptr(unode->aux);
- if (qg->old_refcnt < seq)
- qg->old_refcnt = seq + 1;
- else
- qg->old_refcnt++;
- if (qg->new_refcnt < seq)
- qg->new_refcnt = seq + 1;
- else
- qg->new_refcnt++;
- list_for_each_entry(glist, &qg->groups, next_group) {
- ret = ulist_add(qgroups, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (ret < 0)
- return ret;
- ret = ulist_add(tmp, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (ret < 0)
- return ret;
- }
- }
- return 0;
-}
-
-/* Add refcnt for the newly added reference. */
-static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper,
- struct btrfs_qgroup *qgroup,
- struct ulist *tmp, struct ulist *qgroups,
- u64 seq)
-{
- struct ulist_node *unode;
- struct ulist_iterator uiter;
- struct btrfs_qgroup *qg;
- int ret;
-
- ulist_reinit(tmp);
- ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
- GFP_ATOMIC);
- if (ret < 0)
- return ret;
- ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
- GFP_ATOMIC);
- if (ret < 0)
- return ret;
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(tmp, &uiter))) {
- struct btrfs_qgroup_list *glist;
-
- qg = u64_to_ptr(unode->aux);
- if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
- if (qg->new_refcnt < seq)
- qg->new_refcnt = seq + 1;
- else
- qg->new_refcnt++;
- } else {
- if (qg->old_refcnt < seq)
- qg->old_refcnt = seq + 1;
- else
- qg->old_refcnt++;
- }
- list_for_each_entry(glist, &qg->groups, next_group) {
- ret = ulist_add(tmp, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (ret < 0)
- return ret;
- ret = ulist_add(qgroups, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (ret < 0)
- return ret;
- }
- }
- return 0;
-}
-
-/*
- * This adjusts the counters for all referenced qgroups if need be.
+ * Update qgroup rfer/excl counters.
+ * Rfer update is easy, codes can explain themselves.
+ *
+ * Excl update is tricky, the update is split into 2 part.
+ * Part 1: Possible exclusive <-> sharing detect:
+ * | A | !A |
+ * -------------------------------------
+ * B | * | - |
+ * -------------------------------------
+ * !B | + | ** |
+ * -------------------------------------
+ *
+ * Conditions:
+ * A: cur_old_roots < nr_old_roots (not exclusive before)
+ * !A: cur_old_roots == nr_old_roots (possible exclusive before)
+ * B: cur_new_roots < nr_new_roots (not exclusive now)
+ * !B: cur_new_roots == nr_new_roots (possible exclsuive now)
+ *
+ * Results:
+ * +: Possible sharing -> exclusive -: Possible exclusive -> sharing
+ * *: Definitely not changed. **: Possible unchanged.
+ *
+ * For !A and !B condition, the exception is cur_old/new_roots == 0 case.
+ *
+ * To make the logic clear, we first use condition A and B to split
+ * combination into 4 results.
+ *
+ * Then, for result "+" and "-", check old/new_roots == 0 case, as in them
+ * only on variant maybe 0.
+ *
+ * Lastly, check result **, since there are 2 variants maybe 0, split them
+ * again(2x2).
+ * But this time we don't need to consider other things, the codes and logic
+ * is easy to understand now.
*/
-static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
- u64 root_to_skip, u64 num_bytes,
- struct ulist *qgroups, u64 seq,
- int old_roots, int new_roots, int rescan)
+static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
+ struct ulist *qgroups,
+ u64 nr_old_roots,
+ u64 nr_new_roots,
+ u64 num_bytes, u64 seq)
{
struct ulist_node *unode;
struct ulist_iterator uiter;
bool dirty = false;
qg = u64_to_ptr(unode->aux);
- /*
- * Wasn't referenced before but is now, add to the reference
- * counters.
- */
- if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
+ cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
+ cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
+
+ /* Rfer update part */
+ if (cur_old_count == 0 && cur_new_count > 0) {
qg->rfer += num_bytes;
qg->rfer_cmpr += num_bytes;
dirty = true;
}
-
- /*
- * Was referenced before but isn't now, subtract from the
- * reference counters.
- */
- if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
+ if (cur_old_count > 0 && cur_new_count == 0) {
qg->rfer -= num_bytes;
qg->rfer_cmpr -= num_bytes;
dirty = true;
}
- if (qg->old_refcnt < seq)
- cur_old_count = 0;
- else
- cur_old_count = qg->old_refcnt - seq;
- if (qg->new_refcnt < seq)
- cur_new_count = 0;
- else
- cur_new_count = qg->new_refcnt - seq;
-
- /*
- * If our refcount was the same as the roots previously but our
- * new count isn't the same as the number of roots now then we
- * went from having a exclusive reference on this range to not.
- */
- if (old_roots && cur_old_count == old_roots &&
- (cur_new_count != new_roots || new_roots == 0)) {
- WARN_ON(cur_new_count != new_roots && new_roots == 0);
- qg->excl -= num_bytes;
- qg->excl_cmpr -= num_bytes;
- dirty = true;
+ /* Excl update part */
+ /* Exclusive/none -> shared case */
+ if (cur_old_count == nr_old_roots &&
+ cur_new_count < nr_new_roots) {
+ /* Exclusive -> shared */
+ if (cur_old_count != 0) {
+ qg->excl -= num_bytes;
+ qg->excl_cmpr -= num_bytes;
+ dirty = true;
+ }
}
- /*
- * If we didn't reference all the roots before but now we do we
- * have an exclusive reference to this range.
- */
- if ((!old_roots || (old_roots && cur_old_count != old_roots))
- && cur_new_count == new_roots) {
- qg->excl += num_bytes;
- qg->excl_cmpr += num_bytes;
- dirty = true;
+ /* Shared -> exclusive/none case */
+ if (cur_old_count < nr_old_roots &&
+ cur_new_count == nr_new_roots) {
+ /* Shared->exclusive */
+ if (cur_new_count != 0) {
+ qg->excl += num_bytes;
+ qg->excl_cmpr += num_bytes;
+ dirty = true;
+ }
}
+ /* Exclusive/none -> exclusive/none case */
+ if (cur_old_count == nr_old_roots &&
+ cur_new_count == nr_new_roots) {
+ if (cur_old_count == 0) {
+ /* None -> exclusive/none */
+
+ if (cur_new_count != 0) {
+ /* None -> exclusive */
+ qg->excl += num_bytes;
+ qg->excl_cmpr += num_bytes;
+ dirty = true;
+ }
+ /* None -> none, nothing changed */
+ } else {
+ /* Exclusive -> exclusive/none */
+
+ if (cur_new_count == 0) {
+ /* Exclusive -> none */
+ qg->excl -= num_bytes;
+ qg->excl_cmpr -= num_bytes;
+ dirty = true;
+ }
+ /* Exclusive -> exclusive, nothing changed */
+ }
+ }
if (dirty)
qgroup_dirty(fs_info, qg);
}
return 0;
}
-/*
- * If we removed a data extent and there were other references for that bytenr
- * then we need to lookup all referenced roots to make sure we still don't
- * reference this bytenr. If we do then we can just discard this operation.
- */
-static int check_existing_refs(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
-{
- struct ulist *roots = NULL;
- struct ulist_node *unode;
- struct ulist_iterator uiter;
- int ret = 0;
-
- ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
- oper->elem.seq, &roots);
- if (ret < 0)
- return ret;
- ret = 0;
-
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(roots, &uiter))) {
- if (unode->val == oper->ref_root) {
- ret = 1;
- break;
- }
- }
- ulist_free(roots);
- btrfs_put_tree_mod_seq(fs_info, &oper->elem);
-
- return ret;
-}
-
-/*
- * If we share a reference across multiple roots then we may need to adjust
- * various qgroups referenced and exclusive counters. The basic premise is this
- *
- * 1) We have seq to represent a 0 count. Instead of looping through all of the
- * qgroups and resetting their refcount to 0 we just constantly bump this
- * sequence number to act as the base reference count. This means that if
- * anybody is equal to or below this sequence they were never referenced. We
- * jack this sequence up by the number of roots we found each time in order to
- * make sure we don't have any overlap.
- *
- * 2) We first search all the roots that reference the area _except_ the root
- * we're acting on currently. This makes up the old_refcnt of all the qgroups
- * before.
- *
- * 3) We walk all of the qgroups referenced by the root we are currently acting
- * on, and will either adjust old_refcnt in the case of a removal or the
- * new_refcnt in the case of an addition.
- *
- * 4) Finally we walk all the qgroups that are referenced by this range
- * including the root we are acting on currently. We will adjust the counters
- * based on the number of roots we had and will have after this operation.
- *
- * Take this example as an illustration
- *
- * [qgroup 1/0]
- * / | \
- * [qg 0/0] [qg 0/1] [qg 0/2]
- * \ | /
- * [ extent ]
- *
- * Say we are adding a reference that is covered by qg 0/0. The first step
- * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
- * old_roots being 2. Because it is adding new_roots will be 1. We then go
- * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
- * new_refcnt, bringing it to 3. We then walk through all of the qgroups, we
- * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
- * reference and thus must add the size to the referenced bytes. Everything
- * else is the same so nothing else changes.
- */
-static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
+int
+btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ u64 bytenr, u64 num_bytes,
+ struct ulist *old_roots, struct ulist *new_roots)
{
- struct ulist *roots = NULL;
- struct ulist *qgroups, *tmp;
- struct btrfs_qgroup *qgroup;
- struct seq_list elem = SEQ_LIST_INIT(elem);
+ struct ulist *qgroups = NULL;
+ struct ulist *tmp = NULL;
u64 seq;
- int old_roots = 0;
- int new_roots = 0;
+ u64 nr_new_roots = 0;
+ u64 nr_old_roots = 0;
int ret = 0;
- if (oper->elem.seq) {
- ret = check_existing_refs(trans, fs_info, oper);
- if (ret < 0)
- return ret;
- if (ret)
- return 0;
- }
+ if (new_roots)
+ nr_new_roots = new_roots->nnodes;
+ if (old_roots)
+ nr_old_roots = old_roots->nnodes;
- qgroups = ulist_alloc(GFP_NOFS);
- if (!qgroups)
- return -ENOMEM;
+ if (!fs_info->quota_enabled)
+ goto out_free;
+ BUG_ON(!fs_info->quota_root);
+ qgroups = ulist_alloc(GFP_NOFS);
+ if (!qgroups) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
tmp = ulist_alloc(GFP_NOFS);
if (!tmp) {
- ulist_free(qgroups);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_free;
}
- btrfs_get_tree_mod_seq(fs_info, &elem);
- ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
- &roots);
- btrfs_put_tree_mod_seq(fs_info, &elem);
- if (ret < 0) {
- ulist_free(qgroups);
- ulist_free(tmp);
- return ret;
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+ ret = 0;
+ goto out_free;
+ }
}
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
spin_lock(&fs_info->qgroup_lock);
- qgroup = find_qgroup_rb(fs_info, oper->ref_root);
- if (!qgroup)
- goto out;
seq = fs_info->qgroup_seq;
- /*
- * So roots is the list of all the roots currently pointing at the
- * bytenr, including the ref we are adding if we are adding, or not if
- * we are removing a ref. So we pass in the ref_root to skip that root
- * in our calculations. We set old_refnct and new_refcnt cause who the
- * hell knows what everything looked like before, and it doesn't matter
- * except...
- */
- ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
- seq, &old_roots, 0);
+ /* Update old refcnts using old_roots */
+ ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq,
+ UPDATE_OLD);
if (ret < 0)
goto out;
- /*
- * Now adjust the refcounts of the qgroups that care about this
- * reference, either the old_count in the case of removal or new_count
- * in the case of an addition.
- */
- ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
- seq);
+ /* Update new refcnts using new_roots */
+ ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq,
+ UPDATE_NEW);
if (ret < 0)
goto out;
- /*
- * ...in the case of removals. If we had a removal before we got around
- * to processing this operation then we need to find that guy and count
- * his references as if they really existed so we don't end up screwing
- * up the exclusive counts. Then whenever we go to process the delete
- * everything will be grand and we can account for whatever exclusive
- * changes need to be made there. We also have to pass in old_roots so
- * we have an accurate count of the roots as it pertains to this
- * operations view of the world.
- */
- ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
- &old_roots);
- if (ret < 0)
- goto out;
+ qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots,
+ num_bytes, seq);
/*
- * We are adding our root, need to adjust up the number of roots,
- * otherwise old_roots is the number of roots we want.
+ * Bump qgroup_seq to avoid seq overlap
*/
- if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
- new_roots = old_roots + 1;
- } else {
- new_roots = old_roots;
- old_roots++;
- }
- fs_info->qgroup_seq += old_roots + 1;
-
-
- /*
- * And now the magic happens, bless Arne for having a pretty elegant
- * solution for this.
- */
- qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
- qgroups, seq, old_roots, new_roots, 0);
+ fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
out:
spin_unlock(&fs_info->qgroup_lock);
- ulist_free(qgroups);
- ulist_free(roots);
+out_free:
ulist_free(tmp);
+ ulist_free(qgroups);
+ ulist_free(old_roots);
+ ulist_free(new_roots);
return ret;
}
-/*
- * Process a reference to a shared subtree. This type of operation is
- * queued during snapshot removal when we encounter extents which are
- * shared between more than one root.
- */
-static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
-{
- struct ulist *roots = NULL;
- struct ulist_node *unode;
- struct ulist_iterator uiter;
- struct btrfs_qgroup_list *glist;
- struct ulist *parents;
- int ret = 0;
- int err;
- struct btrfs_qgroup *qg;
- u64 root_obj = 0;
- struct seq_list elem = SEQ_LIST_INIT(elem);
-
- parents = ulist_alloc(GFP_NOFS);
- if (!parents)
- return -ENOMEM;
-
- btrfs_get_tree_mod_seq(fs_info, &elem);
- ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
- elem.seq, &roots);
- btrfs_put_tree_mod_seq(fs_info, &elem);
- if (ret < 0)
- goto out;
-
- if (roots->nnodes != 1)
- goto out;
-
- ULIST_ITER_INIT(&uiter);
- unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
- /*
- * If we find our ref root then that means all refs
- * this extent has to the root have not yet been
- * deleted. In that case, we do nothing and let the
- * last ref for this bytenr drive our update.
- *
- * This can happen for example if an extent is
- * referenced multiple times in a snapshot (clone,
- * etc). If we are in the middle of snapshot removal,
- * queued updates for such an extent will find the
- * root if we have not yet finished removing the
- * snapshot.
- */
- if (unode->val == oper->ref_root)
- goto out;
-
- root_obj = unode->val;
- BUG_ON(!root_obj);
-
- spin_lock(&fs_info->qgroup_lock);
- qg = find_qgroup_rb(fs_info, root_obj);
- if (!qg)
- goto out_unlock;
-
- qg->excl += oper->num_bytes;
- qg->excl_cmpr += oper->num_bytes;
- qgroup_dirty(fs_info, qg);
-
- /*
- * Adjust counts for parent groups. First we find all
- * parents, then in the 2nd loop we do the adjustment
- * while adding parents of the parents to our ulist.
- */
- list_for_each_entry(glist, &qg->groups, next_group) {
- err = ulist_add(parents, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (err < 0) {
- ret = err;
- goto out_unlock;
- }
- }
-
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(parents, &uiter))) {
- qg = u64_to_ptr(unode->aux);
- qg->excl += oper->num_bytes;
- qg->excl_cmpr += oper->num_bytes;
- qgroup_dirty(fs_info, qg);
-
- /* Add any parents of the parents */
- list_for_each_entry(glist, &qg->groups, next_group) {
- err = ulist_add(parents, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (err < 0) {
- ret = err;
- goto out_unlock;
- }
- }
- }
-
-out_unlock:
- spin_unlock(&fs_info->qgroup_lock);
-
-out:
- ulist_free(roots);
- ulist_free(parents);
- return ret;
-}
-
-/*
- * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
- * from the fs. First, all roots referencing the extent are searched, and
- * then the space is accounted accordingly to the different roots. The
- * accounting algorithm works in 3 steps documented inline.
- */
-static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
+int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
{
+ struct btrfs_qgroup_extent_record *record;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct ulist *new_roots = NULL;
+ struct rb_node *node;
+ u64 qgroup_to_skip;
int ret = 0;
- if (!fs_info->quota_enabled)
- return 0;
-
- BUG_ON(!fs_info->quota_root);
+ delayed_refs = &trans->transaction->delayed_refs;
+ qgroup_to_skip = delayed_refs->qgroup_to_skip;
+ while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
+ record = rb_entry(node, struct btrfs_qgroup_extent_record,
+ node);
- mutex_lock(&fs_info->qgroup_rescan_lock);
- if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
- if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
- mutex_unlock(&fs_info->qgroup_rescan_lock);
- return 0;
+ if (!ret) {
+ /*
+ * Use (u64)-1 as time_seq to do special search, which
+ * doesn't lock tree or delayed_refs and search current
+ * root. It's safe inside commit_transaction().
+ */
+ ret = btrfs_find_all_roots(trans, fs_info,
+ record->bytenr, (u64)-1, &new_roots);
+ if (ret < 0)
+ goto cleanup;
+ if (qgroup_to_skip)
+ ulist_del(new_roots, qgroup_to_skip, 0);
+ ret = btrfs_qgroup_account_extent(trans, fs_info,
+ record->bytenr, record->num_bytes,
+ record->old_roots, new_roots);
+ record->old_roots = NULL;
+ new_roots = NULL;
}
- }
- mutex_unlock(&fs_info->qgroup_rescan_lock);
+cleanup:
+ ulist_free(record->old_roots);
+ ulist_free(new_roots);
+ new_roots = NULL;
+ rb_erase(node, &delayed_refs->dirty_extent_root);
+ kfree(record);
- ASSERT(is_fstree(oper->ref_root));
-
- trace_btrfs_qgroup_account(oper);
-
- switch (oper->type) {
- case BTRFS_QGROUP_OPER_ADD_EXCL:
- case BTRFS_QGROUP_OPER_SUB_EXCL:
- ret = qgroup_excl_accounting(fs_info, oper);
- break;
- case BTRFS_QGROUP_OPER_ADD_SHARED:
- case BTRFS_QGROUP_OPER_SUB_SHARED:
- ret = qgroup_shared_accounting(trans, fs_info, oper);
- break;
- case BTRFS_QGROUP_OPER_SUB_SUBTREE:
- ret = qgroup_subtree_accounting(trans, fs_info, oper);
- break;
- default:
- ASSERT(0);
- }
- return ret;
-}
-
-/*
- * Needs to be called everytime we run delayed refs, even if there is an error
- * in order to cleanup outstanding operations.
- */
-int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
-{
- struct btrfs_qgroup_operation *oper;
- int ret = 0;
-
- while (!list_empty(&trans->qgroup_ref_list)) {
- oper = list_first_entry(&trans->qgroup_ref_list,
- struct btrfs_qgroup_operation, list);
- list_del_init(&oper->list);
- if (!ret || !trans->aborted)
- ret = btrfs_qgroup_account(trans, fs_info, oper);
- spin_lock(&fs_info->qgroup_op_lock);
- rb_erase(&oper->n, &fs_info->qgroup_op_tree);
- spin_unlock(&fs_info->qgroup_op_lock);
- btrfs_put_tree_mod_seq(fs_info, &oper->elem);
- kfree(oper);
}
return ret;
}
*/
static int
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
- struct btrfs_trans_handle *trans, struct ulist *qgroups,
- struct ulist *tmp, struct extent_buffer *scratch_leaf)
+ struct btrfs_trans_handle *trans,
+ struct extent_buffer *scratch_leaf)
{
struct btrfs_key found;
struct ulist *roots = NULL;
struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
u64 num_bytes;
- u64 seq;
- int new_roots;
int slot;
int ret;
else
num_bytes = found.offset;
- ulist_reinit(qgroups);
ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
&roots);
if (ret < 0)
goto out;
- spin_lock(&fs_info->qgroup_lock);
- seq = fs_info->qgroup_seq;
- fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
-
- new_roots = 0;
- ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
- seq, &new_roots, 1);
- if (ret < 0) {
- spin_unlock(&fs_info->qgroup_lock);
- ulist_free(roots);
- goto out;
- }
-
- ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
- seq, 0, new_roots, 1);
- if (ret < 0) {
- spin_unlock(&fs_info->qgroup_lock);
- ulist_free(roots);
+ /* For rescan, just pass old_roots as NULL */
+ ret = btrfs_qgroup_account_extent(trans, fs_info,
+ found.objectid, num_bytes, NULL, roots);
+ if (ret < 0)
goto out;
- }
- spin_unlock(&fs_info->qgroup_lock);
- ulist_free(roots);
}
out:
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
qgroup_rescan_work);
struct btrfs_path *path;
struct btrfs_trans_handle *trans = NULL;
- struct ulist *tmp = NULL, *qgroups = NULL;
struct extent_buffer *scratch_leaf = NULL;
int err = -ENOMEM;
int ret = 0;
path = btrfs_alloc_path();
if (!path)
goto out;
- qgroups = ulist_alloc(GFP_NOFS);
- if (!qgroups)
- goto out;
- tmp = ulist_alloc(GFP_NOFS);
- if (!tmp)
- goto out;
scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
if (!scratch_leaf)
goto out;
err = -EINTR;
} else {
err = qgroup_rescan_leaf(fs_info, path, trans,
- qgroups, tmp, scratch_leaf);
+ scratch_leaf);
}
if (err > 0)
btrfs_commit_transaction(trans, fs_info->fs_root);
out:
kfree(scratch_leaf);
- ulist_free(qgroups);
- ulist_free(tmp);
btrfs_free_path(path);
mutex_lock(&fs_info->qgroup_rescan_lock);
#ifndef __BTRFS_QGROUP__
#define __BTRFS_QGROUP__
+#include "ulist.h"
+#include "delayed-ref.h"
+
/*
- * A description of the operations, all of these operations only happen when we
- * are adding the 1st reference for that subvolume in the case of adding space
- * or on the last reference delete in the case of subtraction. The only
- * exception is the last one, which is added for confusion.
- *
- * BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only
- * one pointing at the bytes we are adding. This is called on the first
- * allocation.
- *
- * BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be
- * shared between subvols. This is called on the creation of a ref that already
- * has refs from a different subvolume, so basically reflink.
- *
- * BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only
- * one referencing the range.
- *
- * BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares with
- * refs with other subvolumes.
+ * Record a dirty extent, and info qgroup to update quota on it
+ * TODO: Use kmem cache to alloc it.
*/
-enum btrfs_qgroup_operation_type {
- BTRFS_QGROUP_OPER_ADD_EXCL,
- BTRFS_QGROUP_OPER_ADD_SHARED,
- BTRFS_QGROUP_OPER_SUB_EXCL,
- BTRFS_QGROUP_OPER_SUB_SHARED,
- BTRFS_QGROUP_OPER_SUB_SUBTREE,
-};
-
-struct btrfs_qgroup_operation {
- u64 ref_root;
+struct btrfs_qgroup_extent_record {
+ struct rb_node node;
u64 bytenr;
u64 num_bytes;
- u64 seq;
- enum btrfs_qgroup_operation_type type;
- struct seq_list elem;
- struct rb_node n;
- struct list_head list;
+ struct ulist *old_roots;
};
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 ref_root,
+int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info);
+struct btrfs_qgroup_extent_record
+*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_qgroup_extent_record *record);
+int
+btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes,
- enum btrfs_qgroup_operation_type type,
- int mod_seq);
-int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
-void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper);
+ struct ulist *old_roots, struct ulist *new_roots);
+int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
}
eb = read_tree_block(dest, old_bytenr, old_ptr_gen);
- if (!eb || !extent_buffer_uptodate(eb)) {
- ret = (!eb) ? -ENOMEM : -EIO;
+ if (IS_ERR(eb)) {
+ ret = PTR_ERR(eb);
+ } else if (!extent_buffer_uptodate(eb)) {
+ ret = -EIO;
free_extent_buffer(eb);
break;
}
bytenr = btrfs_node_blockptr(eb, path->slots[i]);
eb = read_tree_block(root, bytenr, ptr_gen);
- if (!eb || !extent_buffer_uptodate(eb)) {
+ if (IS_ERR(eb)) {
+ return PTR_ERR(eb);
+ } else if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
return -EIO;
}
blocksize = root->nodesize;
generation = btrfs_node_ptr_generation(upper->eb, slot);
eb = read_tree_block(root, bytenr, generation);
- if (!eb || !extent_buffer_uptodate(eb)) {
+ if (IS_ERR(eb)) {
+ err = PTR_ERR(eb);
+ goto next;
+ } else if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
err = -EIO;
goto next;
BUG_ON(block->key_ready);
eb = read_tree_block(rc->extent_root, block->bytenr,
block->key.offset);
- if (!eb || !extent_buffer_uptodate(eb)) {
+ if (IS_ERR(eb)) {
+ return PTR_ERR(eb);
+ } else if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
return -EIO;
}
kfree(sparity);
}
+static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
+{
+ struct scrub_parity *sparity = container_of(work, struct scrub_parity,
+ work);
+ struct scrub_ctx *sctx = sparity->sctx;
+
+ scrub_free_parity(sparity);
+ scrub_pending_bio_dec(sctx);
+}
+
static void scrub_parity_bio_endio(struct bio *bio, int error)
{
struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
- struct scrub_ctx *sctx = sparity->sctx;
if (error)
bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
sparity->nsectors);
- scrub_free_parity(sparity);
- scrub_pending_bio_dec(sctx);
bio_put(bio);
+
+ btrfs_init_work(&sparity->work, btrfs_scrubparity_helper,
+ scrub_parity_bio_endio_worker, NULL, NULL);
+ btrfs_queue_work(sparity->sctx->dev_root->fs_info->scrub_parity_workers,
+ &sparity->work);
}
static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
ret = -ENOMEM;
goto out;
}
+ fs_info->scrub_parity_workers =
+ btrfs_alloc_workqueue("btrfs-scrubparity", flags,
+ max_active, 2);
+ if (!fs_info->scrub_parity_workers) {
+ ret = -ENOMEM;
+ goto out;
+ }
}
++fs_info->scrub_workers_refcnt;
out:
btrfs_destroy_workqueue(fs_info->scrub_workers);
btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
+ btrfs_destroy_workqueue(fs_info->scrub_parity_workers);
}
WARN_ON(fs_info->scrub_workers_refcnt < 0);
}
* after this directory is moved, we can try to rmdir the ino rmdir_ino.
*/
u64 rmdir_ino;
+ bool orphanized;
};
struct orphan_dir_info {
/* may be truncated in case it's the last extent in a file */
u64 extent_len;
+ /* data offset in the file extent item */
+ u64 data_offset;
+
/* Just to check for bugs in backref resolving */
int found_itself;
};
if (ret < 0)
return ret;
- if (offset + bctx->extent_len > i_size)
+ if (offset + bctx->data_offset + bctx->extent_len > i_size)
return 0;
/*
backref_ctx->cur_offset = data_offset;
backref_ctx->found_itself = 0;
backref_ctx->extent_len = num_bytes;
+ /*
+ * For non-compressed extents iterate_extent_inodes() gives us extent
+ * offsets that already take into account the data offset, but not for
+ * compressed extents, since the offset is logical and not relative to
+ * the physical extent locations. We must take this into account to
+ * avoid sending clone offsets that go beyond the source file's size,
+ * which would result in the clone ioctl failing with -EINVAL on the
+ * receiving end.
+ */
+ if (compressed == BTRFS_COMPRESS_NONE)
+ backref_ctx->data_offset = 0;
+ else
+ backref_ctx->data_offset = btrfs_file_extent_offset(eb, fi);
/*
* The last extent of a file may be too large due to page alignment.
goto out;
}
- /* we know that it is or will be overwritten. check this now */
- if (ow_inode < sctx->send_progress)
+ /*
+ * We know that it is or will be overwritten. Check this now.
+ * The current inode being processed might have been the one that caused
+ * inode 'ino' to be orphanized, therefore ow_inode can actually be the
+ * same as sctx->send_progress.
+ */
+ if (ow_inode <= sctx->send_progress)
ret = 1;
else
ret = 0;
fs_path_reset(dest);
while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
+ struct waiting_dir_move *wdm;
+
fs_path_reset(name);
if (is_waiting_for_rm(sctx, ino)) {
break;
}
- if (is_waiting_for_move(sctx, ino)) {
+ wdm = get_waiting_dir_move(sctx, ino);
+ if (wdm && wdm->orphanized) {
+ ret = gen_unique_name(sctx, ino, gen, name);
+ stop = 1;
+ } else if (wdm) {
ret = get_first_ref(sctx->parent_root, ino,
&parent_inode, &parent_gen, name);
} else {
TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
le64_to_cpu(sctx->send_root->root_item.ctransid));
if (parent_root) {
- TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
- sctx->parent_root->root_item.uuid);
+ if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
+ TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+ parent_root->root_item.received_uuid);
+ else
+ TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+ parent_root->root_item.uuid);
TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
le64_to_cpu(sctx->parent_root->root_item.ctransid));
}
return entry != NULL;
}
-static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
+static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
{
struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
struct rb_node *parent = NULL;
return -ENOMEM;
dm->ino = ino;
dm->rmdir_ino = 0;
+ dm->orphanized = orphanized;
while (*p) {
parent = *p;
goto out;
}
- ret = add_waiting_dir_move(sctx, pm->ino);
+ ret = add_waiting_dir_move(sctx, pm->ino, is_orphan);
if (ret)
goto out;
return ret;
}
+/*
+ * Check if ino ino1 is an ancestor of inode ino2 in the given root.
+ * Return 1 if true, 0 if false and < 0 on error.
+ */
+static int is_ancestor(struct btrfs_root *root,
+ const u64 ino1,
+ const u64 ino1_gen,
+ const u64 ino2,
+ struct fs_path *fs_path)
+{
+ u64 ino = ino2;
+
+ while (ino > BTRFS_FIRST_FREE_OBJECTID) {
+ int ret;
+ u64 parent;
+ u64 parent_gen;
+
+ fs_path_reset(fs_path);
+ ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
+ if (ret < 0) {
+ if (ret == -ENOENT && ino == ino2)
+ ret = 0;
+ return ret;
+ }
+ if (parent == ino1)
+ return parent_gen == ino1_gen ? 1 : 0;
+ ino = parent;
+ }
+ return 0;
+}
+
static int wait_for_parent_move(struct send_ctx *sctx,
- struct recorded_ref *parent_ref)
+ struct recorded_ref *parent_ref,
+ const bool is_orphan)
{
int ret = 0;
u64 ino = parent_ref->dir;
* Our current directory inode may not yet be renamed/moved because some
* ancestor (immediate or not) has to be renamed/moved first. So find if
* such ancestor exists and make sure our own rename/move happens after
- * that ancestor is processed.
+ * that ancestor is processed to avoid path build infinite loops (done
+ * at get_cur_path()).
*/
while (ino > BTRFS_FIRST_FREE_OBJECTID) {
if (is_waiting_for_move(sctx, ino)) {
- ret = 1;
+ /*
+ * If the current inode is an ancestor of ino in the
+ * parent root, we need to delay the rename of the
+ * current inode, otherwise don't delayed the rename
+ * because we can end up with a circular dependency
+ * of renames, resulting in some directories never
+ * getting the respective rename operations issued in
+ * the send stream or getting into infinite path build
+ * loops.
+ */
+ ret = is_ancestor(sctx->parent_root,
+ sctx->cur_ino, sctx->cur_inode_gen,
+ ino, path_before);
break;
}
ino,
&sctx->new_refs,
&sctx->deleted_refs,
- false);
+ is_orphan);
if (!ret)
ret = 1;
}
}
}
+ if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root &&
+ can_rename) {
+ ret = wait_for_parent_move(sctx, cur, is_orphan);
+ if (ret < 0)
+ goto out;
+ if (ret == 1) {
+ can_rename = false;
+ *pending_move = 1;
+ }
+ }
+
/*
* link/move the ref to the new place. If we have an orphan
* inode, move it and update valid_path. If not, link or move
* dirs, we always have one new and one deleted
* ref. The deleted ref is ignored later.
*/
- ret = wait_for_parent_move(sctx, cur);
- if (ret < 0)
- goto out;
- if (ret) {
- *pending_move = 1;
- } else {
- ret = send_rename(sctx, valid_path,
- cur->full_path);
- if (!ret)
- ret = fs_path_copy(valid_path,
- cur->full_path);
- }
+ ret = send_rename(sctx, valid_path,
+ cur->full_path);
+ if (!ret)
+ ret = fs_path_copy(valid_path,
+ cur->full_path);
if (ret < 0)
goto out;
} else {
if (ret < 0)
goto out;
- TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
- clone_root->root->root_item.uuid);
+ /*
+ * If the parent we're using has a received_uuid set then use that as
+ * our clone source as that is what we will look for when doing a
+ * receive.
+ *
+ * This covers the case that we create a snapshot off of a received
+ * subvolume and then use that as the parent and try to receive on a
+ * different host.
+ */
+ if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid))
+ TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+ clone_root->root->root_item.received_uuid);
+ else
+ TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+ clone_root->root->root_item.uuid);
TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
le64_to_cpu(clone_root->root->root_item.ctransid));
TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
* __btrfs_std_error decodes expected errors from the caller and
* invokes the approciate error response.
*/
+__cold
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...)
{
* We'll complete the cleanup in btrfs_end_transaction and
* btrfs_commit_transaction.
*/
+__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const char *function,
unsigned int line, int errno)
{
- /*
- * Report first abort since mount
- */
- if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,
- &root->fs_info->fs_state)) {
- WARN(1, KERN_DEBUG "BTRFS: Transaction aborted (error %d)\n",
- errno);
- }
trans->aborted = errno;
/* Nothing used. The other threads that have joined this
* transaction may be able to continue. */
* __btrfs_panic decodes unexpected, fatal errors from the caller,
* issues an alert, and either panics or BUGs, depending on mount options.
*/
+__cold
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...)
{
return error;
}
-static struct dentry *get_default_root(struct super_block *sb,
- u64 subvol_objectid)
+static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
+ u64 subvol_objectid)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root = fs_info->tree_root;
- struct btrfs_root *new_root;
- struct btrfs_dir_item *di;
- struct btrfs_path *path;
- struct btrfs_key location;
- struct inode *inode;
- u64 dir_id;
- int new = 0;
+ struct btrfs_root *fs_root;
+ struct btrfs_root_ref *root_ref;
+ struct btrfs_inode_ref *inode_ref;
+ struct btrfs_key key;
+ struct btrfs_path *path = NULL;
+ char *name = NULL, *ptr;
+ u64 dirid;
+ int len;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ path->leave_spinning = 1;
+
+ name = kmalloc(PATH_MAX, GFP_NOFS);
+ if (!name) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ ptr = name + PATH_MAX - 1;
+ ptr[0] = '\0';
/*
- * We have a specific subvol we want to mount, just setup location and
- * go look up the root.
+ * Walk up the subvolume trees in the tree of tree roots by root
+ * backrefs until we hit the top-level subvolume.
*/
- if (subvol_objectid) {
- location.objectid = subvol_objectid;
- location.type = BTRFS_ROOT_ITEM_KEY;
- location.offset = (u64)-1;
- goto find_root;
+ while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
+ key.objectid = subvol_objectid;
+ key.type = BTRFS_ROOT_BACKREF_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto err;
+ } else if (ret > 0) {
+ ret = btrfs_previous_item(root, path, subvol_objectid,
+ BTRFS_ROOT_BACKREF_KEY);
+ if (ret < 0) {
+ goto err;
+ } else if (ret > 0) {
+ ret = -ENOENT;
+ goto err;
+ }
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ subvol_objectid = key.offset;
+
+ root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_root_ref);
+ len = btrfs_root_ref_name_len(path->nodes[0], root_ref);
+ ptr -= len + 1;
+ if (ptr < name) {
+ ret = -ENAMETOOLONG;
+ goto err;
+ }
+ read_extent_buffer(path->nodes[0], ptr + 1,
+ (unsigned long)(root_ref + 1), len);
+ ptr[0] = '/';
+ dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
+ btrfs_release_path(path);
+
+ key.objectid = subvol_objectid;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+ fs_root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(fs_root)) {
+ ret = PTR_ERR(fs_root);
+ goto err;
+ }
+
+ /*
+ * Walk up the filesystem tree by inode refs until we hit the
+ * root directory.
+ */
+ while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
+ key.objectid = dirid;
+ key.type = BTRFS_INODE_REF_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto err;
+ } else if (ret > 0) {
+ ret = btrfs_previous_item(fs_root, path, dirid,
+ BTRFS_INODE_REF_KEY);
+ if (ret < 0) {
+ goto err;
+ } else if (ret > 0) {
+ ret = -ENOENT;
+ goto err;
+ }
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ dirid = key.offset;
+
+ inode_ref = btrfs_item_ptr(path->nodes[0],
+ path->slots[0],
+ struct btrfs_inode_ref);
+ len = btrfs_inode_ref_name_len(path->nodes[0],
+ inode_ref);
+ ptr -= len + 1;
+ if (ptr < name) {
+ ret = -ENAMETOOLONG;
+ goto err;
+ }
+ read_extent_buffer(path->nodes[0], ptr + 1,
+ (unsigned long)(inode_ref + 1), len);
+ ptr[0] = '/';
+ btrfs_release_path(path);
+ }
}
+ btrfs_free_path(path);
+ if (ptr == name + PATH_MAX - 1) {
+ name[0] = '/';
+ name[1] = '\0';
+ } else {
+ memmove(name, ptr, name + PATH_MAX - ptr);
+ }
+ return name;
+
+err:
+ btrfs_free_path(path);
+ kfree(name);
+ return ERR_PTR(ret);
+}
+
+static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
+{
+ struct btrfs_root *root = fs_info->tree_root;
+ struct btrfs_dir_item *di;
+ struct btrfs_path *path;
+ struct btrfs_key location;
+ u64 dir_id;
+
path = btrfs_alloc_path();
if (!path)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
path->leave_spinning = 1;
/*
di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
if (IS_ERR(di)) {
btrfs_free_path(path);
- return ERR_CAST(di);
+ return PTR_ERR(di);
}
if (!di) {
/*
* Ok the default dir item isn't there. This is weird since
* it's always been there, but don't freak out, just try and
- * mount to root most subvolume.
+ * mount the top-level subvolume.
*/
btrfs_free_path(path);
- dir_id = BTRFS_FIRST_FREE_OBJECTID;
- new_root = fs_info->fs_root;
- goto setup_root;
+ *objectid = BTRFS_FS_TREE_OBJECTID;
+ return 0;
}
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
btrfs_free_path(path);
-
-find_root:
- new_root = btrfs_read_fs_root_no_name(fs_info, &location);
- if (IS_ERR(new_root))
- return ERR_CAST(new_root);
-
- if (!(sb->s_flags & MS_RDONLY)) {
- int ret;
- down_read(&fs_info->cleanup_work_sem);
- ret = btrfs_orphan_cleanup(new_root);
- up_read(&fs_info->cleanup_work_sem);
- if (ret)
- return ERR_PTR(ret);
- }
-
- dir_id = btrfs_root_dirid(&new_root->root_item);
-setup_root:
- location.objectid = dir_id;
- location.type = BTRFS_INODE_ITEM_KEY;
- location.offset = 0;
-
- inode = btrfs_iget(sb, &location, new_root, &new);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- /*
- * If we're just mounting the root most subvol put the inode and return
- * a reference to the dentry. We will have already gotten a reference
- * to the inode in btrfs_fill_super so we're good to go.
- */
- if (!new && d_inode(sb->s_root) == inode) {
- iput(inode);
- return dget(sb->s_root);
- }
-
- return d_obtain_root(inode);
+ *objectid = location.objectid;
+ return 0;
}
static int btrfs_fill_super(struct super_block *sb,
seq_puts(seq, ",fatal_errors=panic");
if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
seq_printf(seq, ",commit=%d", info->commit_interval);
+ seq_printf(seq, ",subvolid=%llu",
+ BTRFS_I(d_inode(dentry))->root->root_key.objectid);
+ seq_puts(seq, ",subvol=");
+ seq_dentry(seq, dentry, " \t\n\\");
return 0;
}
}
/*
- * This will strip out the subvol=%s argument for an argument string and add
- * subvolid=0 to make sure we get the actual tree root for path walking to the
- * subvol we want.
+ * This will add subvolid=0 to the argument string while removing any subvol=
+ * and subvolid= arguments to make sure we get the top-level root for path
+ * walking to the subvol we want.
*/
static char *setup_root_args(char *args)
{
- unsigned len = strlen(args) + 2 + 1;
- char *src, *dst, *buf;
+ char *buf, *dst, *sep;
- /*
- * We need the same args as before, but with this substitution:
- * s!subvol=[^,]+!subvolid=0!
- *
- * Since the replacement string is up to 2 bytes longer than the
- * original, allocate strlen(args) + 2 + 1 bytes.
- */
+ if (!args)
+ return kstrdup("subvolid=0", GFP_NOFS);
- src = strstr(args, "subvol=");
- /* This shouldn't happen, but just in case.. */
- if (!src)
- return NULL;
-
- buf = dst = kmalloc(len, GFP_NOFS);
+ /* The worst case is that we add ",subvolid=0" to the end. */
+ buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1, GFP_NOFS);
if (!buf)
return NULL;
- /*
- * If the subvol= arg is not at the start of the string,
- * copy whatever precedes it into buf.
- */
- if (src != args) {
- *src++ = '\0';
- strcpy(buf, args);
- dst += strlen(args);
+ while (1) {
+ sep = strchrnul(args, ',');
+ if (!strstarts(args, "subvol=") &&
+ !strstarts(args, "subvolid=")) {
+ memcpy(dst, args, sep - args);
+ dst += sep - args;
+ *dst++ = ',';
+ }
+ if (*sep)
+ args = sep + 1;
+ else
+ break;
}
-
strcpy(dst, "subvolid=0");
- dst += strlen("subvolid=0");
-
- /*
- * If there is a "," after the original subvol=... string,
- * copy that suffix into our buffer. Otherwise, we're done.
- */
- src = strchr(src, ',');
- if (src)
- strcpy(dst, src);
return buf;
}
-static struct dentry *mount_subvol(const char *subvol_name, int flags,
- const char *device_name, char *data)
+static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
+ int flags, const char *device_name,
+ char *data)
{
struct dentry *root;
- struct vfsmount *mnt;
+ struct vfsmount *mnt = NULL;
char *newargs;
+ int ret;
newargs = setup_root_args(data);
- if (!newargs)
- return ERR_PTR(-ENOMEM);
- mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
- newargs);
+ if (!newargs) {
+ root = ERR_PTR(-ENOMEM);
+ goto out;
+ }
- if (PTR_RET(mnt) == -EBUSY) {
+ mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs);
+ if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) {
if (flags & MS_RDONLY) {
- mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, device_name,
- newargs);
+ mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY,
+ device_name, newargs);
} else {
- int r;
- mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name,
- newargs);
+ mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY,
+ device_name, newargs);
if (IS_ERR(mnt)) {
- kfree(newargs);
- return ERR_CAST(mnt);
+ root = ERR_CAST(mnt);
+ mnt = NULL;
+ goto out;
}
- r = btrfs_remount(mnt->mnt_sb, &flags, NULL);
- if (r < 0) {
- /* FIXME: release vfsmount mnt ??*/
- kfree(newargs);
- return ERR_PTR(r);
+ down_write(&mnt->mnt_sb->s_umount);
+ ret = btrfs_remount(mnt->mnt_sb, &flags, NULL);
+ up_write(&mnt->mnt_sb->s_umount);
+ if (ret < 0) {
+ root = ERR_PTR(ret);
+ goto out;
}
}
}
+ if (IS_ERR(mnt)) {
+ root = ERR_CAST(mnt);
+ mnt = NULL;
+ goto out;
+ }
- kfree(newargs);
+ if (!subvol_name) {
+ if (!subvol_objectid) {
+ ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
+ &subvol_objectid);
+ if (ret) {
+ root = ERR_PTR(ret);
+ goto out;
+ }
+ }
+ subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
+ subvol_objectid);
+ if (IS_ERR(subvol_name)) {
+ root = ERR_CAST(subvol_name);
+ subvol_name = NULL;
+ goto out;
+ }
- if (IS_ERR(mnt))
- return ERR_CAST(mnt);
+ }
root = mount_subtree(mnt, subvol_name);
+ /* mount_subtree() drops our reference on the vfsmount. */
+ mnt = NULL;
- if (!IS_ERR(root) && !is_subvolume_inode(d_inode(root))) {
+ if (!IS_ERR(root)) {
struct super_block *s = root->d_sb;
- dput(root);
- root = ERR_PTR(-EINVAL);
- deactivate_locked_super(s);
- printk(KERN_ERR "BTRFS: '%s' is not a valid subvolume\n",
- subvol_name);
+ struct inode *root_inode = d_inode(root);
+ u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid;
+
+ ret = 0;
+ if (!is_subvolume_inode(root_inode)) {
+ pr_err("BTRFS: '%s' is not a valid subvolume\n",
+ subvol_name);
+ ret = -EINVAL;
+ }
+ if (subvol_objectid && root_objectid != subvol_objectid) {
+ /*
+ * This will also catch a race condition where a
+ * subvolume which was passed by ID is renamed and
+ * another subvolume is renamed over the old location.
+ */
+ pr_err("BTRFS: subvol '%s' does not match subvolid %llu\n",
+ subvol_name, subvol_objectid);
+ ret = -EINVAL;
+ }
+ if (ret) {
+ dput(root);
+ root = ERR_PTR(ret);
+ deactivate_locked_super(s);
+ }
}
+out:
+ mntput(mnt);
+ kfree(newargs);
+ kfree(subvol_name);
return root;
}
{
struct block_device *bdev = NULL;
struct super_block *s;
- struct dentry *root;
struct btrfs_fs_devices *fs_devices = NULL;
struct btrfs_fs_info *fs_info = NULL;
struct security_mnt_opts new_sec_opts;
return ERR_PTR(error);
}
- if (subvol_name) {
- root = mount_subvol(subvol_name, flags, device_name, data);
- kfree(subvol_name);
- return root;
+ if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
+ /* mount_subvol() will free subvol_name. */
+ return mount_subvol(subvol_name, subvol_objectid, flags,
+ device_name, data);
}
security_init_mnt_opts(&new_sec_opts);
error = btrfs_fill_super(s, fs_devices, data,
flags & MS_SILENT ? 1 : 0);
}
-
- root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error);
- if (IS_ERR(root)) {
+ if (error) {
deactivate_locked_super(s);
- error = PTR_ERR(root);
goto error_sec_opts;
}
fs_info = btrfs_sb(s);
error = setup_security_options(fs_info, s, &new_sec_opts);
if (error) {
- dput(root);
deactivate_locked_super(s);
goto error_sec_opts;
}
- return root;
+ return dget(s->s_root);
error_close_devices:
btrfs_close_devices(fs_devices);
#include "volumes.h"
static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
+static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj);
static u64 get_features(struct btrfs_fs_info *fs_info,
enum btrfs_feature_set set)
BTRFS_ATTR(clone_alignment, btrfs_clone_alignment_show);
-static struct attribute *btrfs_attrs[] = {
+static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(label),
BTRFS_ATTR_PTR(nodesize),
BTRFS_ATTR_PTR(sectorsize),
static void btrfs_release_super_kobj(struct kobject *kobj)
{
- struct btrfs_fs_info *fs_info = to_fs_info(kobj);
- complete(&fs_info->kobj_unregister);
+ struct btrfs_fs_devices *fs_devs = to_fs_devs(kobj);
+
+ memset(&fs_devs->super_kobj, 0, sizeof(struct kobject));
+ complete(&fs_devs->kobj_unregister);
}
static struct kobj_type btrfs_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = btrfs_release_super_kobj,
- .default_attrs = btrfs_attrs,
};
+static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj)
+{
+ if (kobj->ktype != &btrfs_ktype)
+ return NULL;
+ return container_of(kobj, struct btrfs_fs_devices, super_kobj);
+}
+
static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
{
if (kobj->ktype != &btrfs_ktype)
return NULL;
- return container_of(kobj, struct btrfs_fs_info, super_kobj);
+ return to_fs_devs(kobj)->fs_info;
}
#define NUM_FEATURE_BITS 64
attrs[0] = &fa->kobj_attr.attr;
if (add) {
int ret;
- ret = sysfs_merge_group(&fs_info->super_kobj,
+ ret = sysfs_merge_group(&fs_info->fs_devices->super_kobj,
&agroup);
if (ret)
return ret;
} else
- sysfs_unmerge_group(&fs_info->super_kobj,
+ sysfs_unmerge_group(&fs_info->fs_devices->super_kobj,
&agroup);
}
return 0;
}
-static void __btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
+static void __btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
+{
+ if (fs_devs->device_dir_kobj) {
+ kobject_del(fs_devs->device_dir_kobj);
+ kobject_put(fs_devs->device_dir_kobj);
+ fs_devs->device_dir_kobj = NULL;
+ }
+
+ if (fs_devs->super_kobj.state_initialized) {
+ kobject_del(&fs_devs->super_kobj);
+ kobject_put(&fs_devs->super_kobj);
+ wait_for_completion(&fs_devs->kobj_unregister);
+ }
+}
+
+/* when fs_devs is NULL it will remove all fsid kobject */
+void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
{
- kobject_del(&fs_info->super_kobj);
- kobject_put(&fs_info->super_kobj);
- wait_for_completion(&fs_info->kobj_unregister);
+ struct list_head *fs_uuids = btrfs_get_fs_uuids();
+
+ if (fs_devs) {
+ __btrfs_sysfs_remove_fsid(fs_devs);
+ return;
+ }
+
+ list_for_each_entry(fs_devs, fs_uuids, list) {
+ __btrfs_sysfs_remove_fsid(fs_devs);
+ }
}
void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
{
+ btrfs_reset_fs_info_ptr(fs_info);
+
if (fs_info->space_info_kobj) {
sysfs_remove_files(fs_info->space_info_kobj, allocation_attrs);
kobject_del(fs_info->space_info_kobj);
kobject_put(fs_info->space_info_kobj);
}
- kobject_del(fs_info->device_dir_kobj);
- kobject_put(fs_info->device_dir_kobj);
addrm_unknown_feature_attrs(fs_info, false);
- sysfs_remove_group(&fs_info->super_kobj, &btrfs_feature_attr_group);
- __btrfs_sysfs_remove_one(fs_info);
+ sysfs_remove_group(&fs_info->fs_devices->super_kobj, &btrfs_feature_attr_group);
+ sysfs_remove_files(&fs_info->fs_devices->super_kobj, btrfs_attrs);
+ btrfs_kobj_rm_device(fs_info->fs_devices, NULL);
}
const char * const btrfs_feature_set_names[3] = {
}
}
-int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
+/* when one_device is NULL, it removes all device links */
+
+int btrfs_kobj_rm_device(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *one_device)
{
struct hd_struct *disk;
struct kobject *disk_kobj;
- if (!fs_info->device_dir_kobj)
+ if (!fs_devices->device_dir_kobj)
return -EINVAL;
if (one_device && one_device->bdev) {
disk = one_device->bdev->bd_part;
disk_kobj = &part_to_dev(disk)->kobj;
- sysfs_remove_link(fs_info->device_dir_kobj,
+ sysfs_remove_link(fs_devices->device_dir_kobj,
+ disk_kobj->name);
+ }
+
+ if (one_device)
+ return 0;
+
+ list_for_each_entry(one_device,
+ &fs_devices->devices, dev_list) {
+ if (!one_device->bdev)
+ continue;
+ disk = one_device->bdev->bd_part;
+ disk_kobj = &part_to_dev(disk)->kobj;
+
+ sysfs_remove_link(fs_devices->device_dir_kobj,
disk_kobj->name);
}
return 0;
}
-int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
- struct btrfs_device *one_device)
+int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs)
{
- int error = 0;
- struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
- struct btrfs_device *dev;
-
- if (!fs_info->device_dir_kobj)
- fs_info->device_dir_kobj = kobject_create_and_add("devices",
- &fs_info->super_kobj);
+ if (!fs_devs->device_dir_kobj)
+ fs_devs->device_dir_kobj = kobject_create_and_add("devices",
+ &fs_devs->super_kobj);
- if (!fs_info->device_dir_kobj)
+ if (!fs_devs->device_dir_kobj)
return -ENOMEM;
+ return 0;
+}
+
+int btrfs_kobj_add_device(struct btrfs_fs_devices *fs_devices,
+ struct btrfs_device *one_device)
+{
+ int error = 0;
+ struct btrfs_device *dev;
+
list_for_each_entry(dev, &fs_devices->devices, dev_list) {
struct hd_struct *disk;
struct kobject *disk_kobj;
disk = dev->bdev->bd_part;
disk_kobj = &part_to_dev(disk)->kobj;
- error = sysfs_create_link(fs_info->device_dir_kobj,
+ error = sysfs_create_link(fs_devices->device_dir_kobj,
disk_kobj, disk_kobj->name);
if (error)
break;
/* Debugging tunables and exported data */
u64 btrfs_debugfs_test;
+/*
+ * Can be called by the device discovery thread.
+ * And parent can be specified for seed device
+ */
+int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
+ struct kobject *parent)
+{
+ int error;
+
+ init_completion(&fs_devs->kobj_unregister);
+ fs_devs->super_kobj.kset = btrfs_kset;
+ error = kobject_init_and_add(&fs_devs->super_kobj,
+ &btrfs_ktype, parent, "%pU", fs_devs->fsid);
+ return error;
+}
+
int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info)
{
int error;
+ struct btrfs_fs_devices *fs_devs = fs_info->fs_devices;
+ struct kobject *super_kobj = &fs_devs->super_kobj;
+
+ btrfs_set_fs_info_ptr(fs_info);
- init_completion(&fs_info->kobj_unregister);
- fs_info->super_kobj.kset = btrfs_kset;
- error = kobject_init_and_add(&fs_info->super_kobj, &btrfs_ktype, NULL,
- "%pU", fs_info->fsid);
+ error = btrfs_kobj_add_device(fs_devs, NULL);
if (error)
return error;
- error = sysfs_create_group(&fs_info->super_kobj,
- &btrfs_feature_attr_group);
+ error = sysfs_create_files(super_kobj, btrfs_attrs);
if (error) {
- __btrfs_sysfs_remove_one(fs_info);
+ btrfs_kobj_rm_device(fs_devs, NULL);
return error;
}
- error = addrm_unknown_feature_attrs(fs_info, true);
+ error = sysfs_create_group(super_kobj,
+ &btrfs_feature_attr_group);
if (error)
goto failure;
- error = btrfs_kobj_add_device(fs_info, NULL);
+ error = addrm_unknown_feature_attrs(fs_info, true);
if (error)
goto failure;
fs_info->space_info_kobj = kobject_create_and_add("allocation",
- &fs_info->super_kobj);
+ super_kobj);
if (!fs_info->space_info_kobj) {
error = -ENOMEM;
goto failure;
extern const char * const btrfs_feature_set_names[3];
extern struct kobj_type space_info_ktype;
extern struct kobj_type btrfs_raid_ktype;
-int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
+int btrfs_kobj_add_device(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *one_device);
-int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
+int btrfs_kobj_rm_device(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *one_device);
+int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
+ struct kobject *parent);
+int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
#endif /* _BTRFS_SYSFS_H_ */
#include "../transaction.h"
#include "../disk-io.h"
#include "../qgroup.h"
+#include "../backref.h"
static void init_dummy_trans(struct btrfs_trans_handle *trans)
{
{
struct btrfs_trans_handle trans;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct ulist *old_roots = NULL;
+ struct ulist *new_roots = NULL;
int ret;
init_dummy_trans(&trans);
return ret;
}
- ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
- BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+ /*
+ * Since the test trans doesn't havee the complicated delayed refs,
+ * we can only call btrfs_qgroup_account_extent() directly to test
+ * quota.
+ */
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
if (ret) {
- test_msg("Couldn't add space to a qgroup %d\n", ret);
+ ulist_free(old_roots);
+ test_msg("Couldn't find old roots: %d\n", ret);
return ret;
}
if (ret)
return ret;
- ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+ if (ret) {
+ ulist_free(old_roots);
+ ulist_free(new_roots);
+ test_msg("Couldn't find old roots: %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+ old_roots, new_roots);
if (ret) {
- test_msg("Delayed qgroup accounting failed %d\n", ret);
+ test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
+ old_roots = NULL;
+ new_roots = NULL;
+
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ if (ret) {
+ ulist_free(old_roots);
+ test_msg("Couldn't find old roots: %d\n", ret);
+ return ret;
+ }
ret = remove_extent_item(root, 4096, 4096);
if (ret)
return -EINVAL;
- ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
- BTRFS_QGROUP_OPER_SUB_EXCL, 0);
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
if (ret) {
- test_msg("Couldn't remove space from the qgroup %d\n", ret);
- return -EINVAL;
+ ulist_free(old_roots);
+ ulist_free(new_roots);
+ test_msg("Couldn't find old roots: %d\n", ret);
+ return ret;
}
- ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+ old_roots, new_roots);
if (ret) {
- test_msg("Qgroup accounting failed %d\n", ret);
+ test_msg("Couldn't account space for a qgroup %d\n", ret);
return -EINVAL;
}
{
struct btrfs_trans_handle trans;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct ulist *old_roots = NULL;
+ struct ulist *new_roots = NULL;
int ret;
init_dummy_trans(&trans);
return ret;
}
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ if (ret) {
+ ulist_free(old_roots);
+ test_msg("Couldn't find old roots: %d\n", ret);
+ return ret;
+ }
+
ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5);
if (ret)
return ret;
- ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
- BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
if (ret) {
- test_msg("Couldn't add space to a qgroup %d\n", ret);
+ ulist_free(old_roots);
+ ulist_free(new_roots);
+ test_msg("Couldn't find old roots: %d\n", ret);
return ret;
}
- ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+ old_roots, new_roots);
if (ret) {
- test_msg("Delayed qgroup accounting failed %d\n", ret);
+ test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
return -EINVAL;
}
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ if (ret) {
+ ulist_free(old_roots);
+ test_msg("Couldn't find old roots: %d\n", ret);
+ return ret;
+ }
+
ret = add_tree_ref(root, 4096, 4096, 0, 256);
if (ret)
return ret;
- ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096,
- BTRFS_QGROUP_OPER_ADD_SHARED, 0);
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
if (ret) {
- test_msg("Qgroup record ref failed %d\n", ret);
+ ulist_free(old_roots);
+ ulist_free(new_roots);
+ test_msg("Couldn't find old roots: %d\n", ret);
return ret;
}
- ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+ old_roots, new_roots);
if (ret) {
- test_msg("Qgroup accounting failed %d\n", ret);
+ test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
return -EINVAL;
}
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ if (ret) {
+ ulist_free(old_roots);
+ test_msg("Couldn't find old roots: %d\n", ret);
+ return ret;
+ }
+
ret = remove_extent_ref(root, 4096, 4096, 0, 256);
if (ret)
return ret;
- ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096,
- BTRFS_QGROUP_OPER_SUB_SHARED, 0);
+ ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
if (ret) {
- test_msg("Qgroup record ref failed %d\n", ret);
+ ulist_free(old_roots);
+ ulist_free(new_roots);
+ test_msg("Couldn't find old roots: %d\n", ret);
return ret;
}
- ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+ old_roots, new_roots);
if (ret) {
- test_msg("Qgroup accounting failed %d\n", ret);
+ test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
cur_trans->dirty_bg_run = 0;
cur_trans->delayed_refs.href_root = RB_ROOT;
+ cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
atomic_set(&cur_trans->delayed_refs.num_entries, 0);
cur_trans->delayed_refs.num_heads_ready = 0;
cur_trans->delayed_refs.pending_csums = 0;
cur_trans->delayed_refs.num_heads = 0;
cur_trans->delayed_refs.flushing = 0;
cur_trans->delayed_refs.run_delayed_start = 0;
+ cur_trans->delayed_refs.qgroup_to_skip = 0;
/*
* although the tree mod log is per file system and not per transaction,
h->transaction = cur_trans;
h->blocks_used = 0;
h->bytes_reserved = 0;
+ h->chunk_bytes_reserved = 0;
h->root = root;
h->delayed_ref_updates = 0;
h->use_count = 1;
if (!list_empty(&trans->new_bgs))
btrfs_create_pending_block_groups(trans, root);
+ btrfs_trans_release_chunk_metadata(trans);
+
if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
should_end_transaction(trans, root) &&
ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
if (pending->error)
goto no_free_objectid;
+ /*
+ * Make qgroup to skip current new snapshot's qgroupid, as it is
+ * accounted by later btrfs_qgroup_inherit().
+ */
+ btrfs_set_skip_qgroup(trans, objectid);
+
btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
if (to_reserve > 0) {
to_reserve,
BTRFS_RESERVE_NO_FLUSH);
if (pending->error)
- goto no_free_objectid;
+ goto clear_skip_qgroup;
}
key.objectid = objectid;
btrfs_abort_transaction(trans, root, ret);
goto fail;
}
-
- /*
- * We need to flush delayed refs in order to make sure all of our quota
- * operations have been done before we call btrfs_qgroup_inherit.
- */
- ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto fail;
- }
-
- ret = btrfs_qgroup_inherit(trans, fs_info,
- root->root_key.objectid,
- objectid, pending->inherit);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto fail;
- }
-
/* see comments in should_cow_block() */
set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
smp_wmb();
goto fail;
}
}
+
+ ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto fail;
+ }
+
+ /*
+ * account qgroup counters before qgroup_inherit()
+ */
+ ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
+ if (ret)
+ goto fail;
+ ret = btrfs_qgroup_account_extents(trans, fs_info);
+ if (ret)
+ goto fail;
+ ret = btrfs_qgroup_inherit(trans, fs_info,
+ root->root_key.objectid,
+ objectid, pending->inherit);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto fail;
+ }
+
fail:
pending->error = ret;
dir_item_existed:
trans->block_rsv = rsv;
trans->bytes_reserved = 0;
+clear_skip_qgroup:
+ btrfs_clear_skip_qgroup(trans);
no_free_objectid:
kfree(new_root_item);
root_item_alloc_fail:
goto scrub_continue;
}
+ /* Reocrd old roots for later qgroup accounting */
+ ret = btrfs_qgroup_prepare_account_extents(trans, root->fs_info);
+ if (ret) {
+ mutex_unlock(&root->fs_info->reloc_mutex);
+ goto scrub_continue;
+ }
+
/*
* make sure none of the code above managed to slip in a
* delayed item
*/
btrfs_free_log_root_tree(trans, root->fs_info);
+ /*
+ * Since fs roots are all committed, we can get a quite accurate
+ * new_roots. So let's do quota accounting.
+ */
+ ret = btrfs_qgroup_account_extents(trans, root->fs_info);
+ if (ret < 0) {
+ mutex_unlock(&root->fs_info->tree_log_mutex);
+ mutex_unlock(&root->fs_info->reloc_mutex);
+ goto scrub_continue;
+ }
+
ret = commit_cowonly_roots(trans, root);
if (ret) {
mutex_unlock(&root->fs_info->tree_log_mutex);
clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags);
clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags);
+ btrfs_trans_release_chunk_metadata(trans);
+
spin_lock(&root->fs_info->trans_lock);
cur_trans->state = TRANS_STATE_UNBLOCKED;
root->fs_info->running_transaction = NULL;
btrfs_scrub_continue(root);
cleanup_transaction:
btrfs_trans_release_metadata(trans, root);
+ btrfs_trans_release_chunk_metadata(trans);
trans->block_rsv = NULL;
if (trans->qgroup_reserved) {
btrfs_qgroup_free(root, trans->qgroup_reserved);
struct btrfs_trans_handle {
u64 transid;
u64 bytes_reserved;
+ u64 chunk_bytes_reserved;
u64 qgroup_reserved;
unsigned long use_count;
unsigned long blocks_reserved;
spin_unlock(&BTRFS_I(inode)->lock);
}
+/*
+ * Make qgroup codes to skip given qgroupid, means the old/new_roots for
+ * qgroup won't contain the qgroupid in it.
+ */
+static inline void btrfs_set_skip_qgroup(struct btrfs_trans_handle *trans,
+ u64 qgroupid)
+{
+ struct btrfs_delayed_ref_root *delayed_refs;
+
+ delayed_refs = &trans->transaction->delayed_refs;
+ WARN_ON(delayed_refs->qgroup_to_skip);
+ delayed_refs->qgroup_to_skip = qgroupid;
+}
+
+static inline void btrfs_clear_skip_qgroup(struct btrfs_trans_handle *trans)
+{
+ struct btrfs_delayed_ref_root *delayed_refs;
+
+ delayed_refs = &trans->transaction->delayed_refs;
+ WARN_ON(!delayed_refs->qgroup_to_skip);
+ delayed_refs->qgroup_to_skip = 0;
+}
+
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
goto out;
- if (btrfs_test_opt(root, SSD))
- goto out;
-
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
&ordered->flags))
continue;
- if (ordered->csum_bytes_left) {
- btrfs_start_ordered_extent(inode, ordered, 0);
- wait_event(ordered->wait,
- ordered->csum_bytes_left == 0);
- }
-
list_for_each_entry(sum, &ordered->list, list) {
ret = btrfs_csum_file_blocks(trans, log, sum);
if (ret)
return NULL;
}
+static void ulist_rbtree_erase(struct ulist *ulist, struct ulist_node *node)
+{
+ rb_erase(&node->rb_node, &ulist->root);
+ list_del(&node->list);
+ kfree(node);
+ BUG_ON(ulist->nnodes == 0);
+ ulist->nnodes--;
+}
+
static int ulist_rbtree_insert(struct ulist *ulist, struct ulist_node *ins)
{
struct rb_node **p = &ulist->root.rb_node;
node->val = val;
node->aux = aux;
-#ifdef CONFIG_BTRFS_DEBUG
- node->seqnum = ulist->nnodes;
-#endif
ret = ulist_rbtree_insert(ulist, node);
ASSERT(!ret);
return 1;
}
+/*
+ * ulist_del - delete one node from ulist
+ * @ulist: ulist to remove node from
+ * @val: value to delete
+ * @aux: aux to delete
+ *
+ * The deletion will only be done when *BOTH* val and aux matches.
+ * Return 0 for successful delete.
+ * Return > 0 for not found.
+ */
+int ulist_del(struct ulist *ulist, u64 val, u64 aux)
+{
+ struct ulist_node *node;
+
+ node = ulist_rbtree_search(ulist, val);
+ /* Not found */
+ if (!node)
+ return 1;
+
+ if (node->aux != aux)
+ return 1;
+
+ /* Found and delete */
+ ulist_rbtree_erase(ulist, node);
+ return 0;
+}
+
/**
* ulist_next - iterate ulist
* @ulist: ulist to iterate
uiter->cur_list = uiter->cur_list->next;
} else {
uiter->cur_list = ulist->nodes.next;
-#ifdef CONFIG_BTRFS_DEBUG
- uiter->i = 0;
-#endif
}
node = list_entry(uiter->cur_list, struct ulist_node, list);
-#ifdef CONFIG_BTRFS_DEBUG
- ASSERT(node->seqnum == uiter->i);
- ASSERT(uiter->i >= 0 && uiter->i < ulist->nnodes);
- uiter->i++;
-#endif
return node;
}
int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
u64 *old_aux, gfp_t gfp_mask);
+int ulist_del(struct ulist *ulist, u64 val, u64 aux);
/* just like ulist_add_merge() but take a pointer for the aux data */
static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux,
DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
+struct list_head *btrfs_get_fs_uuids(void)
+{
+ return &fs_uuids;
+}
static struct btrfs_fs_devices *__alloc_fs_devices(void)
{
run_scheduled_bios(device);
}
+
+void btrfs_free_stale_device(struct btrfs_device *cur_dev)
+{
+ struct btrfs_fs_devices *fs_devs;
+ struct btrfs_device *dev;
+
+ if (!cur_dev->name)
+ return;
+
+ list_for_each_entry(fs_devs, &fs_uuids, list) {
+ int del = 1;
+
+ if (fs_devs->opened)
+ continue;
+ if (fs_devs->seeding)
+ continue;
+
+ list_for_each_entry(dev, &fs_devs->devices, dev_list) {
+
+ if (dev == cur_dev)
+ continue;
+ if (!dev->name)
+ continue;
+
+ /*
+ * Todo: This won't be enough. What if the same device
+ * comes back (with new uuid and) with its mapper path?
+ * But for now, this does help as mostly an admin will
+ * either use mapper or non mapper path throughout.
+ */
+ rcu_read_lock();
+ del = strcmp(rcu_str_deref(dev->name),
+ rcu_str_deref(cur_dev->name));
+ rcu_read_unlock();
+ if (!del)
+ break;
+ }
+
+ if (!del) {
+ /* delete the stale device */
+ if (fs_devs->num_devices == 1) {
+ btrfs_sysfs_remove_fsid(fs_devs);
+ list_del(&fs_devs->list);
+ free_fs_devices(fs_devs);
+ } else {
+ fs_devs->num_devices--;
+ list_del(&dev->dev_list);
+ rcu_string_free(dev->name);
+ kfree(dev);
+ }
+ break;
+ }
+ }
+}
+
/*
* Add new device to list of registered devices
*
if (!fs_devices->opened)
device->generation = found_transid;
+ /*
+ * if there is new btrfs on an already registered device,
+ * then remove the stale device entry.
+ */
+ btrfs_free_stale_device(device);
+
*fs_devices_ret = fs_devices;
return ret;
static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
- struct btrfs_device *device;
+ struct btrfs_device *device, *tmp;
if (--fs_devices->opened > 0)
return 0;
mutex_lock(&fs_devices->device_list_mutex);
- list_for_each_entry(device, &fs_devices->devices, dev_list) {
+ list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
struct btrfs_device *new_device;
struct rcu_string *name;
map = (struct map_lookup *)em->bdev;
for (i = 0; i < map->num_stripes; i++) {
+ u64 end;
+
if (map->stripes[i].dev != device)
continue;
if (map->stripes[i].physical >= physical_start + len ||
map->stripes[i].physical + em->orig_block_len <=
physical_start)
continue;
- *start = map->stripes[i].physical +
- em->orig_block_len;
- ret = 1;
+ /*
+ * Make sure that while processing the pinned list we do
+ * not override our *start with a lower value, because
+ * we can have pinned chunks that fall within this
+ * device hole and that have lower physical addresses
+ * than the pending chunks we processed before. If we
+ * do not take this special care we can end up getting
+ * 2 pending chunks that start at the same physical
+ * device offsets because the end offset of a pinned
+ * chunk can be equal to the start offset of some
+ * pending chunk.
+ */
+ end = map->stripes[i].physical + em->orig_block_len;
+ if (end > *start) {
+ *start = end;
+ ret = 1;
+ }
}
}
if (search_list == &trans->transaction->pending_chunks) {
if (device->bdev) {
device->fs_devices->open_devices--;
/* remove sysfs entry */
- btrfs_kobj_rm_device(root->fs_info, device);
+ btrfs_kobj_rm_device(root->fs_info->fs_devices, device);
}
call_rcu(&device->rcu, free_device);
mutex_lock(&uuid_mutex);
WARN_ON(!tgtdev);
mutex_lock(&fs_info->fs_devices->device_list_mutex);
+
+ btrfs_kobj_rm_device(fs_info->fs_devices, tgtdev);
+
if (tgtdev->bdev) {
btrfs_scratch_superblock(tgtdev);
fs_info->fs_devices->open_devices--;
tmp + 1);
/* add sysfs device entry */
- btrfs_kobj_add_device(root->fs_info, device);
+ btrfs_kobj_add_device(root->fs_info->fs_devices, device);
/*
* we've got more storage, clear any full flags on the space
*/
snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
root->fs_info->fsid);
- if (kobject_rename(&root->fs_info->super_kobj, fsid_buf))
- goto error_trans;
+ if (kobject_rename(&root->fs_info->fs_devices->super_kobj,
+ fsid_buf))
+ pr_warn("BTRFS: sysfs: failed to create fsid for sprout\n");
}
root->fs_info->num_tolerated_disk_barrier_failures =
error_trans:
btrfs_end_transaction(trans, root);
rcu_string_free(device->name);
- btrfs_kobj_rm_device(root->fs_info, device);
+ btrfs_kobj_rm_device(root->fs_info->fs_devices, device);
kfree(device);
error:
blkdev_put(bdev, FMODE_EXCL);
return -EINVAL;
}
map = (struct map_lookup *)em->bdev;
+ lock_chunks(root->fs_info->chunk_root);
+ check_system_chunk(trans, extent_root, map->type);
+ unlock_chunks(root->fs_info->chunk_root);
for (i = 0; i < map->num_stripes; i++) {
struct btrfs_device *device = map->stripes[i].dev;
uuid_root = btrfs_create_tree(trans, fs_info,
BTRFS_UUID_TREE_OBJECTID);
if (IS_ERR(uuid_root)) {
- btrfs_abort_transaction(trans, tree_root,
- PTR_ERR(uuid_root));
- return PTR_ERR(uuid_root);
+ ret = PTR_ERR(uuid_root);
+ btrfs_abort_transaction(trans, tree_root, ret);
+ return ret;
}
fs_info->uuid_root = uuid_root;
int slot;
int failed = 0;
bool retried = false;
+ bool checked_pending_chunks = false;
struct extent_buffer *l;
struct btrfs_key key;
struct btrfs_super_block *super_copy = root->fs_info->super_copy;
goto again;
} else if (failed && retried) {
ret = -ENOSPC;
- lock_chunks(root);
-
- btrfs_device_set_total_bytes(device, old_size);
- if (device->writeable)
- device->fs_devices->total_rw_bytes += diff;
- spin_lock(&root->fs_info->free_chunk_lock);
- root->fs_info->free_chunk_space += diff;
- spin_unlock(&root->fs_info->free_chunk_lock);
- unlock_chunks(root);
goto done;
}
}
lock_chunks(root);
+
+ /*
+ * We checked in the above loop all device extents that were already in
+ * the device tree. However before we have updated the device's
+ * total_bytes to the new size, we might have had chunk allocations that
+ * have not complete yet (new block groups attached to transaction
+ * handles), and therefore their device extents were not yet in the
+ * device tree and we missed them in the loop above. So if we have any
+ * pending chunk using a device extent that overlaps the device range
+ * that we can not use anymore, commit the current transaction and
+ * repeat the search on the device tree - this way we guarantee we will
+ * not have chunks using device extents that end beyond 'new_size'.
+ */
+ if (!checked_pending_chunks) {
+ u64 start = new_size;
+ u64 len = old_size - new_size;
+
+ if (contains_pending_extent(trans, device, &start, len)) {
+ unlock_chunks(root);
+ checked_pending_chunks = true;
+ failed = 0;
+ retried = false;
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret)
+ goto done;
+ goto again;
+ }
+ }
+
btrfs_device_set_disk_total_bytes(device, new_size);
if (list_empty(&device->resized_list))
list_add_tail(&device->resized_list,
btrfs_end_transaction(trans, root);
done:
btrfs_free_path(path);
+ if (ret) {
+ lock_chunks(root);
+ btrfs_device_set_total_bytes(device, old_size);
+ if (device->writeable)
+ device->fs_devices->total_rw_bytes += diff;
+ spin_lock(&root->fs_info->free_chunk_lock);
+ root->fs_info->free_chunk_space += diff;
+ spin_unlock(&root->fs_info->free_chunk_lock);
+ unlock_chunks(root);
+ }
return ret;
}
free_extent_map(em);
return -EIO;
}
+ btrfs_warn(root->fs_info, "devid %llu uuid %pU is missing",
+ devid, uuid);
}
map->stripes[i].dev->in_fs_metadata = 1;
}
if (!btrfs_test_opt(root, DEGRADED))
return -EIO;
- btrfs_warn(root->fs_info, "devid %llu missing", devid);
device = add_missing_dev(root, fs_devices, devid, dev_uuid);
if (!device)
return -ENOMEM;
+ btrfs_warn(root->fs_info, "devid %llu uuid %pU missing",
+ devid, dev_uuid);
} else {
if (!device->bdev && !btrfs_test_opt(root, DEGRADED))
return -EIO;
}
unlock_chunks(root);
}
+
+void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ while (fs_devices) {
+ fs_devices->fs_info = fs_info;
+ fs_devices = fs_devices->seed;
+ }
+}
+
+void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ while (fs_devices) {
+ fs_devices->fs_info = NULL;
+ fs_devices = fs_devices->seed;
+ }
+}
* nonrot flag set
*/
int rotating;
+
+ struct btrfs_fs_info *fs_info;
+ /* sysfs kobjects */
+ struct kobject super_kobj;
+ struct kobject *device_dir_kobj;
+ struct completion kobj_unregister;
};
#define BTRFS_BIO_INLINE_CSUM_SIZE 64
mutex_unlock(&root->fs_info->chunk_mutex);
}
+struct list_head *btrfs_get_fs_uuids(void);
+void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
+void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
#endif
options are also slightly simpler (compared to CIFS) due
to protocol improvements.
+config CIFS_SMB311
+ bool "SMB3.1.1 network file system support (Experimental)"
+ depends on CIFS_SMB2 && INET
+
+ help
+ This enables experimental support for the newest, SMB3.1.1, dialect.
+ This dialect includes improved security negotiation features.
+ If unsure, say N
+
config CIFS_FSCACHE
bool "Provide CIFS client caching support"
depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
Smb_21,
Smb_30,
Smb_302,
+#ifdef CONFIG_CIFS_SMB311
+ Smb_311,
+#endif /* SMB311 */
+ Smb_version_err
};
struct mid_q_entry;
void (*new_lease_key)(struct cifs_fid *);
int (*generate_signingkey)(struct cifs_ses *);
int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *);
+ int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon,
+ struct cifsFileInfo *src_file);
int (*query_mf_symlink)(unsigned int, struct cifs_tcon *,
struct cifs_sb_info *, const unsigned char *,
char *, unsigned int *);
int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file,
struct cifsFileInfo *target_file, u64 src_off, u64 len,
u64 dest_off);
+ int (*duplicate_extents)(const unsigned int, struct cifsFileInfo *src,
+ struct cifsFileInfo *target_file, u64 src_off, u64 len,
+ u64 dest_off);
int (*validate_negotiate)(const unsigned int, struct cifs_tcon *);
ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *,
const unsigned char *, const unsigned char *, char *,
#define SMB302_VERSION_STRING "3.02"
/*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */
extern struct smb_version_values smb302_values;
+#define SMB311_VERSION_STRING "3.1.1"
+#define ALT_SMB311_VERSION_STRING "3.11"
+extern struct smb_version_operations smb311_operations;
+extern struct smb_version_values smb311_values;
#endif /* _CIFS_GLOB_H */
/* List of FileSystemAttributes - see 2.5.1 of MS-FSCC */
+#define FILE_SUPPORTS_SPARSE_VDL 0x10000000 /* faster nonsparse extend */
+#define FILE_SUPPORTS_BLOCK_REFCOUNTING 0x08000000 /* allow ioctl dup extents */
#define FILE_SUPPORT_INTEGRITY_STREAMS 0x04000000
#define FILE_SUPPORTS_USN_JOURNAL 0x02000000
#define FILE_SUPPORTS_OPEN_BY_FILE_ID 0x01000000
char FileName[1];
} __attribute__((packed)) FILE_ALL_INFO; /* level 0x107 QPathInfo */
+typedef struct {
+ __le64 AllocationSize;
+ __le64 EndOfFile; /* size ie offset to first free byte in file */
+ __le32 NumberOfLinks; /* hard links */
+ __u8 DeletePending;
+ __u8 Directory;
+ __u16 Pad;
+} __attribute__((packed)) FILE_STANDARD_INFO; /* level 0x102 QPathInfo */
+
+
/* defines for enumerating possible values of the Unix type field below */
#define UNIX_FILE 0
#define UNIX_DIR 1
server->negflavor = CIFS_NEGFLAVOR_UNENCAP;
memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey,
CIFS_CRYPTO_KEY_SIZE);
- } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC ||
- server->capabilities & CAP_EXTENDED_SECURITY) &&
- (pSMBr->EncryptionKeyLength == 0)) {
+ } else if (pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC ||
+ server->capabilities & CAP_EXTENDED_SECURITY) {
server->negflavor = CIFS_NEGFLAVOR_EXTENDED;
rc = decode_ext_sec_blob(ses, pSMBr);
} else if (server->sec_mode & SECMODE_PW_ENCRYPT) {
{ Smb_21, SMB21_VERSION_STRING },
{ Smb_30, SMB30_VERSION_STRING },
{ Smb_302, SMB302_VERSION_STRING },
+#ifdef CONFIG_CIFS_SMB311
+ { Smb_311, SMB311_VERSION_STRING },
+ { Smb_311, ALT_SMB311_VERSION_STRING },
+#endif /* SMB311 */
+ { Smb_version_err, NULL }
};
static int ip_connect(struct TCP_Server_Info *server);
vol->ops = &smb30_operations; /* currently identical with 3.0 */
vol->vals = &smb302_values;
break;
+#ifdef CONFIG_CIFS_SMB311
+ case Smb_311:
+ vol->ops = &smb311_operations;
+ vol->vals = &smb311_values;
+ break;
+#endif /* SMB311 */
#endif
default:
cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
else if (ses)
cifs_put_smb_ses(ses);
+ cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
+
free_xid(xid);
}
#endif
#include "cifsproto.h"
#include "cifs_debug.h"
#include "cifsfs.h"
+#include <linux/btrfs.h>
#define CIFS_IOCTL_MAGIC 0xCF
#define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int)
+#define CIFS_IOC_SET_INTEGRITY _IO(CIFS_IOCTL_MAGIC, 4)
static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
- unsigned long srcfd, u64 off, u64 len, u64 destoff)
+ unsigned long srcfd, u64 off, u64 len, u64 destoff,
+ bool dup_extents)
{
int rc;
struct cifsFileInfo *smb_file_target = dst_file->private_data;
truncate_inode_pages_range(&target_inode->i_data, destoff,
PAGE_CACHE_ALIGN(destoff + len)-1);
- if (target_tcon->ses->server->ops->clone_range)
+ if (dup_extents && target_tcon->ses->server->ops->duplicate_extents)
+ rc = target_tcon->ses->server->ops->duplicate_extents(xid,
+ smb_file_src, smb_file_target, off, len, destoff);
+ else if (!dup_extents && target_tcon->ses->server->ops->clone_range)
rc = target_tcon->ses->server->ops->clone_range(xid,
smb_file_src, smb_file_target, off, len, destoff);
+ else
+ rc = -EOPNOTSUPP;
/* force revalidate of size and timestamps of target file now
that target is updated on the server */
}
break;
case CIFS_IOC_COPYCHUNK_FILE:
- rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0);
+ rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, false);
+ break;
+ case BTRFS_IOC_CLONE:
+ rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, true);
+ break;
+ case CIFS_IOC_SET_INTEGRITY:
+ if (pSMBFile == NULL)
+ break;
+ tcon = tlink_tcon(pSMBFile->tlink);
+ if (tcon->ses->server->ops->set_integrity)
+ rc = tcon->ses->server->ops->set_integrity(xid,
+ tcon, pSMBFile);
+ else
+ rc = -EOPNOTSUPP;
break;
default:
cifs_dbg(FYI, "unsupported ioctl\n");
cfile->fid.volatile_fid, cfile->pid, &eof, false);
}
+#ifdef CONFIG_CIFS_SMB311
+static int
+smb2_duplicate_extents(const unsigned int xid,
+ struct cifsFileInfo *srcfile,
+ struct cifsFileInfo *trgtfile, u64 src_off,
+ u64 len, u64 dest_off)
+{
+ int rc;
+ unsigned int ret_data_len;
+ char *retbuf = NULL;
+ struct duplicate_extents_to_file dup_ext_buf;
+ struct cifs_tcon *tcon = tlink_tcon(trgtfile->tlink);
+
+ /* server fileays advertise duplicate extent support with this flag */
+ if ((le32_to_cpu(tcon->fsAttrInfo.Attributes) &
+ FILE_SUPPORTS_BLOCK_REFCOUNTING) == 0)
+ return -EOPNOTSUPP;
+
+ dup_ext_buf.VolatileFileHandle = srcfile->fid.volatile_fid;
+ dup_ext_buf.PersistentFileHandle = srcfile->fid.persistent_fid;
+ dup_ext_buf.SourceFileOffset = cpu_to_le64(src_off);
+ dup_ext_buf.TargetFileOffset = cpu_to_le64(dest_off);
+ dup_ext_buf.ByteCount = cpu_to_le64(len);
+ cifs_dbg(FYI, "duplicate extents: src off %lld dst off %lld len %lld",
+ src_off, dest_off, len);
+
+ rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false);
+ if (rc)
+ goto duplicate_extents_out;
+
+ rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
+ trgtfile->fid.volatile_fid,
+ FSCTL_DUPLICATE_EXTENTS_TO_FILE,
+ true /* is_fsctl */, (char *)&dup_ext_buf,
+ sizeof(struct duplicate_extents_to_file),
+ (char **)&retbuf,
+ &ret_data_len);
+
+ if (ret_data_len > 0)
+ cifs_dbg(FYI, "non-zero response length in duplicate extents");
+
+duplicate_extents_out:
+ return rc;
+}
+#endif /* CONFIG_CIFS_SMB311 */
+
+
static int
smb2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *cfile)
cfile->fid.volatile_fid);
}
+static int
+smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifsFileInfo *cfile)
+{
+ struct fsctl_set_integrity_information_req integr_info;
+ char *retbuf = NULL;
+ unsigned int ret_data_len;
+
+ integr_info.ChecksumAlgorithm = cpu_to_le16(CHECKSUM_TYPE_UNCHANGED);
+ integr_info.Flags = 0;
+ integr_info.Reserved = 0;
+
+ return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
+ cfile->fid.volatile_fid,
+ FSCTL_SET_INTEGRITY_INFORMATION,
+ true /* is_fsctl */, (char *)&integr_info,
+ sizeof(struct fsctl_set_integrity_information_req),
+ (char **)&retbuf,
+ &ret_data_len);
+
+}
+
static int
smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
const char *path, struct cifs_sb_info *cifs_sb,
.new_lease_key = smb2_new_lease_key,
.generate_signingkey = generate_smb3signingkey,
.calc_signature = smb3_calc_signature,
+ .set_integrity = smb3_set_integrity,
.is_read_op = smb21_is_read_op,
.set_oplock_level = smb3_set_oplock_level,
.create_lease_buf = smb3_create_lease_buf,
.fallocate = smb3_fallocate,
};
+#ifdef CONFIG_CIFS_SMB311
+struct smb_version_operations smb311_operations = {
+ .compare_fids = smb2_compare_fids,
+ .setup_request = smb2_setup_request,
+ .setup_async_request = smb2_setup_async_request,
+ .check_receive = smb2_check_receive,
+ .add_credits = smb2_add_credits,
+ .set_credits = smb2_set_credits,
+ .get_credits_field = smb2_get_credits_field,
+ .get_credits = smb2_get_credits,
+ .wait_mtu_credits = smb2_wait_mtu_credits,
+ .get_next_mid = smb2_get_next_mid,
+ .read_data_offset = smb2_read_data_offset,
+ .read_data_length = smb2_read_data_length,
+ .map_error = map_smb2_to_linux_error,
+ .find_mid = smb2_find_mid,
+ .check_message = smb2_check_message,
+ .dump_detail = smb2_dump_detail,
+ .clear_stats = smb2_clear_stats,
+ .print_stats = smb2_print_stats,
+ .dump_share_caps = smb2_dump_share_caps,
+ .is_oplock_break = smb2_is_valid_oplock_break,
+ .downgrade_oplock = smb2_downgrade_oplock,
+ .need_neg = smb2_need_neg,
+ .negotiate = smb2_negotiate,
+ .negotiate_wsize = smb2_negotiate_wsize,
+ .negotiate_rsize = smb2_negotiate_rsize,
+ .sess_setup = SMB2_sess_setup,
+ .logoff = SMB2_logoff,
+ .tree_connect = SMB2_tcon,
+ .tree_disconnect = SMB2_tdis,
+ .qfs_tcon = smb3_qfs_tcon,
+ .is_path_accessible = smb2_is_path_accessible,
+ .can_echo = smb2_can_echo,
+ .echo = SMB2_echo,
+ .query_path_info = smb2_query_path_info,
+ .get_srv_inum = smb2_get_srv_inum,
+ .query_file_info = smb2_query_file_info,
+ .set_path_size = smb2_set_path_size,
+ .set_file_size = smb2_set_file_size,
+ .set_file_info = smb2_set_file_info,
+ .set_compression = smb2_set_compression,
+ .mkdir = smb2_mkdir,
+ .mkdir_setinfo = smb2_mkdir_setinfo,
+ .rmdir = smb2_rmdir,
+ .unlink = smb2_unlink,
+ .rename = smb2_rename_path,
+ .create_hardlink = smb2_create_hardlink,
+ .query_symlink = smb2_query_symlink,
+ .query_mf_symlink = smb3_query_mf_symlink,
+ .create_mf_symlink = smb3_create_mf_symlink,
+ .open = smb2_open_file,
+ .set_fid = smb2_set_fid,
+ .close = smb2_close_file,
+ .flush = smb2_flush_file,
+ .async_readv = smb2_async_readv,
+ .async_writev = smb2_async_writev,
+ .sync_read = smb2_sync_read,
+ .sync_write = smb2_sync_write,
+ .query_dir_first = smb2_query_dir_first,
+ .query_dir_next = smb2_query_dir_next,
+ .close_dir = smb2_close_dir,
+ .calc_smb_size = smb2_calc_size,
+ .is_status_pending = smb2_is_status_pending,
+ .oplock_response = smb2_oplock_response,
+ .queryfs = smb2_queryfs,
+ .mand_lock = smb2_mand_lock,
+ .mand_unlock_range = smb2_unlock_range,
+ .push_mand_locks = smb2_push_mandatory_locks,
+ .get_lease_key = smb2_get_lease_key,
+ .set_lease_key = smb2_set_lease_key,
+ .new_lease_key = smb2_new_lease_key,
+ .generate_signingkey = generate_smb3signingkey,
+ .calc_signature = smb3_calc_signature,
+ .set_integrity = smb3_set_integrity,
+ .is_read_op = smb21_is_read_op,
+ .set_oplock_level = smb3_set_oplock_level,
+ .create_lease_buf = smb3_create_lease_buf,
+ .parse_lease_buf = smb3_parse_lease_buf,
+ .clone_range = smb2_clone_range,
+ .duplicate_extents = smb2_duplicate_extents,
+/* .validate_negotiate = smb3_validate_negotiate, */ /* not used in 3.11 */
+ .wp_retry_size = smb2_wp_retry_size,
+ .dir_needs_close = smb2_dir_needs_close,
+ .fallocate = smb3_fallocate,
+};
+#endif /* CIFS_SMB311 */
+
struct smb_version_values smb20_values = {
.version_string = SMB20_VERSION_STRING,
.protocol_id = SMB20_PROT_ID,
.signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
.create_lease_size = sizeof(struct create_lease_v2),
};
+
+#ifdef CONFIG_CIFS_SMB311
+struct smb_version_values smb311_values = {
+ .version_string = SMB311_VERSION_STRING,
+ .protocol_id = SMB311_PROT_ID,
+ .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
+ .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+ .header_size = sizeof(struct smb2_hdr),
+ .max_header_size = MAX_SMB2_HDR_SIZE,
+ .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .lock_cmd = SMB2_LOCK,
+ .cap_unix = 0,
+ .cap_nt_find = SMB2_NT_FIND,
+ .cap_large_files = SMB2_LARGE_FILES,
+ .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .create_lease_size = sizeof(struct create_lease_v2),
+};
+#endif /* SMB311 */
return rc;
}
+#ifdef CONFIG_CIFS_SMB311
+/* offset is sizeof smb2_negotiate_req - 4 but rounded up to 8 bytes */
+#define OFFSET_OF_NEG_CONTEXT 0x68 /* sizeof(struct smb2_negotiate_req) - 4 */
+
+
+#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1)
+#define SMB2_ENCRYPTION_CAPABILITIES cpu_to_le16(2)
+
+static void
+build_preauth_ctxt(struct smb2_preauth_neg_context *pneg_ctxt)
+{
+ pneg_ctxt->ContextType = SMB2_PREAUTH_INTEGRITY_CAPABILITIES;
+ pneg_ctxt->DataLength = cpu_to_le16(38);
+ pneg_ctxt->HashAlgorithmCount = cpu_to_le16(1);
+ pneg_ctxt->SaltLength = cpu_to_le16(SMB311_SALT_SIZE);
+ get_random_bytes(pneg_ctxt->Salt, SMB311_SALT_SIZE);
+ pneg_ctxt->HashAlgorithms = SMB2_PREAUTH_INTEGRITY_SHA512;
+}
+
+static void
+build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt)
+{
+ pneg_ctxt->ContextType = SMB2_ENCRYPTION_CAPABILITIES;
+ pneg_ctxt->DataLength = cpu_to_le16(6);
+ pneg_ctxt->CipherCount = cpu_to_le16(2);
+ pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_GCM;
+ pneg_ctxt->Ciphers[1] = SMB2_ENCRYPTION_AES128_CCM;
+}
+
+static void
+assemble_neg_contexts(struct smb2_negotiate_req *req)
+{
+
+ /* +4 is to account for the RFC1001 len field */
+ char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT + 4;
+
+ build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt);
+ /* Add 2 to size to round to 8 byte boundary */
+ pneg_ctxt += 2 + sizeof(struct smb2_preauth_neg_context);
+ build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
+ req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
+ req->NegotiateContextCount = cpu_to_le16(2);
+ inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + 2
+ + sizeof(struct smb2_encryption_neg_context)); /* calculate hash */
+}
+#else
+static void assemble_neg_contexts(struct smb2_negotiate_req *req)
+{
+ return;
+}
+#endif /* SMB311 */
+
+
/*
*
* SMB2 Worker functions follow:
/* ClientGUID must be zero for SMB2.02 dialect */
if (ses->server->vals->protocol_id == SMB20_PROT_ID)
memset(req->ClientGUID, 0, SMB2_CLIENT_GUID_SIZE);
- else
+ else {
memcpy(req->ClientGUID, server->client_guid,
SMB2_CLIENT_GUID_SIZE);
-
+ if (ses->server->vals->protocol_id == SMB311_PROT_ID)
+ assemble_neg_contexts(req);
+ }
iov[0].iov_base = (char *)req;
/* 4 for rfc1002 length field */
iov[0].iov_len = get_rfc1002_length(req) + 4;
cifs_dbg(FYI, "negotiated smb3.0 dialect\n");
else if (rsp->DialectRevision == cpu_to_le16(SMB302_PROT_ID))
cifs_dbg(FYI, "negotiated smb3.02 dialect\n");
+#ifdef CONFIG_CIFS_SMB311
+ else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID))
+ cifs_dbg(FYI, "negotiated smb3.1.1 dialect\n");
+#endif /* SMB311 */
else {
- cifs_dbg(VFS, "Illegal dialect returned by server %d\n",
+ cifs_dbg(VFS, "Illegal dialect returned by server 0x%x\n",
le16_to_cpu(rsp->DialectRevision));
rc = -EIO;
goto neg_exit;
return rc;
req->hdr.SessionId = 0; /* First session, not a reauthenticate */
- req->VcNumber = 0; /* MBZ */
+ req->Flags = 0; /* MBZ */
/* to enable echos and oplocks */
req->hdr.CreditRequest = cpu_to_le16(3);
__u64 SessionId;
} __packed;
-/* Encryption Algorithms */
-#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001)
-
/*
* SMB2 flag definitions
*/
__le16 Reserved; /* MBZ */
__le32 Capabilities;
__u8 ClientGUID[SMB2_CLIENT_GUID_SIZE];
- __le64 ClientStartTime; /* MBZ */
+ /* In SMB3.02 and earlier next three were MBZ le64 ClientStartTime */
+ __le32 NegotiateContextOffset; /* SMB3.1.1 only. MBZ earlier */
+ __le16 NegotiateContextCount; /* SMB3.1.1 only. MBZ earlier */
+ __le16 Reserved2;
__le16 Dialects[1]; /* One dialect (vers=) at a time for now */
} __packed;
#define SMB21_PROT_ID 0x0210
#define SMB30_PROT_ID 0x0300
#define SMB302_PROT_ID 0x0302
+#define SMB311_PROT_ID 0x0311
#define BAD_PROT_ID 0xFFFF
/* SecurityMode flags */
#define SMB2_NT_FIND 0x00100000
#define SMB2_LARGE_FILES 0x00200000
+#define SMB311_SALT_SIZE 32
+/* Hash Algorithm Types */
+#define SMB2_PREAUTH_INTEGRITY_SHA512 cpu_to_le16(0x0001)
+
+struct smb2_preauth_neg_context {
+ __le16 ContextType; /* 1 */
+ __le16 DataLength;
+ __le32 Reserved;
+ __le16 HashAlgorithmCount; /* 1 */
+ __le16 SaltLength;
+ __le16 HashAlgorithms; /* HashAlgorithms[0] since only one defined */
+ __u8 Salt[SMB311_SALT_SIZE];
+} __packed;
+
+/* Encryption Algorithms Ciphers */
+#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001)
+#define SMB2_ENCRYPTION_AES128_GCM cpu_to_le16(0x0002)
+
+struct smb2_encryption_neg_context {
+ __le16 ContextType; /* 2 */
+ __le16 DataLength;
+ __le32 Reserved;
+ __le16 CipherCount; /* AES-128-GCM and AES-128-CCM */
+ __le16 Ciphers[2]; /* Ciphers[0] since only one used now */
+} __packed;
+
struct smb2_negotiate_rsp {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 65 */
__le16 SecurityMode;
__le16 DialectRevision;
- __le16 Reserved; /* MBZ */
+ __le16 NegotiateContextCount; /* Prior to SMB3.1.1 was Reserved & MBZ */
__u8 ServerGUID[16];
__le32 Capabilities;
__le32 MaxTransactSize;
__le64 ServerStartTime;
__le16 SecurityBufferOffset;
__le16 SecurityBufferLength;
- __le32 Reserved2; /* may be any value, ignore */
+ __le32 NegotiateContextOffset; /* Pre:SMB3.1.1 was reserved/ignored */
__u8 Buffer[1]; /* variable length GSS security buffer */
} __packed;
+/* Flags */
+#define SMB2_SESSION_REQ_FLAG_BINDING 0x01
+#define SMB2_SESSION_REQ_FLAG_ENCRYPT_DATA 0x04
+
struct smb2_sess_setup_req {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 25 */
- __u8 VcNumber;
+ __u8 Flags;
__u8 SecurityMode;
__le32 Capabilities;
__le32 Channel;
__le16 Reserved;
} __packed;
+/* Flags/Reserved for SMB3.1.1 */
+#define SMB2_SHAREFLAG_CLUSTER_RECONNECT 0x0001
+
struct smb2_tree_connect_req {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 9 */
- __le16 Reserved;
+ __le16 Reserved; /* Flags in SMB3.1.1 */
__le16 PathOffset;
__le16 PathLength;
__u8 Buffer[1]; /* variable length */
__le32 TotalBytesWritten;
} __packed;
+struct fsctl_set_integrity_information_req {
+ __le16 ChecksumAlgorithm;
+ __le16 Reserved;
+ __le32 Flags;
+} __packed;
+
+struct fsctl_get_integrity_information_rsp {
+ __le16 ChecksumAlgorithm;
+ __le16 Reserved;
+ __le32 Flags;
+ __le32 ChecksumChunkSizeInBytes;
+ __le32 ClusterSizeInBytes;
+} __packed;
+
+/* Integrity ChecksumAlgorithm choices for above */
+#define CHECKSUM_TYPE_NONE 0x0000
+#define CHECKSUM_TYPE_CRC64 0x0002
+#define CHECKSUM_TYPE_UNCHANGED 0xFFFF /* set only */
+
+/* Integrity flags for above */
+#define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF 0x00000001
+
+
struct validate_negotiate_info_req {
__le32 Capabilities;
__u8 Guid[SMB2_CLIENT_GUID_SIZE];
__le16 CompressionState; /* See cifspdu.h for possible flag values */
} __packed;
+struct duplicate_extents_to_file {
+ __u64 PersistentFileHandle; /* source file handle, opaque endianness */
+ __u64 VolatileFileHandle;
+ __le64 SourceFileOffset;
+ __le64 TargetFileOffset;
+ __le64 ByteCount; /* Bytes to be copied */
+} __packed;
+
struct smb2_ioctl_req {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 57 */
#define FSCTL_QUERY_SPARING_INFO 0x00090138 /* BB add struct */
#define FSCTL_SET_ZERO_ON_DEALLOC 0x00090194 /* BB add struct */
#define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */
+#define FSCTL_GET_INTEGRITY_INFORMATION 0x0009027C
#define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF /* BB add struct */
#define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */
#define FSCTL_FILE_LEVEL_TRIM 0x00098208 /* BB add struct */
+#define FSCTL_DUPLICATE_EXTENTS_TO_FILE 0x00098344
#define FSCTL_SIS_LINK_FILES 0x0009C104
+#define FSCTL_SET_INTEGRITY_INFORMATION 0x0009C280
#define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */
#define FSCTL_PIPE_TRANSCEIVE 0x0011C017 /* BB add struct */
/* strange that the number for this op is not sequential with previous op */
out:
i_mmap_unlock_read(mapping);
- if (bh->b_end_io)
- bh->b_end_io(bh, 1);
-
return error;
}
-static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
- get_block_t get_block)
+/**
+ * __dax_fault - handle a page fault on a DAX file
+ * @vma: The virtual memory area where the fault occurred
+ * @vmf: The description of the fault
+ * @get_block: The filesystem method used to translate file offsets to blocks
+ *
+ * When a page fault occurs, filesystems may call this helper in their
+ * fault handler for DAX files. __dax_fault() assumes the caller has done all
+ * the necessary locking for the page fault to proceed successfully.
+ */
+int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+ get_block_t get_block, dax_iodone_t complete_unwritten)
{
struct file *file = vma->vm_file;
struct address_space *mapping = file->f_mapping;
page_cache_release(page);
}
+ /*
+ * If we successfully insert the new mapping over an unwritten extent,
+ * we need to ensure we convert the unwritten extent. If there is an
+ * error inserting the mapping, the filesystem needs to leave it as
+ * unwritten to prevent exposure of the stale underlying data to
+ * userspace, but we still need to call the completion function so
+ * the private resources on the mapping buffer can be released. We
+ * indicate what the callback should do via the uptodate variable, same
+ * as for normal BH based IO completions.
+ */
error = dax_insert_mapping(inode, &bh, vma, vmf);
+ if (buffer_unwritten(&bh))
+ complete_unwritten(&bh, !error);
out:
if (error == -ENOMEM)
}
goto out;
}
+EXPORT_SYMBOL(__dax_fault);
/**
* dax_fault - handle a page fault on a DAX file
* fault handler for DAX files.
*/
int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
- get_block_t get_block)
+ get_block_t get_block, dax_iodone_t complete_unwritten)
{
int result;
struct super_block *sb = file_inode(vma->vm_file)->i_sb;
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
}
- result = do_dax_fault(vma, vmf, get_block);
+ result = __dax_fault(vma, vmf, get_block, complete_unwritten);
if (vmf->flags & FAULT_FLAG_WRITE)
sb_end_pagefault(sb);
#ifdef CONFIG_FS_DAX
static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- return dax_fault(vma, vmf, ext2_get_block);
+ return dax_fault(vma, vmf, ext2_get_block, NULL);
}
static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- return dax_mkwrite(vma, vmf, ext2_get_block);
+ return dax_mkwrite(vma, vmf, ext2_get_block, NULL);
}
static const struct vm_operations_struct ext2_dax_vm_ops = {
}
#ifdef CONFIG_FS_DAX
+static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
+{
+ struct inode *inode = bh->b_assoc_map->host;
+ /* XXX: breaks on 32-bit > 16GB. Is that even supported? */
+ loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
+ int err;
+ if (!uptodate)
+ return;
+ WARN_ON(!buffer_unwritten(bh));
+ err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
+}
+
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- return dax_fault(vma, vmf, ext4_get_block);
+ return dax_fault(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
/* Is this the right get_block? */
}
static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- return dax_mkwrite(vma, vmf, ext4_get_block);
+ return dax_mkwrite(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
}
static const struct vm_operations_struct ext4_dax_vm_ops = {
return retval;
}
-static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
-{
- struct inode *inode = bh->b_assoc_map->host;
- /* XXX: breaks on 32-bit > 16GB. Is that even supported? */
- loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
- int err;
- if (!uptodate)
- return;
- WARN_ON(!buffer_unwritten(bh));
- err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
-}
-
/* Maximum number of blocks we map for direct IO at once. */
#define DIO_MAX_BLOCKS 4096
map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
- if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) {
+ if (IS_DAX(inode) && buffer_unwritten(bh)) {
+ /*
+ * dgc: I suspect unwritten conversion on ext4+DAX is
+ * fundamentally broken here when there are concurrent
+ * read/write in progress on this inode.
+ */
+ WARN_ON_ONCE(io_end);
bh->b_assoc_map = inode->i_mapping;
bh->b_private = (void *)(unsigned long)iblock;
- bh->b_end_io = ext4_end_io_unwritten;
}
if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
set_buffer_defer_completion(bh);
*((unsigned int *)kp->arg) = num;
return 0;
}
-static struct kernel_param_ops param_ops_portnr = {
+static const struct kernel_param_ops param_ops_portnr = {
.set = param_set_portnr,
.get = param_get_uint,
};
return res;
}
+EXPORT_SYMBOL(seq_dentry);
static void *single_start(struct seq_file *p, loff_t *pos)
{
{
xfs_agblock_t bno;
xfs_extlen_t len;
+ xfs_extlen_t diff;
/* Trim busy sections out of found extent */
xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
+ /*
+ * If we have a largish extent that happens to start before min_agbno,
+ * see if we can shift it into range...
+ */
+ if (bno < args->min_agbno && bno + len > args->min_agbno) {
+ diff = args->min_agbno - bno;
+ if (len > diff) {
+ bno += diff;
+ len -= diff;
+ }
+ }
+
if (args->alignment > 1 && len >= args->minlen) {
xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
- xfs_extlen_t diff = aligned_bno - bno;
+
+ diff = aligned_bno - bno;
*resbno = aligned_bno;
*reslen = diff >= len ? 0 : len - diff;
* The good extent is closer than this one.
*/
if (!dir) {
+ if (*sbnoa > args->max_agbno)
+ goto out_use_good;
if (*sbnoa >= args->agbno + gdiff)
goto out_use_good;
} else {
+ if (*sbnoa < args->min_agbno)
+ goto out_use_good;
if (*sbnoa <= args->agbno - gdiff)
goto out_use_good;
}
dofirst = prandom_u32() & 1;
#endif
+ /* handle unitialized agbno range so caller doesn't have to */
+ if (!args->min_agbno && !args->max_agbno)
+ args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
+ ASSERT(args->min_agbno <= args->max_agbno);
+
+ /* clamp agbno to the range if it's outside */
+ if (args->agbno < args->min_agbno)
+ args->agbno = args->min_agbno;
+ if (args->agbno > args->max_agbno)
+ args->agbno = args->max_agbno;
+
restart:
bno_cur_lt = NULL;
bno_cur_gt = NULL;
<bnoa, <lena);
if (ltlena < args->minlen)
continue;
+ if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
+ continue;
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
xfs_alloc_fix_len(args);
ASSERT(args->len >= args->minlen);
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
xfs_alloc_compute_aligned(args, ltbno, ltlen,
<bnoa, <lena);
- if (ltlena >= args->minlen)
+ if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
break;
if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
goto error0;
- if (!i) {
+ if (!i || ltbnoa < args->min_agbno) {
xfs_btree_del_cursor(bno_cur_lt,
XFS_BTREE_NOERROR);
bno_cur_lt = NULL;
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
xfs_alloc_compute_aligned(args, gtbno, gtlen,
>bnoa, >lena);
- if (gtlena >= args->minlen)
+ if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
break;
if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
goto error0;
- if (!i) {
+ if (!i || gtbnoa > args->max_agbno) {
xfs_btree_del_cursor(bno_cur_gt,
XFS_BTREE_NOERROR);
bno_cur_gt = NULL;
ASSERT(ltnew >= ltbno);
ASSERT(ltnew + rlen <= ltbnoa + ltlena);
ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
+ ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
args->agbno = ltnew;
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
xfs_extlen_t
xfs_alloc_longest_free_extent(
struct xfs_mount *mp,
- struct xfs_perag *pag)
+ struct xfs_perag *pag,
+ xfs_extlen_t need)
{
- xfs_extlen_t need, delta = 0;
+ xfs_extlen_t delta = 0;
- need = XFS_MIN_FREELIST_PAG(pag, mp);
if (need > pag->pagf_flcount)
delta = need - pag->pagf_flcount;
return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
}
+unsigned int
+xfs_alloc_min_freelist(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag)
+{
+ unsigned int min_free;
+
+ /* space needed by-bno freespace btree */
+ min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
+ mp->m_ag_maxlevels);
+ /* space needed by-size freespace btree */
+ min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
+ mp->m_ag_maxlevels);
+
+ return min_free;
+}
+
+/*
+ * Check if the operation we are fixing up the freelist for should go ahead or
+ * not. If we are freeing blocks, we always allow it, otherwise the allocation
+ * is dependent on whether the size and shape of free space available will
+ * permit the requested allocation to take place.
+ */
+static bool
+xfs_alloc_space_available(
+ struct xfs_alloc_arg *args,
+ xfs_extlen_t min_free,
+ int flags)
+{
+ struct xfs_perag *pag = args->pag;
+ xfs_extlen_t longest;
+ int available;
+
+ if (flags & XFS_ALLOC_FLAG_FREEING)
+ return true;
+
+ /* do we have enough contiguous free space for the allocation? */
+ longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free);
+ if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
+ return false;
+
+ /* do have enough free space remaining for the allocation? */
+ available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
+ min_free - args->total);
+ if (available < (int)args->minleft)
+ return false;
+
+ return true;
+}
+
/*
* Decide whether to use this allocation group for this allocation.
* If so, fix up the btree freelist's size.
*/
STATIC int /* error */
xfs_alloc_fix_freelist(
- xfs_alloc_arg_t *args, /* allocation argument structure */
- int flags) /* XFS_ALLOC_FLAG_... */
+ struct xfs_alloc_arg *args, /* allocation argument structure */
+ int flags) /* XFS_ALLOC_FLAG_... */
{
- xfs_buf_t *agbp; /* agf buffer pointer */
- xfs_agf_t *agf; /* a.g. freespace structure pointer */
- xfs_buf_t *agflbp;/* agfl buffer pointer */
- xfs_agblock_t bno; /* freelist block */
- xfs_extlen_t delta; /* new blocks needed in freelist */
- int error; /* error result code */
- xfs_extlen_t longest;/* longest extent in allocation group */
- xfs_mount_t *mp; /* file system mount point structure */
- xfs_extlen_t need; /* total blocks needed in freelist */
- xfs_perag_t *pag; /* per-ag information structure */
- xfs_alloc_arg_t targs; /* local allocation arguments */
- xfs_trans_t *tp; /* transaction pointer */
-
- mp = args->mp;
+ struct xfs_mount *mp = args->mp;
+ struct xfs_perag *pag = args->pag;
+ struct xfs_trans *tp = args->tp;
+ struct xfs_buf *agbp = NULL;
+ struct xfs_buf *agflbp = NULL;
+ struct xfs_alloc_arg targs; /* local allocation arguments */
+ xfs_agblock_t bno; /* freelist block */
+ xfs_extlen_t need; /* total blocks needed in freelist */
+ int error;
- pag = args->pag;
- tp = args->tp;
if (!pag->pagf_init) {
- if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
- &agbp)))
- return error;
+ error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
+ if (error)
+ goto out_no_agbp;
if (!pag->pagf_init) {
ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
- args->agbp = NULL;
- return 0;
+ goto out_agbp_relse;
}
- } else
- agbp = NULL;
+ }
/*
- * If this is a metadata preferred pag and we are user data
- * then try somewhere else if we are not being asked to
- * try harder at this point
+ * If this is a metadata preferred pag and we are user data then try
+ * somewhere else if we are not being asked to try harder at this
+ * point
*/
if (pag->pagf_metadata && args->userdata &&
(flags & XFS_ALLOC_FLAG_TRYLOCK)) {
ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
- args->agbp = NULL;
- return 0;
+ goto out_agbp_relse;
}
- if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
- /*
- * If it looks like there isn't a long enough extent, or enough
- * total blocks, reject it.
- */
- need = XFS_MIN_FREELIST_PAG(pag, mp);
- longest = xfs_alloc_longest_free_extent(mp, pag);
- if ((args->minlen + args->alignment + args->minalignslop - 1) >
- longest ||
- ((int)(pag->pagf_freeblks + pag->pagf_flcount -
- need - args->total) < (int)args->minleft)) {
- if (agbp)
- xfs_trans_brelse(tp, agbp);
- args->agbp = NULL;
- return 0;
- }
- }
+ need = xfs_alloc_min_freelist(mp, pag);
+ if (!xfs_alloc_space_available(args, need, flags))
+ goto out_agbp_relse;
/*
* Get the a.g. freespace buffer.
* Can fail if we're not blocking on locks, and it's held.
*/
- if (agbp == NULL) {
- if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
- &agbp)))
- return error;
- if (agbp == NULL) {
+ if (!agbp) {
+ error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
+ if (error)
+ goto out_no_agbp;
+ if (!agbp) {
ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
- args->agbp = NULL;
- return 0;
- }
- }
- /*
- * Figure out how many blocks we should have in the freelist.
- */
- agf = XFS_BUF_TO_AGF(agbp);
- need = XFS_MIN_FREELIST(agf, mp);
- /*
- * If there isn't enough total or single-extent, reject it.
- */
- if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
- delta = need > be32_to_cpu(agf->agf_flcount) ?
- (need - be32_to_cpu(agf->agf_flcount)) : 0;
- longest = be32_to_cpu(agf->agf_longest);
- longest = (longest > delta) ? (longest - delta) :
- (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
- if ((args->minlen + args->alignment + args->minalignslop - 1) >
- longest ||
- ((int)(be32_to_cpu(agf->agf_freeblks) +
- be32_to_cpu(agf->agf_flcount) - need - args->total) <
- (int)args->minleft)) {
- xfs_trans_brelse(tp, agbp);
- args->agbp = NULL;
- return 0;
+ goto out_no_agbp;
}
}
+
+ /* If there isn't enough total space or single-extent, reject it. */
+ need = xfs_alloc_min_freelist(mp, pag);
+ if (!xfs_alloc_space_available(args, need, flags))
+ goto out_agbp_relse;
+
/*
* Make the freelist shorter if it's too long.
+ *
+ * Note that from this point onwards, we will always release the agf and
+ * agfl buffers on error. This handles the case where we error out and
+ * the buffers are clean or may not have been joined to the transaction
+ * and hence need to be released manually. If they have been joined to
+ * the transaction, then xfs_trans_brelse() will handle them
+ * appropriately based on the recursion count and dirty state of the
+ * buffer.
+ *
+ * XXX (dgc): When we have lots of free space, does this buy us
+ * anything other than extra overhead when we need to put more blocks
+ * back on the free list? Maybe we should only do this when space is
+ * getting low or the AGFL is more than half full?
*/
- while (be32_to_cpu(agf->agf_flcount) > need) {
- xfs_buf_t *bp;
+ while (pag->pagf_flcount > need) {
+ struct xfs_buf *bp;
error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
if (error)
- return error;
- if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
- return error;
+ goto out_agbp_relse;
+ error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1);
+ if (error)
+ goto out_agbp_relse;
bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
xfs_trans_binval(tp, bp);
}
- /*
- * Initialize the args structure.
- */
+
memset(&targs, 0, sizeof(targs));
targs.tp = tp;
targs.mp = mp;
targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
targs.type = XFS_ALLOCTYPE_THIS_AG;
targs.pag = pag;
- if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
- return error;
- /*
- * Make the freelist longer if it's too short.
- */
- while (be32_to_cpu(agf->agf_flcount) < need) {
+ error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
+ if (error)
+ goto out_agbp_relse;
+
+ /* Make the freelist longer if it's too short. */
+ while (pag->pagf_flcount < need) {
targs.agbno = 0;
- targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
- /*
- * Allocate as many blocks as possible at once.
- */
- if ((error = xfs_alloc_ag_vextent(&targs))) {
- xfs_trans_brelse(tp, agflbp);
- return error;
- }
+ targs.maxlen = need - pag->pagf_flcount;
+
+ /* Allocate as many blocks as possible at once. */
+ error = xfs_alloc_ag_vextent(&targs);
+ if (error)
+ goto out_agflbp_relse;
+
/*
* Stop if we run out. Won't happen if callers are obeying
* the restrictions correctly. Can happen for free calls
if (targs.agbno == NULLAGBLOCK) {
if (flags & XFS_ALLOC_FLAG_FREEING)
break;
- xfs_trans_brelse(tp, agflbp);
- args->agbp = NULL;
- return 0;
+ goto out_agflbp_relse;
}
/*
* Put each allocated block on the list.
error = xfs_alloc_put_freelist(tp, agbp,
agflbp, bno, 0);
if (error)
- return error;
+ goto out_agflbp_relse;
}
}
xfs_trans_brelse(tp, agflbp);
args->agbp = agbp;
return 0;
+
+out_agflbp_relse:
+ xfs_trans_brelse(tp, agflbp);
+out_agbp_relse:
+ if (agbp)
+ xfs_trans_brelse(tp, agbp);
+out_no_agbp:
+ args->agbp = NULL;
+ return error;
}
/*
xfs_extlen_t total; /* total blocks needed in xaction */
xfs_extlen_t alignment; /* align answer to multiple of this */
xfs_extlen_t minalignslop; /* slop for minlen+alignment calcs */
+ xfs_agblock_t min_agbno; /* set an agbno range for NEAR allocs */
+ xfs_agblock_t max_agbno; /* ... */
xfs_extlen_t len; /* output: actual size of extent */
xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... */
xfs_alloctype_t otype; /* original allocation type */
#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/
#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */
-/*
- * Find the length of the longest extent in an AG.
- */
-xfs_extlen_t
-xfs_alloc_longest_free_extent(struct xfs_mount *mp,
+xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
+ struct xfs_perag *pag, xfs_extlen_t need);
+unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
struct xfs_perag *pag);
/*
tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
if (error) {
- xfs_trans_cancel(args.trans, 0);
+ xfs_trans_cancel(args.trans);
return error;
}
xfs_ilock(dp, XFS_ILOCK_EXCL);
XFS_QMOPT_RES_REGBLKS);
if (error) {
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
+ xfs_trans_cancel(args.trans);
return error;
}
xfs_trans_ichgtime(args.trans, dp,
XFS_ICHGTIME_CHG);
}
- err2 = xfs_trans_commit(args.trans,
- XFS_TRANS_RELEASE_LOG_RES);
+ err2 = xfs_trans_commit(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error ? error : err2;
* Commit the last in the sequence of transactions.
*/
xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
- error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
out:
- if (args.trans) {
- xfs_trans_cancel(args.trans,
- XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
- }
+ if (args.trans)
+ xfs_trans_cancel(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
}
error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm,
XFS_ATTRRM_SPACE_RES(mp), 0);
if (error) {
- xfs_trans_cancel(args.trans, 0);
+ xfs_trans_cancel(args.trans);
return error;
}
* Commit the last in the sequence of transactions.
*/
xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
- error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
out:
- if (args.trans) {
- xfs_trans_cancel(args.trans,
- XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
- }
+ if (args.trans)
+ xfs_trans_cancel(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
}
int committed; /* xaction was committed */
int logflags; /* logging flags */
int error; /* error return value */
- int cancel_flags = 0;
ASSERT(XFS_IFORK_Q(ip) == 0);
tp->t_flags |= XFS_TRANS_RESERVE;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
XFS_QMOPT_RES_REGBLKS);
if (error)
goto trans_cancel;
- cancel_flags |= XFS_TRANS_ABORT;
if (XFS_IFORK_Q(ip))
goto trans_cancel;
if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
error = xfs_bmap_finish(&tp, &flist, &committed);
if (error)
goto bmap_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
bmap_cancel:
xfs_bmap_cancel(&flist);
trans_cancel:
- xfs_trans_cancel(tp, cancel_flags);
+ xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
}
}
- longest = xfs_alloc_longest_free_extent(mp, pag);
+ longest = xfs_alloc_longest_free_extent(mp, pag,
+ xfs_alloc_min_freelist(mp, pag));
if (*blen < longest)
*blen = longest;
error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
&bma->cur, mval, bma->firstblock, bma->flist,
&tmp_logflags);
- bma->logflags |= tmp_logflags;
+ /*
+ * Log the inode core unconditionally in the unwritten extent conversion
+ * path because the conversion might not have done so (e.g., if the
+ * extent count hasn't changed). We need to make sure the inode is dirty
+ * in the transaction for the sake of fsync(), even if nothing has
+ * changed, because fsync() will not force the log for this transaction
+ * unless it sees the inode pinned.
+ */
+ bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
if (error)
return error;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
if (error)
goto out;
- return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-
+ return xfs_trans_commit(tp);
out:
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
return error;
}
__uint32_t sb_features_log_incompat;
__uint32_t sb_crc; /* superblock crc */
- __uint32_t sb_pad;
+ xfs_extlen_t sb_spino_align; /* sparse inode chunk alignment */
xfs_ino_t sb_pquotino; /* project quota inode */
xfs_lsn_t sb_lsn; /* last write sequence */
__be32 sb_features_log_incompat;
__le32 sb_crc; /* superblock crc */
- __be32 sb_pad;
+ __be32 sb_spino_align; /* sparse inode chunk alignment */
__be64 sb_pquotino; /* project quota inode */
__be64 sb_lsn; /* last write sequence */
}
#define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */
+#define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */
#define XFS_SB_FEAT_INCOMPAT_ALL \
- (XFS_SB_FEAT_INCOMPAT_FTYPE)
+ (XFS_SB_FEAT_INCOMPAT_FTYPE| \
+ XFS_SB_FEAT_INCOMPAT_SPINODES)
#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
static inline bool
(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
}
+static inline bool xfs_sb_version_hassparseinodes(struct xfs_sb *sbp)
+{
+ return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+ xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_SPINODES);
+}
+
/*
* end of superblock version macros
*/
#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
-
-#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
-#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \
- (MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp)))
-#define XFS_MIN_FREELIST(a,mp) \
- (XFS_MIN_FREELIST_RAW( \
- be32_to_cpu((a)->agf_levels[XFS_BTNUM_BNOi]), \
- be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp))
-#define XFS_MIN_FREELIST_PAG(pag,mp) \
- (XFS_MIN_FREELIST_RAW( \
- (unsigned int)(pag)->pagf_levels[XFS_BTNUM_BNOi], \
- (unsigned int)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp))
-
#define XFS_AGB_TO_FSB(mp,agno,agbno) \
(((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno))
#define XFS_FSB_TO_AGNO(mp,fsbno) \
#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1)
#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i))
+#define XFS_INOBT_HOLEMASK_FULL 0 /* holemask for full chunk */
+#define XFS_INOBT_HOLEMASK_BITS (NBBY * sizeof(__uint16_t))
+#define XFS_INODES_PER_HOLEMASK_BIT \
+ (XFS_INODES_PER_CHUNK / (NBBY * sizeof(__uint16_t)))
+
static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
{
return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i;
}
/*
- * Data record structure
+ * The on-disk inode record structure has two formats. The original "full"
+ * format uses a 4-byte freecount. The "sparse" format uses a 1-byte freecount
+ * and replaces the 3 high-order freecount bytes wth the holemask and inode
+ * count.
+ *
+ * The holemask of the sparse record format allows an inode chunk to have holes
+ * that refer to blocks not owned by the inode record. This facilitates inode
+ * allocation in the event of severe free space fragmentation.
*/
typedef struct xfs_inobt_rec {
__be32 ir_startino; /* starting inode number */
- __be32 ir_freecount; /* count of free inodes (set bits) */
+ union {
+ struct {
+ __be32 ir_freecount; /* count of free inodes */
+ } f;
+ struct {
+ __be16 ir_holemask;/* hole mask for sparse chunks */
+ __u8 ir_count; /* total inode count */
+ __u8 ir_freecount; /* count of free inodes */
+ } sp;
+ } ir_u;
__be64 ir_free; /* free inode mask */
} xfs_inobt_rec_t;
typedef struct xfs_inobt_rec_incore {
xfs_agino_t ir_startino; /* starting inode number */
- __int32_t ir_freecount; /* count of free inodes (set bits) */
+ __uint16_t ir_holemask; /* hole mask for sparse chunks */
+ __uint8_t ir_count; /* total inode count */
+ __uint8_t ir_freecount; /* count of free inodes (set bits) */
xfs_inofree_t ir_free; /* free inode mask */
} xfs_inobt_rec_incore_t;
+static inline bool xfs_inobt_issparse(uint16_t holemask)
+{
+ /* non-zero holemask represents a sparse rec. */
+ return holemask;
+}
/*
* Key structure
sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp)))
/* On-disk XFS extended attribute names */
-#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE"
-#define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT"
+#define SGI_ACL_FILE "SGI_ACL_FILE"
+#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT"
#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */
#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */
#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */
+#define XFS_FSOP_GEOM_FLAGS_SPINODES 0x40000 /* sparse inode chunks */
/*
* Minimum and maximum sizes need for growth checks.
int *stat) /* success/failure */
{
cur->bc_rec.i.ir_startino = ino;
+ cur->bc_rec.i.ir_holemask = 0;
+ cur->bc_rec.i.ir_count = 0;
cur->bc_rec.i.ir_freecount = 0;
cur->bc_rec.i.ir_free = 0;
return xfs_btree_lookup(cur, dir, stat);
union xfs_btree_rec rec;
rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
- rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount);
+ if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+ rec.inobt.ir_u.sp.ir_holemask = cpu_to_be16(irec->ir_holemask);
+ rec.inobt.ir_u.sp.ir_count = irec->ir_count;
+ rec.inobt.ir_u.sp.ir_freecount = irec->ir_freecount;
+ } else {
+ /* ir_holemask/ir_count not supported on-disk */
+ rec.inobt.ir_u.f.ir_freecount = cpu_to_be32(irec->ir_freecount);
+ }
rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
return xfs_btree_update(cur, &rec);
}
int error;
error = xfs_btree_get_rec(cur, &rec, stat);
- if (!error && *stat == 1) {
- irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
- irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount);
- irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
+ if (error || *stat == 0)
+ return error;
+
+ irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
+ if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+ irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
+ irec->ir_count = rec->inobt.ir_u.sp.ir_count;
+ irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
+ } else {
+ /*
+ * ir_holemask/ir_count not supported on-disk. Fill in hardcoded
+ * values for full inode chunks.
+ */
+ irec->ir_holemask = XFS_INOBT_HOLEMASK_FULL;
+ irec->ir_count = XFS_INODES_PER_CHUNK;
+ irec->ir_freecount =
+ be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
}
- return error;
+ irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
+
+ return 0;
}
/*
STATIC int
xfs_inobt_insert_rec(
struct xfs_btree_cur *cur,
+ __uint16_t holemask,
+ __uint8_t count,
__int32_t freecount,
xfs_inofree_t free,
int *stat)
{
+ cur->bc_rec.i.ir_holemask = holemask;
+ cur->bc_rec.i.ir_count = count;
cur->bc_rec.i.ir_freecount = freecount;
cur->bc_rec.i.ir_free = free;
return xfs_btree_insert(cur, stat);
}
ASSERT(i == 0);
- error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK,
+ error = xfs_inobt_insert_rec(cur, XFS_INOBT_HOLEMASK_FULL,
+ XFS_INODES_PER_CHUNK,
+ XFS_INODES_PER_CHUNK,
XFS_INOBT_ALL_FREE, &i);
if (error) {
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
struct xfs_mount *mp,
struct xfs_trans *tp,
struct list_head *buffer_list,
+ int icount,
xfs_agnumber_t agno,
xfs_agblock_t agbno,
xfs_agblock_t length,
* they track in the AIL as if they were physically logged.
*/
if (tp)
- xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos,
+ xfs_icreate_log(tp, agno, agbno, icount,
mp->m_sb.sb_inodesize, length, gen);
} else
version = 2;
return 0;
}
+/*
+ * Align startino and allocmask for a recently allocated sparse chunk such that
+ * they are fit for insertion (or merge) into the on-disk inode btrees.
+ *
+ * Background:
+ *
+ * When enabled, sparse inode support increases the inode alignment from cluster
+ * size to inode chunk size. This means that the minimum range between two
+ * non-adjacent inode records in the inobt is large enough for a full inode
+ * record. This allows for cluster sized, cluster aligned block allocation
+ * without need to worry about whether the resulting inode record overlaps with
+ * another record in the tree. Without this basic rule, we would have to deal
+ * with the consequences of overlap by potentially undoing recent allocations in
+ * the inode allocation codepath.
+ *
+ * Because of this alignment rule (which is enforced on mount), there are two
+ * inobt possibilities for newly allocated sparse chunks. One is that the
+ * aligned inode record for the chunk covers a range of inodes not already
+ * covered in the inobt (i.e., it is safe to insert a new sparse record). The
+ * other is that a record already exists at the aligned startino that considers
+ * the newly allocated range as sparse. In the latter case, record content is
+ * merged in hope that sparse inode chunks fill to full chunks over time.
+ */
+STATIC void
+xfs_align_sparse_ino(
+ struct xfs_mount *mp,
+ xfs_agino_t *startino,
+ uint16_t *allocmask)
+{
+ xfs_agblock_t agbno;
+ xfs_agblock_t mod;
+ int offset;
+
+ agbno = XFS_AGINO_TO_AGBNO(mp, *startino);
+ mod = agbno % mp->m_sb.sb_inoalignmt;
+ if (!mod)
+ return;
+
+ /* calculate the inode offset and align startino */
+ offset = mod << mp->m_sb.sb_inopblog;
+ *startino -= offset;
+
+ /*
+ * Since startino has been aligned down, left shift allocmask such that
+ * it continues to represent the same physical inodes relative to the
+ * new startino.
+ */
+ *allocmask <<= offset / XFS_INODES_PER_HOLEMASK_BIT;
+}
+
+/*
+ * Determine whether the source inode record can merge into the target. Both
+ * records must be sparse, the inode ranges must match and there must be no
+ * allocation overlap between the records.
+ */
+STATIC bool
+__xfs_inobt_can_merge(
+ struct xfs_inobt_rec_incore *trec, /* tgt record */
+ struct xfs_inobt_rec_incore *srec) /* src record */
+{
+ uint64_t talloc;
+ uint64_t salloc;
+
+ /* records must cover the same inode range */
+ if (trec->ir_startino != srec->ir_startino)
+ return false;
+
+ /* both records must be sparse */
+ if (!xfs_inobt_issparse(trec->ir_holemask) ||
+ !xfs_inobt_issparse(srec->ir_holemask))
+ return false;
+
+ /* both records must track some inodes */
+ if (!trec->ir_count || !srec->ir_count)
+ return false;
+
+ /* can't exceed capacity of a full record */
+ if (trec->ir_count + srec->ir_count > XFS_INODES_PER_CHUNK)
+ return false;
+
+ /* verify there is no allocation overlap */
+ talloc = xfs_inobt_irec_to_allocmask(trec);
+ salloc = xfs_inobt_irec_to_allocmask(srec);
+ if (talloc & salloc)
+ return false;
+
+ return true;
+}
+
+/*
+ * Merge the source inode record into the target. The caller must call
+ * __xfs_inobt_can_merge() to ensure the merge is valid.
+ */
+STATIC void
+__xfs_inobt_rec_merge(
+ struct xfs_inobt_rec_incore *trec, /* target */
+ struct xfs_inobt_rec_incore *srec) /* src */
+{
+ ASSERT(trec->ir_startino == srec->ir_startino);
+
+ /* combine the counts */
+ trec->ir_count += srec->ir_count;
+ trec->ir_freecount += srec->ir_freecount;
+
+ /*
+ * Merge the holemask and free mask. For both fields, 0 bits refer to
+ * allocated inodes. We combine the allocated ranges with bitwise AND.
+ */
+ trec->ir_holemask &= srec->ir_holemask;
+ trec->ir_free &= srec->ir_free;
+}
+
+/*
+ * Insert a new sparse inode chunk into the associated inode btree. The inode
+ * record for the sparse chunk is pre-aligned to a startino that should match
+ * any pre-existing sparse inode record in the tree. This allows sparse chunks
+ * to fill over time.
+ *
+ * This function supports two modes of handling preexisting records depending on
+ * the merge flag. If merge is true, the provided record is merged with the
+ * existing record and updated in place. The merged record is returned in nrec.
+ * If merge is false, an existing record is replaced with the provided record.
+ * If no preexisting record exists, the provided record is always inserted.
+ *
+ * It is considered corruption if a merge is requested and not possible. Given
+ * the sparse inode alignment constraints, this should never happen.
+ */
+STATIC int
+xfs_inobt_insert_sprec(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ int btnum,
+ struct xfs_inobt_rec_incore *nrec, /* in/out: new/merged rec. */
+ bool merge) /* merge or replace */
+{
+ struct xfs_btree_cur *cur;
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
+ xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
+ int error;
+ int i;
+ struct xfs_inobt_rec_incore rec;
+
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
+
+ /* the new record is pre-aligned so we know where to look */
+ error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);
+ if (error)
+ goto error;
+ /* if nothing there, insert a new record and return */
+ if (i == 0) {
+ error = xfs_inobt_insert_rec(cur, nrec->ir_holemask,
+ nrec->ir_count, nrec->ir_freecount,
+ nrec->ir_free, &i);
+ if (error)
+ goto error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
+
+ goto out;
+ }
+
+ /*
+ * A record exists at this startino. Merge or replace the record
+ * depending on what we've been asked to do.
+ */
+ if (merge) {
+ error = xfs_inobt_get_rec(cur, &rec, &i);
+ if (error)
+ goto error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
+ XFS_WANT_CORRUPTED_GOTO(mp,
+ rec.ir_startino == nrec->ir_startino,
+ error);
+
+ /*
+ * This should never fail. If we have coexisting records that
+ * cannot merge, something is seriously wrong.
+ */
+ XFS_WANT_CORRUPTED_GOTO(mp, __xfs_inobt_can_merge(nrec, &rec),
+ error);
+
+ trace_xfs_irec_merge_pre(mp, agno, rec.ir_startino,
+ rec.ir_holemask, nrec->ir_startino,
+ nrec->ir_holemask);
+
+ /* merge to nrec to output the updated record */
+ __xfs_inobt_rec_merge(nrec, &rec);
+
+ trace_xfs_irec_merge_post(mp, agno, nrec->ir_startino,
+ nrec->ir_holemask);
+
+ error = xfs_inobt_rec_check_count(mp, nrec);
+ if (error)
+ goto error;
+ }
+
+ error = xfs_inobt_update(cur, nrec);
+ if (error)
+ goto error;
+
+out:
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ return 0;
+error:
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ return error;
+}
+
/*
* Allocate new inodes in the allocation group specified by agbp.
* Return 0 for success, else error code.
xfs_agino_t newlen; /* new number of inodes */
int isaligned = 0; /* inode allocation at stripe unit */
/* boundary */
+ uint16_t allocmask = (uint16_t) -1; /* init. to full chunk */
+ struct xfs_inobt_rec_incore rec;
struct xfs_perag *pag;
+ int do_sparse = 0;
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = tp->t_mountp;
+ args.fsbno = NULLFSBLOCK;
+
+#ifdef DEBUG
+ /* randomly do sparse inode allocations */
+ if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) &&
+ args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks)
+ do_sparse = prandom_u32() & 1;
+#endif
/*
* Locking will ensure that we don't have two callers in here
agno = be32_to_cpu(agi->agi_seqno);
args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
args.mp->m_ialloc_blks;
+ if (do_sparse)
+ goto sparse_alloc;
if (likely(newino != NULLAGINO &&
(args.agbno < be32_to_cpu(agi->agi_length)))) {
args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
* subsequent requests.
*/
args.minalignslop = 0;
- } else
- args.fsbno = NULLFSBLOCK;
+ }
if (unlikely(args.fsbno == NULLFSBLOCK)) {
/*
return error;
}
+ /*
+ * Finally, try a sparse allocation if the filesystem supports it and
+ * the sparse allocation length is smaller than a full chunk.
+ */
+ if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) &&
+ args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks &&
+ args.fsbno == NULLFSBLOCK) {
+sparse_alloc:
+ args.type = XFS_ALLOCTYPE_NEAR_BNO;
+ args.agbno = be32_to_cpu(agi->agi_root);
+ args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
+ args.alignment = args.mp->m_sb.sb_spino_align;
+ args.prod = 1;
+
+ args.minlen = args.mp->m_ialloc_min_blks;
+ args.maxlen = args.minlen;
+
+ /*
+ * The inode record will be aligned to full chunk size. We must
+ * prevent sparse allocation from AG boundaries that result in
+ * invalid inode records, such as records that start at agbno 0
+ * or extend beyond the AG.
+ *
+ * Set min agbno to the first aligned, non-zero agbno and max to
+ * the last aligned agbno that is at least one full chunk from
+ * the end of the AG.
+ */
+ args.min_agbno = args.mp->m_sb.sb_inoalignmt;
+ args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
+ args.mp->m_sb.sb_inoalignmt) -
+ args.mp->m_ialloc_blks;
+
+ error = xfs_alloc_vextent(&args);
+ if (error)
+ return error;
+
+ newlen = args.len << args.mp->m_sb.sb_inopblog;
+ ASSERT(newlen <= XFS_INODES_PER_CHUNK);
+ allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1;
+ }
+
if (args.fsbno == NULLFSBLOCK) {
*alloc = 0;
return 0;
* rather than a linear progression to prevent the next generation
* number from being easily guessable.
*/
- error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno,
- args.len, prandom_u32());
+ error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, agno,
+ args.agbno, args.len, prandom_u32());
if (error)
return error;
* Convert the results.
*/
newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
+
+ if (xfs_inobt_issparse(~allocmask)) {
+ /*
+ * We've allocated a sparse chunk. Align the startino and mask.
+ */
+ xfs_align_sparse_ino(args.mp, &newino, &allocmask);
+
+ rec.ir_startino = newino;
+ rec.ir_holemask = ~allocmask;
+ rec.ir_count = newlen;
+ rec.ir_freecount = newlen;
+ rec.ir_free = XFS_INOBT_ALL_FREE;
+
+ /*
+ * Insert the sparse record into the inobt and allow for a merge
+ * if necessary. If a merge does occur, rec is updated to the
+ * merged record.
+ */
+ error = xfs_inobt_insert_sprec(args.mp, tp, agbp, XFS_BTNUM_INO,
+ &rec, true);
+ if (error == -EFSCORRUPTED) {
+ xfs_alert(args.mp,
+ "invalid sparse inode record: ino 0x%llx holemask 0x%x count %u",
+ XFS_AGINO_TO_INO(args.mp, agno,
+ rec.ir_startino),
+ rec.ir_holemask, rec.ir_count);
+ xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE);
+ }
+ if (error)
+ return error;
+
+ /*
+ * We can't merge the part we've just allocated as for the inobt
+ * due to finobt semantics. The original record may or may not
+ * exist independent of whether physical inodes exist in this
+ * sparse chunk.
+ *
+ * We must update the finobt record based on the inobt record.
+ * rec contains the fully merged and up to date inobt record
+ * from the previous call. Set merge false to replace any
+ * existing record with this one.
+ */
+ if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
+ error = xfs_inobt_insert_sprec(args.mp, tp, agbp,
+ XFS_BTNUM_FINO, &rec,
+ false);
+ if (error)
+ return error;
+ }
+ } else {
+ /* full chunk - insert new records to both btrees */
+ error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
+ XFS_BTNUM_INO);
+ if (error)
+ return error;
+
+ if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
+ error = xfs_inobt_insert(args.mp, tp, agbp, newino,
+ newlen, XFS_BTNUM_FINO);
+ if (error)
+ return error;
+ }
+ }
+
+ /*
+ * Update AGI counts and newino.
+ */
be32_add_cpu(&agi->agi_count, newlen);
be32_add_cpu(&agi->agi_freecount, newlen);
pag = xfs_perag_get(args.mp, agno);
xfs_perag_put(pag);
agi->agi_newino = cpu_to_be32(newino);
- /*
- * Insert records describing the new inode chunk into the btrees.
- */
- error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
- XFS_BTNUM_INO);
- if (error)
- return error;
-
- if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
- error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
- XFS_BTNUM_FINO);
- if (error)
- return error;
- }
/*
* Log allocation group header fields
*/
* if we fail allocation due to alignment issues then it is most
* likely a real ENOSPC condition.
*/
- ineed = mp->m_ialloc_blks;
+ ineed = mp->m_ialloc_min_blks;
if (flags && ineed > 1)
ineed += xfs_ialloc_cluster_alignment(mp);
longest = pag->pagf_longest;
return 0;
}
+/*
+ * Return the offset of the first free inode in the record. If the inode chunk
+ * is sparsely allocated, we convert the record holemask to inode granularity
+ * and mask off the unallocated regions from the inode free mask.
+ */
+STATIC int
+xfs_inobt_first_free_inode(
+ struct xfs_inobt_rec_incore *rec)
+{
+ xfs_inofree_t realfree;
+
+ /* if there are no holes, return the first available offset */
+ if (!xfs_inobt_issparse(rec->ir_holemask))
+ return xfs_lowbit64(rec->ir_free);
+
+ realfree = xfs_inobt_irec_to_allocmask(rec);
+ realfree &= rec->ir_free;
+
+ return xfs_lowbit64(realfree);
+}
+
/*
* Allocate an inode using the inobt-only algorithm.
*/
}
alloc_inode:
- offset = xfs_lowbit64(rec.ir_free);
+ offset = xfs_inobt_first_free_inode(&rec);
ASSERT(offset >= 0);
ASSERT(offset < XFS_INODES_PER_CHUNK);
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
if (error)
goto error_cur;
- offset = xfs_lowbit64(rec.ir_free);
+ offset = xfs_inobt_first_free_inode(&rec);
ASSERT(offset >= 0);
ASSERT(offset < XFS_INODES_PER_CHUNK);
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
return error;
}
+/*
+ * Free the blocks of an inode chunk. We must consider that the inode chunk
+ * might be sparse and only free the regions that are allocated as part of the
+ * chunk.
+ */
+STATIC void
+xfs_difree_inode_chunk(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ struct xfs_inobt_rec_incore *rec,
+ struct xfs_bmap_free *flist)
+{
+ xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino);
+ int startidx, endidx;
+ int nextbit;
+ xfs_agblock_t agbno;
+ int contigblk;
+ DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
+
+ if (!xfs_inobt_issparse(rec->ir_holemask)) {
+ /* not sparse, calculate extent info directly */
+ xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
+ XFS_AGINO_TO_AGBNO(mp, rec->ir_startino)),
+ mp->m_ialloc_blks, flist, mp);
+ return;
+ }
+
+ /* holemask is only 16-bits (fits in an unsigned long) */
+ ASSERT(sizeof(rec->ir_holemask) <= sizeof(holemask[0]));
+ holemask[0] = rec->ir_holemask;
+
+ /*
+ * Find contiguous ranges of zeroes (i.e., allocated regions) in the
+ * holemask and convert the start/end index of each range to an extent.
+ * We start with the start and end index both pointing at the first 0 in
+ * the mask.
+ */
+ startidx = endidx = find_first_zero_bit(holemask,
+ XFS_INOBT_HOLEMASK_BITS);
+ nextbit = startidx + 1;
+ while (startidx < XFS_INOBT_HOLEMASK_BITS) {
+ nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,
+ nextbit);
+ /*
+ * If the next zero bit is contiguous, update the end index of
+ * the current range and continue.
+ */
+ if (nextbit != XFS_INOBT_HOLEMASK_BITS &&
+ nextbit == endidx + 1) {
+ endidx = nextbit;
+ goto next;
+ }
+
+ /*
+ * nextbit is not contiguous with the current end index. Convert
+ * the current start/end to an extent and add it to the free
+ * list.
+ */
+ agbno = sagbno + (startidx * XFS_INODES_PER_HOLEMASK_BIT) /
+ mp->m_sb.sb_inopblock;
+ contigblk = ((endidx - startidx + 1) *
+ XFS_INODES_PER_HOLEMASK_BIT) /
+ mp->m_sb.sb_inopblock;
+
+ ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
+ ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
+ xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
+ flist, mp);
+
+ /* reset range to current bit and carry on... */
+ startidx = endidx = nextbit;
+
+next:
+ nextbit++;
+ }
+}
+
STATIC int
xfs_difree_inobt(
struct xfs_mount *mp,
struct xfs_buf *agbp,
xfs_agino_t agino,
struct xfs_bmap_free *flist,
- int *deleted,
- xfs_ino_t *first_ino,
+ struct xfs_icluster *xic,
struct xfs_inobt_rec_incore *orec)
{
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
rec.ir_freecount++;
/*
- * When an inode cluster is free, it becomes eligible for removal
+ * When an inode chunk is free, it becomes eligible for removal. Don't
+ * remove the chunk if the block size is large enough for multiple inode
+ * chunks (that might not be free).
*/
if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
- (rec.ir_freecount == mp->m_ialloc_inos)) {
-
- *deleted = 1;
- *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
+ rec.ir_free == XFS_INOBT_ALL_FREE &&
+ mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
+ xic->deleted = 1;
+ xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
+ xic->alloc = xfs_inobt_irec_to_allocmask(&rec);
/*
* Remove the inode cluster from the AGI B+Tree, adjust the
* AGI and Superblock inode counts, and mark the disk space
* to be freed when the transaction is committed.
*/
- ilen = mp->m_ialloc_inos;
+ ilen = rec.ir_freecount;
be32_add_cpu(&agi->agi_count, -ilen);
be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
goto error0;
}
- xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
- XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)),
- mp->m_ialloc_blks, flist, mp);
+ xfs_difree_inode_chunk(mp, agno, &rec, flist);
} else {
- *deleted = 0;
+ xic->deleted = 0;
error = xfs_inobt_update(cur, &rec);
if (error) {
*/
XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error);
- error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
+ error = xfs_inobt_insert_rec(cur, ibtrec->ir_holemask,
+ ibtrec->ir_count,
+ ibtrec->ir_freecount,
ibtrec->ir_free, &i);
if (error)
goto error;
* free inode. Hence, if all of the inodes are free and we aren't
* keeping inode chunks permanently on disk, remove the record.
* Otherwise, update the record with the new information.
+ *
+ * Note that we currently can't free chunks when the block size is large
+ * enough for multiple chunks. Leave the finobt record to remain in sync
+ * with the inobt.
*/
- if (rec.ir_freecount == mp->m_ialloc_inos &&
+ if (rec.ir_free == XFS_INOBT_ALL_FREE &&
+ mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK &&
!(mp->m_flags & XFS_MOUNT_IKEEP)) {
error = xfs_btree_delete(cur, &i);
if (error)
struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t inode, /* inode to be freed */
struct xfs_bmap_free *flist, /* extents to free */
- int *deleted,/* set if inode cluster was deleted */
- xfs_ino_t *first_ino)/* first inode in deleted cluster */
+ struct xfs_icluster *xic) /* cluster info if deleted */
{
/* REFERENCED */
xfs_agblock_t agbno; /* block number containing inode */
/*
* Fix up the inode allocation btree.
*/
- error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino,
- &rec);
+ error = xfs_difree_inobt(mp, tp, agbp, agino, flist, xic, &rec);
if (error)
goto error0;
/* Move inodes in clusters of this size */
#define XFS_INODE_BIG_CLUSTER_SIZE 8192
+struct xfs_icluster {
+ bool deleted; /* record is deleted */
+ xfs_ino_t first_ino; /* first inode number */
+ uint64_t alloc; /* inode phys. allocation bitmap for
+ * sparse chunks */
+};
+
/* Calculate and return the number of filesystem blocks per inode cluster */
static inline int
xfs_icluster_size_fsb(
static inline struct xfs_dinode *
xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o)
{
- return (struct xfs_dinode *)
- (xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog));
+ return xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog);
}
/*
struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t inode, /* inode to be freed */
struct xfs_bmap_free *flist, /* extents to free */
- int *deleted, /* set if inode cluster was deleted */
- xfs_ino_t *first_ino); /* first inode in deleted cluster */
+ struct xfs_icluster *ifree); /* cluster info if deleted */
/*
* Return the location of the inode in imap, for mapping it into a buffer.
* Inode chunk initialisation routine
*/
int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
- struct list_head *buffer_list,
+ struct list_head *buffer_list, int icount,
xfs_agnumber_t agno, xfs_agblock_t agbno,
xfs_agblock_t length, unsigned int gen);
union xfs_btree_rec *rec)
{
rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
- rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
+ if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+ rec->inobt.ir_u.sp.ir_holemask =
+ cpu_to_be16(cur->bc_rec.i.ir_holemask);
+ rec->inobt.ir_u.sp.ir_count = cur->bc_rec.i.ir_count;
+ rec->inobt.ir_u.sp.ir_freecount = cur->bc_rec.i.ir_freecount;
+ } else {
+ /* ir_holemask/ir_count not supported on-disk */
+ rec->inobt.ir_u.f.ir_freecount =
+ cpu_to_be32(cur->bc_rec.i.ir_freecount);
+ }
rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
}
return blocklen / sizeof(xfs_inobt_rec_t);
return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t));
}
+
+/*
+ * Convert the inode record holemask to an inode allocation bitmap. The inode
+ * allocation bitmap is inode granularity and specifies whether an inode is
+ * physically allocated on disk (not whether the inode is considered allocated
+ * or free by the fs).
+ *
+ * A bit value of 1 means the inode is allocated, a value of 0 means it is free.
+ */
+uint64_t
+xfs_inobt_irec_to_allocmask(
+ struct xfs_inobt_rec_incore *rec)
+{
+ uint64_t bitmap = 0;
+ uint64_t inodespbit;
+ int nextbit;
+ uint allocbitmap;
+
+ /*
+ * The holemask has 16-bits for a 64 inode record. Therefore each
+ * holemask bit represents multiple inodes. Create a mask of bits to set
+ * in the allocmask for each holemask bit.
+ */
+ inodespbit = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
+
+ /*
+ * Allocated inodes are represented by 0 bits in holemask. Invert the 0
+ * bits to 1 and convert to a uint so we can use xfs_next_bit(). Mask
+ * anything beyond the 16 holemask bits since this casts to a larger
+ * type.
+ */
+ allocbitmap = ~rec->ir_holemask & ((1 << XFS_INOBT_HOLEMASK_BITS) - 1);
+
+ /*
+ * allocbitmap is the inverted holemask so every set bit represents
+ * allocated inodes. To expand from 16-bit holemask granularity to
+ * 64-bit (e.g., bit-per-inode), set inodespbit bits in the target
+ * bitmap for every holemask bit.
+ */
+ nextbit = xfs_next_bit(&allocbitmap, 1, 0);
+ while (nextbit != -1) {
+ ASSERT(nextbit < (sizeof(rec->ir_holemask) * NBBY));
+
+ bitmap |= (inodespbit <<
+ (nextbit * XFS_INODES_PER_HOLEMASK_BIT));
+
+ nextbit = xfs_next_bit(&allocbitmap, 1, nextbit + 1);
+ }
+
+ return bitmap;
+}
+
+#if defined(DEBUG) || defined(XFS_WARN)
+/*
+ * Verify that an in-core inode record has a valid inode count.
+ */
+int
+xfs_inobt_rec_check_count(
+ struct xfs_mount *mp,
+ struct xfs_inobt_rec_incore *rec)
+{
+ int inocount = 0;
+ int nextbit = 0;
+ uint64_t allocbmap;
+ int wordsz;
+
+ wordsz = sizeof(allocbmap) / sizeof(unsigned int);
+ allocbmap = xfs_inobt_irec_to_allocmask(rec);
+
+ nextbit = xfs_next_bit((uint *) &allocbmap, wordsz, nextbit);
+ while (nextbit != -1) {
+ inocount++;
+ nextbit = xfs_next_bit((uint *) &allocbmap, wordsz,
+ nextbit + 1);
+ }
+
+ if (inocount != rec->ir_count)
+ return -EFSCORRUPTED;
+
+ return 0;
+}
+#endif /* DEBUG */
xfs_btnum_t);
extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
+/* ir_holemask to inode allocation bitmap conversion */
+uint64_t xfs_inobt_irec_to_allocmask(struct xfs_inobt_rec_incore *);
+
+#if defined(DEBUG) || defined(XFS_WARN)
+int xfs_inobt_rec_check_count(struct xfs_mount *,
+ struct xfs_inobt_rec_incore *);
+#else
+#define xfs_inobt_rec_check_count(mp, rec) 0
+#endif /* DEBUG */
+
#endif /* __XFS_IALLOC_BTREE_H__ */
j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
for (i = 0; i < j; i++) {
- dip = (xfs_dinode_t *)xfs_buf_offset(bp,
- i * mp->m_sb.sb_inodesize);
+ dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
if (!dip->di_next_unlinked) {
xfs_alert(mp,
"Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
int di_ok;
xfs_dinode_t *dip;
- dip = (struct xfs_dinode *)xfs_buf_offset(bp,
- (i << mp->m_sb.sb_inodelog));
+ dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
XFS_DINODE_GOOD_VERSION(dip->di_version);
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
}
*bpp = bp;
- *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
+ *dipp = xfs_buf_offset(bp, imap->im_boffset);
return 0;
}
return -EFSCORRUPTED;
}
+ /*
+ * Full inode chunks must be aligned to inode chunk size when
+ * sparse inodes are enabled to support the sparse chunk
+ * allocation algorithm and prevent overlapping inode records.
+ */
+ if (xfs_sb_version_hassparseinodes(sbp)) {
+ uint32_t align;
+
+ xfs_alert(mp,
+ "EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
+
+ align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize
+ >> sbp->sb_blocklog;
+ if (sbp->sb_inoalignmt != align) {
+ xfs_warn(mp,
+"Inode block alignment (%u) must match chunk size (%u) for sparse inodes.",
+ sbp->sb_inoalignmt, align);
+ return -EINVAL;
+ }
+ }
+
if (unlikely(
sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
xfs_warn(mp,
be32_to_cpu(from->sb_features_log_incompat);
/* crc is only used on disk, not in memory; just init to 0 here. */
to->sb_crc = 0;
- to->sb_pad = 0;
+ to->sb_spino_align = be32_to_cpu(from->sb_spino_align);
to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
to->sb_lsn = be64_to_cpu(from->sb_lsn);
/* Convert on-disk flags to in-memory flags? */
cpu_to_be32(from->sb_features_incompat);
to->sb_features_log_incompat =
cpu_to_be32(from->sb_features_log_incompat);
- to->sb_pad = 0;
+ to->sb_spino_align = cpu_to_be32(from->sb_spino_align);
to->sb_lsn = cpu_to_be64(from->sb_lsn);
}
}
mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
sbp->sb_inopblock);
mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
+
+ if (sbp->sb_spino_align)
+ mp->m_ialloc_min_blks = sbp->sb_spino_align;
+ else
+ mp->m_ialloc_min_blks = mp->m_ialloc_blks;
}
/*
tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
xfs_log_sb(tp);
if (wait)
xfs_trans_set_sync(tp);
- return xfs_trans_commit(tp, 0);
+ return xfs_trans_commit(tp);
}
#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */
#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer
count in superblock */
-/*
- * Values for call flags parameter.
- */
-#define XFS_TRANS_RELEASE_LOG_RES 0x4
-#define XFS_TRANS_ABORT 0x8
-
/*
* Field values for xfs_trans_mod_sb.
*/
* 2 trees * (2 blocks/level * max depth - 1) * block size
*/
#define XFS_ALLOCFREE_LOG_RES(mp,nx) \
- ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1)))
+ ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * (mp)->m_ag_maxlevels - 1)))
#define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
- ((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1)))
+ ((nx) * (2 * (2 * (mp)->m_ag_maxlevels - 1)))
/*
* Per-directory log reservation for any directory change.
#define XFS_DIOSTRAT_SPACE_RES(mp, v) \
(XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v))
#define XFS_GROWFS_SPACE_RES(mp) \
- (2 * XFS_AG_MAXLEVELS(mp))
+ (2 * (mp)->m_ag_maxlevels)
#define XFS_GROWFSRT_SPACE_RES(mp,b) \
((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK))
#define XFS_LINK_SPACE_RES(mp,nl) \
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
isize = xfs_new_eof(ip, offset + size);
if (!isize) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return 0;
}
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- return xfs_trans_commit(tp, 0);
+ return xfs_trans_commit(tp);
}
STATIC int
sector_t iblock,
struct buffer_head *bh_result,
int create,
- int direct)
+ bool direct)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
if (error)
return error;
new = 1;
+
} else {
/*
* Delalloc reservations do not require a transaction,
struct buffer_head *bh_result,
int create)
{
- return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
+ return __xfs_get_blocks(inode, iblock, bh_result, create, false);
}
-STATIC int
+int
xfs_get_blocks_direct(
struct inode *inode,
sector_t iblock,
struct buffer_head *bh_result,
int create)
{
- return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
+ return __xfs_get_blocks(inode, iblock, bh_result, create, true);
}
-/*
- * Complete a direct I/O write request.
- *
- * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
- * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
- * wholly within the EOF and so there is nothing for us to do. Note that in this
- * case the completion can be called in interrupt context, whereas if we have an
- * ioend we will always be called in task context (i.e. from a workqueue).
- */
-STATIC void
-xfs_end_io_direct_write(
- struct kiocb *iocb,
+static void
+__xfs_end_io_direct_write(
+ struct inode *inode,
+ struct xfs_ioend *ioend,
loff_t offset,
- ssize_t size,
- void *private)
+ ssize_t size)
{
- struct inode *inode = file_inode(iocb->ki_filp);
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_ioend *ioend = private;
-
- trace_xfs_gbmap_direct_endio(ip, offset, size,
- ioend ? ioend->io_type : 0, NULL);
+ struct xfs_mount *mp = XFS_I(inode)->i_mount;
- if (!ioend) {
- ASSERT(offset + size <= i_size_read(inode));
- return;
- }
-
- if (XFS_FORCED_SHUTDOWN(mp))
+ if (XFS_FORCED_SHUTDOWN(mp) || ioend->io_error)
goto out_end_io;
/*
* here can result in EOF moving backwards and Bad Things Happen when
* that occurs.
*/
- spin_lock(&ip->i_flags_lock);
+ spin_lock(&XFS_I(inode)->i_flags_lock);
if (offset + size > i_size_read(inode))
i_size_write(inode, offset + size);
- spin_unlock(&ip->i_flags_lock);
+ spin_unlock(&XFS_I(inode)->i_flags_lock);
/*
* If we are doing an append IO that needs to update the EOF on disk,
return;
}
+/*
+ * Complete a direct I/O write request.
+ *
+ * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
+ * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
+ * wholly within the EOF and so there is nothing for us to do. Note that in this
+ * case the completion can be called in interrupt context, whereas if we have an
+ * ioend we will always be called in task context (i.e. from a workqueue).
+ */
+STATIC void
+xfs_end_io_direct_write(
+ struct kiocb *iocb,
+ loff_t offset,
+ ssize_t size,
+ void *private)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct xfs_ioend *ioend = private;
+
+ trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size,
+ ioend ? ioend->io_type : 0, NULL);
+
+ if (!ioend) {
+ ASSERT(offset + size <= i_size_read(inode));
+ return;
+ }
+
+ __xfs_end_io_direct_write(inode, ioend, offset, size);
+}
+
+/*
+ * For DAX we need a mapping buffer callback for unwritten extent conversion
+ * when page faults allocate blocks and then zero them. Note that in this
+ * case the mapping indicated by the ioend may extend beyond EOF. We most
+ * definitely do not want to extend EOF here, so we trim back the ioend size to
+ * EOF.
+ */
+#ifdef CONFIG_FS_DAX
+void
+xfs_end_io_dax_write(
+ struct buffer_head *bh,
+ int uptodate)
+{
+ struct xfs_ioend *ioend = bh->b_private;
+ struct inode *inode = ioend->io_inode;
+ ssize_t size = ioend->io_size;
+
+ ASSERT(IS_DAX(ioend->io_inode));
+
+ /* if there was an error zeroing, then don't convert it */
+ if (!uptodate)
+ ioend->io_error = -EIO;
+
+ /*
+ * Trim update to EOF, so we don't extend EOF during unwritten extent
+ * conversion of partial EOF blocks.
+ */
+ spin_lock(&XFS_I(inode)->i_flags_lock);
+ if (ioend->io_offset + size > i_size_read(inode))
+ size = i_size_read(inode) - ioend->io_offset;
+ spin_unlock(&XFS_I(inode)->i_flags_lock);
+
+ __xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size);
+
+}
+#else
+void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { }
+#endif
+
+static inline ssize_t
+xfs_vm_do_dio(
+ struct inode *inode,
+ struct kiocb *iocb,
+ struct iov_iter *iter,
+ loff_t offset,
+ void (*endio)(struct kiocb *iocb,
+ loff_t offset,
+ ssize_t size,
+ void *private),
+ int flags)
+{
+ struct block_device *bdev;
+
+ if (IS_DAX(inode))
+ return dax_do_io(iocb, inode, iter, offset,
+ xfs_get_blocks_direct, endio, 0);
+
+ bdev = xfs_find_bdev_for_inode(inode);
+ return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
+ xfs_get_blocks_direct, endio, NULL, flags);
+}
+
STATIC ssize_t
xfs_vm_direct_IO(
struct kiocb *iocb,
loff_t offset)
{
struct inode *inode = iocb->ki_filp->f_mapping->host;
- struct block_device *bdev = xfs_find_bdev_for_inode(inode);
- if (iov_iter_rw(iter) == WRITE) {
- return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
- xfs_get_blocks_direct,
- xfs_end_io_direct_write, NULL,
- DIO_ASYNC_EXTEND);
- }
- return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
- xfs_get_blocks_direct, NULL, NULL, 0);
+ if (iov_iter_rw(iter) == WRITE)
+ return xfs_vm_do_dio(inode, iocb, iter, offset,
+ xfs_end_io_direct_write, DIO_ASYNC_EXTEND);
+ return xfs_vm_do_dio(inode, iocb, iter, offset, NULL, 0);
}
/*
} xfs_ioend_t;
extern const struct address_space_operations xfs_address_space_operations;
-extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
+
+int xfs_get_blocks(struct inode *inode, sector_t offset,
+ struct buffer_head *map_bh, int create);
+int xfs_get_blocks_direct(struct inode *inode, sector_t offset,
+ struct buffer_head *map_bh, int create);
+void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate);
extern void xfs_count_page_state(struct page *, int *, int *);
{
struct xfs_trans *trans;
struct xfs_mount *mp;
- int cancel_flags = 0;
int lock_mode = XFS_ILOCK_SHARED;
int error = 0;
goto out_cancel;
lock_mode = XFS_ILOCK_EXCL;
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT;
xfs_ilock(dp, lock_mode);
if (!XFS_IFORK_Q(dp))
*/
xfs_trans_ijoin(trans, dp, 0);
- /* invalidate and truncate the attribute fork extents */
- if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
+ /*
+ * Invalidate and truncate the attribute fork extents. Make sure the
+ * fork actually has attributes as otherwise the invalidation has no
+ * blocks to read and returns an error. In this case, just do the fork
+ * removal below.
+ */
+ if (xfs_inode_hasattr(dp) &&
+ dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
error = xfs_attr3_root_inactive(&trans, dp);
if (error)
goto out_cancel;
/* Reset the attribute fork - this also destroys the in-core fork */
xfs_attr_fork_remove(dp, trans);
- error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(trans);
xfs_iunlock(dp, lock_mode);
return error;
out_cancel:
- xfs_trans_cancel(trans, cancel_flags);
+ xfs_trans_cancel(trans);
out_destroy_fork:
/* kill the in-core attr fork before we drop the inode lock */
if (dp->i_afp)
xfs_efi_log_item_t *efi; /* extent free intention */
int error; /* error return value */
xfs_bmap_free_item_t *free; /* free extent item */
- struct xfs_trans_res tres; /* new log reservation */
xfs_mount_t *mp; /* filesystem mount structure */
xfs_bmap_free_item_t *next; /* next item on free list */
- xfs_trans_t *ntp; /* new transaction pointer */
ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
if (flist->xbf_count == 0) {
*committed = 0;
return 0;
}
- ntp = *tp;
- efi = xfs_trans_get_efi(ntp, flist->xbf_count);
+ efi = xfs_trans_get_efi(*tp, flist->xbf_count);
for (free = flist->xbf_first; free; free = free->xbfi_next)
- xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock,
+ xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock,
free->xbfi_blockcount);
- tres.tr_logres = ntp->t_log_res;
- tres.tr_logcount = ntp->t_log_count;
- tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
- ntp = xfs_trans_dup(*tp);
- error = xfs_trans_commit(*tp, 0);
- *tp = ntp;
+ error = xfs_trans_roll(tp, NULL);
*committed = 1;
/*
* We have a new transaction, so we should return committed=1,
if (error)
return error;
- /*
- * transaction commit worked ok so we can drop the extra ticket
- * reference that we gained in xfs_trans_dup()
- */
- xfs_log_ticket_put(ntp->t_ticket);
-
- error = xfs_trans_reserve(ntp, &tres, 0, 0);
- if (error)
- return error;
- efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
+ efd = xfs_trans_get_efd(*tp, efi, flist->xbf_count);
for (free = flist->xbf_first; free != NULL; free = next) {
next = free->xbfi_next;
- if ((error = xfs_free_extent(ntp, free->xbfi_startblock,
+ if ((error = xfs_free_extent(*tp, free->xbfi_startblock,
free->xbfi_blockcount))) {
/*
* The bmap free list will be cleaned up at a
* happens, since this transaction may not be
* dirty yet.
*/
- mp = ntp->t_mountp;
+ mp = (*tp)->t_mountp;
if (!XFS_FORCED_SHUTDOWN(mp))
xfs_force_shutdown(mp,
(error == -EFSCORRUPTED) ?
SHUTDOWN_META_IO_ERROR);
return error;
}
- xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
+ xfs_trans_log_efd_extent(*tp, efd, free->xbfi_startblock,
free->xbfi_blockcount);
xfs_bmap_del_free(flist, NULL, free);
}
if (need_iolock) {
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return -EAGAIN;
}
}
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
if (need_iolock)
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return error;
* If we get an error at this point we simply don't
* bother truncating the file.
*/
- xfs_trans_cancel(tp,
- (XFS_TRANS_RELEASE_LOG_RES |
- XFS_TRANS_ABORT));
+ xfs_trans_cancel(tp);
} else {
- error = xfs_trans_commit(tp,
- XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (!error)
xfs_inode_clear_eofblocks_tag(ip);
}
* Free the transaction structure.
*/
ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
break;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
goto error0;
}
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error) {
break;
xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
error1: /* Just cancel transaction */
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
break;
ASSERT(imap.br_blockcount >= 1);
ASSERT(imap.br_startoff == offset_fsb);
+ ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+
+ if (imap.br_startblock == HOLESTARTBLOCK ||
+ imap.br_state == XFS_EXT_UNWRITTEN) {
+ /* skip the entire extent */
+ lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff +
+ imap.br_blockcount) - 1;
+ continue;
+ }
+
lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
if (lastoffset > endoff)
lastoffset = endoff;
- if (imap.br_startblock == HOLESTARTBLOCK)
- continue;
- ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
- if (imap.br_state == XFS_EXT_UNWRITTEN)
+
+ /* DAX can just zero the backing device directly */
+ if (IS_DAX(VFS_I(ip))) {
+ error = dax_zero_page_range(VFS_I(ip), offset,
+ lastoffset - offset + 1,
+ xfs_get_blocks_direct);
+ if (error)
+ return error;
continue;
+ }
error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp,
* Free the transaction structure.
*/
ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
break;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
goto error0;
}
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
error0:
xfs_bmap_cancel(&free_list);
error1:
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
goto out;
}
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
break;
}
if (error)
goto out;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
}
return error;
out:
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
return error;
}
tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
goto out_unlock;
}
if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
trace_xfs_swap_extent_after(ip, 0);
trace_xfs_swap_extent_after(tip, 1);
goto out;
out_trans_cancel:
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
goto out;
}
return error;
}
-xfs_caddr_t
+void *
xfs_buf_offset(
- xfs_buf_t *bp,
+ struct xfs_buf *bp,
size_t offset)
{
struct page *page;
offset += bp->b_offset;
page = bp->b_pages[offset >> PAGE_SHIFT];
- return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
+ return page_address(page) + (offset & (PAGE_SIZE-1));
}
/*
xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
/* Buffer Utility Routines */
-extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
+extern void *xfs_buf_offset(struct xfs_buf *, size_t);
/* Delayed Write Buffer Routines */
extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
struct xfs_buf *bp;
struct xfs_trans *tp = NULL;
int error;
- int cancelflags = 0;
-
dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);
XFS_QM_DQALLOC_SPACE_RES(mp), 0);
if (error)
goto error1;
- cancelflags = XFS_TRANS_RELEASE_LOG_RES;
}
/*
* allocate (ENOENT).
*/
trace_xfs_dqread_fail(dqp);
- cancelflags |= XFS_TRANS_ABORT;
goto error1;
}
xfs_trans_brelse(tp, bp);
if (tp) {
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto error0;
}
error1:
if (tp)
- xfs_trans_cancel(tp, cancelflags);
+ xfs_trans_cancel(tp);
error0:
xfs_qm_dqdestroy(dqp);
*O_dqpp = NULL;
struct xfs_mount *mp,
const char *filename,
int linenum,
- inst_t *ra)
+ void *ra)
{
if (level <= xfs_error_level) {
xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
void *p,
const char *filename,
int linenum,
- inst_t *ra)
+ void *ra)
{
if (level <= xfs_error_level)
xfs_hex_dump(p, 64);
struct xfs_mount;
extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
- const char *filename, int linenum, inst_t *ra);
+ const char *filename, int linenum, void *ra);
extern void xfs_corruption_error(const char *tag, int level,
struct xfs_mount *mp, void *p, const char *filename,
- int linenum, inst_t *ra);
+ int linenum, void *ra);
extern void xfs_verifier_error(struct xfs_buf *bp);
#define XFS_ERROR_REPORT(e, lvl, mp) \
xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
efip->efi_format.efi_nextents = nextents;
- efip->efi_format.efi_id = (__psint_t)(void*)efip;
+ efip->efi_format.efi_id = (uintptr_t)(void *)efip;
atomic_set(&efip->efi_next_extent, 0);
atomic_set(&efip->efi_refcount, 2);
}
/*
- * xfs_iozero
+ * xfs_iozero clears the specified range supplied via the page cache (except in
+ * the DAX case). Writes through the page cache will allocate blocks over holes,
+ * though the callers usually map the holes first and avoid them. If a block is
+ * not completely zeroed, then it will be read from disk before being partially
+ * zeroed.
*
- * xfs_iozero clears the specified range of buffer supplied,
- * and marks all the affected blocks as valid and modified. If
- * an affected block is not allocated, it will be allocated. If
- * an affected block is not completely overwritten, and is not
- * valid before the operation, it will be read from disk before
- * being partially zeroed.
+ * In the DAX case, we can just directly write to the underlying pages. This
+ * will not allocate blocks, but will avoid holes and unwritten extents and so
+ * not do unnecessary work.
*/
int
xfs_iozero(
{
struct page *page;
struct address_space *mapping;
- int status;
+ int status = 0;
+
mapping = VFS_I(ip)->i_mapping;
do {
if (bytes > count)
bytes = count;
- status = pagecache_write_begin(NULL, mapping, pos, bytes,
- AOP_FLAG_UNINTERRUPTIBLE,
- &page, &fsdata);
- if (status)
- break;
+ if (IS_DAX(VFS_I(ip))) {
+ status = dax_zero_page_range(VFS_I(ip), pos, bytes,
+ xfs_get_blocks_direct);
+ if (status)
+ break;
+ } else {
+ status = pagecache_write_begin(NULL, mapping, pos, bytes,
+ AOP_FLAG_UNINTERRUPTIBLE,
+ &page, &fsdata);
+ if (status)
+ break;
- zero_user(page, offset, bytes);
+ zero_user(page, offset, bytes);
- status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
- page, fsdata);
- WARN_ON(status <= 0); /* can't return less than zero! */
+ status = pagecache_write_end(NULL, mapping, pos, bytes,
+ bytes, page, fsdata);
+ WARN_ON(status <= 0); /* can't return less than zero! */
+ status = 0;
+ }
pos += bytes;
count -= bytes;
- status = 0;
} while (count);
return status;
tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
if (flags & XFS_PREALLOC_SYNC)
xfs_trans_set_sync(tp);
- return xfs_trans_commit(tp, 0);
+ return xfs_trans_commit(tp);
}
/*
if (file->f_mode & FMODE_NOCMTIME)
ioflags |= XFS_IO_INVIS;
- if (unlikely(ioflags & XFS_IO_ISDIRECT)) {
+ if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) {
xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
- ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
+ /* for dax, we need to avoid the page cache */
+ if (IS_DAX(VFS_I(ip)))
+ ret = default_file_splice_read(infilp, ppos, pipe, count, flags);
+ else
+ ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
mp->m_rtdev_targp : mp->m_ddev_targp;
/* DIO must be aligned to device logical sector size */
- if ((pos | count) & target->bt_logical_sectormask)
+ if (!IS_DAX(inode) && ((pos | count) & target->bt_logical_sectormask))
return -EINVAL;
/* "unaligned" here means not aligned to a filesystem block */
out:
xfs_rw_iunlock(ip, iolock);
- /* No fallback to buffered IO on errors for XFS. */
- ASSERT(ret < 0 || ret == count);
+ /*
+ * No fallback to buffered IO on errors for XFS. DAX can result in
+ * partial writes, but direct IO will either complete fully or fail.
+ */
+ ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
return ret;
}
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
- if (unlikely(iocb->ki_flags & IOCB_DIRECT))
+ if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
ret = xfs_file_dio_aio_write(iocb, from);
else
ret = xfs_file_buffered_aio_write(iocb, from);
return xfs_readdir(ip, ctx, bufsize);
}
-STATIC int
-xfs_file_mmap(
- struct file *filp,
- struct vm_area_struct *vma)
-{
- vma->vm_ops = &xfs_file_vm_ops;
-
- file_accessed(filp);
- return 0;
-}
-
/*
* This type is designed to indicate the type of offset we would like
* to search from page cache for xfs_seek_hole_data().
* ordering of:
*
* mmap_sem (MM)
- * i_mmap_lock (XFS - truncate serialisation)
- * page_lock (MM)
- * i_lock (XFS - extent map serialisation)
+ * sb_start_pagefault(vfs, freeze)
+ * i_mmap_lock (XFS - truncate serialisation)
+ * page_lock (MM)
+ * i_lock (XFS - extent map serialisation)
+ */
+
+/*
+ * mmap()d file has taken write protection fault and is being made writable. We
+ * can set the page state up correctly for a writable page, which means we can
+ * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
+ * mapping.
*/
STATIC int
-xfs_filemap_fault(
+xfs_filemap_page_mkwrite(
struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
- int error;
+ struct inode *inode = file_inode(vma->vm_file);
+ int ret;
- trace_xfs_filemap_fault(ip);
+ trace_xfs_filemap_page_mkwrite(XFS_I(inode));
- xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
- error = filemap_fault(vma, vmf);
- xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+ sb_start_pagefault(inode->i_sb);
+ file_update_time(vma->vm_file);
+ xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- return error;
+ if (IS_DAX(inode)) {
+ ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct,
+ xfs_end_io_dax_write);
+ } else {
+ ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks);
+ ret = block_page_mkwrite_return(ret);
+ }
+
+ xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+ sb_end_pagefault(inode->i_sb);
+
+ return ret;
}
-/*
- * mmap()d file has taken write protection fault and is being made writable. We
- * can set the page state up correctly for a writable page, which means we can
- * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
- * mapping.
- */
STATIC int
-xfs_filemap_page_mkwrite(
+xfs_filemap_fault(
struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
- int error;
+ struct xfs_inode *ip = XFS_I(file_inode(vma->vm_file));
+ int ret;
+
+ trace_xfs_filemap_fault(ip);
- trace_xfs_filemap_page_mkwrite(ip);
+ /* DAX can shortcut the normal fault path on write faults! */
+ if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip)))
+ return xfs_filemap_page_mkwrite(vma, vmf);
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
- error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+ ret = filemap_fault(vma, vmf);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
- return error;
+ return ret;
+}
+
+static const struct vm_operations_struct xfs_file_vm_ops = {
+ .fault = xfs_filemap_fault,
+ .map_pages = filemap_map_pages,
+ .page_mkwrite = xfs_filemap_page_mkwrite,
+};
+
+STATIC int
+xfs_file_mmap(
+ struct file *filp,
+ struct vm_area_struct *vma)
+{
+ file_accessed(filp);
+ vma->vm_ops = &xfs_file_vm_ops;
+ if (IS_DAX(file_inode(filp)))
+ vma->vm_flags |= VM_MIXEDMAP;
+ return 0;
}
const struct file_operations xfs_file_operations = {
#endif
.fsync = xfs_dir_fsync,
};
-
-static const struct vm_operations_struct xfs_file_vm_ops = {
- .fault = xfs_filemap_fault,
- .map_pages = filemap_map_pages,
- .page_mkwrite = xfs_filemap_page_mkwrite,
-};
goto next_ag;
}
- longest = xfs_alloc_longest_free_extent(mp, pag);
+ longest = xfs_alloc_longest_free_extent(mp, pag,
+ xfs_alloc_min_freelist(mp, pag));
if (((minlen && longest >= minlen) ||
(!minlen && pag->pagf_freeblks >= minfree)) &&
(!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
(xfs_sb_version_hasftype(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
(xfs_sb_version_hasfinobt(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_FINOBT : 0);
+ XFS_FSOP_GEOM_FLAGS_FINOBT : 0) |
+ (xfs_sb_version_hassparseinodes(&mp->m_sb) ?
+ XFS_FSOP_GEOM_FLAGS_SPINODES : 0);
geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
mp->m_sb.sb_logsectsize : BBSIZE;
geo->rtsectsize = mp->m_sb.sb_blocksize;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
XFS_GROWFS_SPACE_RES(mp), 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
if (dpct)
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
if (error)
return error;
return saved_error ? saved_error : error;
error0:
- xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
return error;
}
{
xfs_trans_t *tp;
- xfs_trans_t *ntp;
xfs_inode_t *ip;
xfs_buf_t *ialloc_context = NULL;
int code;
* to succeed the second time.
*/
if (ialloc_context) {
- struct xfs_trans_res tres;
-
/*
* Normally, xfs_trans_commit releases all the locks.
* We call bhold to hang on to the ialloc_context across
* allocation group.
*/
xfs_trans_bhold(tp, ialloc_context);
- /*
- * Save the log reservation so we can use
- * them in the next transaction.
- */
- tres.tr_logres = xfs_trans_get_log_res(tp);
- tres.tr_logcount = xfs_trans_get_log_count(tp);
/*
* We want the quota changes to be associated with the next
tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
}
- ntp = xfs_trans_dup(tp);
- code = xfs_trans_commit(tp, 0);
- tp = ntp;
- if (committed != NULL) {
+ code = xfs_trans_roll(&tp, 0);
+ if (committed != NULL)
*committed = 1;
- }
- /*
- * If we get an error during the commit processing,
- * release the buffer that is still held and return
- * to the caller.
- */
- if (code) {
- xfs_buf_relse(ialloc_context);
- if (dqinfo) {
- tp->t_dqinfo = dqinfo;
- xfs_trans_free_dqinfo(tp);
- }
- *tpp = ntp;
- *ipp = NULL;
- return code;
- }
-
- /*
- * transaction commit worked ok so we can drop the extra ticket
- * reference that we gained in xfs_trans_dup()
- */
- xfs_log_ticket_put(tp->t_ticket);
- tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
- code = xfs_trans_reserve(tp, &tres, 0, 0);
/*
* Re-attach the quota info that we detached from prev trx.
if (code) {
xfs_buf_relse(ialloc_context);
- *tpp = ntp;
+ *tpp = tp;
*ipp = NULL;
return code;
}
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
bool unlock_dp_on_error = false;
- uint cancel_flags;
int committed;
prid_t prid;
struct xfs_dquot *udqp = NULL;
tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
}
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-
/*
* Initially assume that the file does not exist and
* reserve the resources for that case. If that is not
resblks = 0;
error = xfs_trans_reserve(tp, tres, 0, 0);
}
- if (error) {
- cancel_flags = 0;
+ if (error)
goto out_trans_cancel;
- }
+
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
unlock_dp_on_error = true;
if (error) {
if (error == -ENOSPC)
goto out_trans_cancel;
- goto out_trans_abort;
+ goto out_trans_cancel;
}
/*
resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
if (error) {
ASSERT(error != -ENOSPC);
- goto out_trans_abort;
+ goto out_trans_cancel;
}
xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
if (error)
goto out_bmap_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto out_release_inode;
out_bmap_cancel:
xfs_bmap_cancel(&free_list);
- out_trans_abort:
- cancel_flags |= XFS_TRANS_ABORT;
out_trans_cancel:
- xfs_trans_cancel(tp, cancel_flags);
+ xfs_trans_cancel(tp);
out_release_inode:
/*
* Wait until after the current transaction is aborted to finish the
struct xfs_inode *ip = NULL;
struct xfs_trans *tp = NULL;
int error;
- uint cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
prid_t prid;
struct xfs_dquot *udqp = NULL;
struct xfs_dquot *gdqp = NULL;
resblks = 0;
error = xfs_trans_reserve(tp, tres, 0, 0);
}
- if (error) {
- cancel_flags = 0;
+ if (error)
goto out_trans_cancel;
- }
error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
pdqp, resblks, 1, 0);
if (error) {
if (error == -ENOSPC)
goto out_trans_cancel;
- goto out_trans_abort;
+ goto out_trans_cancel;
}
if (mp->m_flags & XFS_MOUNT_WSYNC)
ip->i_d.di_nlink--;
error = xfs_iunlink(tp, ip);
if (error)
- goto out_trans_abort;
+ goto out_trans_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto out_release_inode;
*ipp = ip;
return 0;
- out_trans_abort:
- cancel_flags |= XFS_TRANS_ABORT;
out_trans_cancel:
- xfs_trans_cancel(tp, cancel_flags);
+ xfs_trans_cancel(tp);
out_release_inode:
/*
* Wait until after the current transaction is aborted to finish the
int error;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
- int cancel_flags;
int committed;
int resblks;
goto std_return;
tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
if (error == -ENOSPC) {
resblks = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
}
- if (error) {
- cancel_flags = 0;
+ if (error)
goto error_return;
- }
xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
if (sip->i_d.di_nlink == 0) {
error = xfs_iunlink_remove(tp, sip);
if (error)
- goto abort_return;
+ goto error_return;
}
error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
&first_block, &free_list, resblks);
if (error)
- goto abort_return;
+ goto error_return;
xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
error = xfs_bumplink(tp, sip);
if (error)
- goto abort_return;
+ goto error_return;
/*
* If this is a synchronous mount, make sure that the
error = xfs_bmap_finish (&tp, &free_list, &committed);
if (error) {
xfs_bmap_cancel(&free_list);
- goto abort_return;
+ goto error_return;
}
- return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ return xfs_trans_commit(tp);
- abort_return:
- cancel_flags |= XFS_TRANS_ABORT;
error_return:
- xfs_trans_cancel(tp, cancel_flags);
+ xfs_trans_cancel(tp);
std_return:
return error;
}
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp = *tpp;
- struct xfs_trans *ntp;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
xfs_fileoff_t first_unmap_block;
if (error)
goto out_bmap_cancel;
- if (committed) {
- /*
- * Mark the inode dirty so it will be logged and
- * moved forward in the log as part of every commit.
- */
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- }
-
- ntp = xfs_trans_dup(tp);
- error = xfs_trans_commit(tp, 0);
- tp = ntp;
-
- xfs_trans_ijoin(tp, ip, 0);
-
- if (error)
- goto out;
-
- /*
- * Transaction commit worked ok so we can drop the extra ticket
- * reference that we gained in xfs_trans_dup()
- */
- xfs_log_ticket_put(tp->t_ticket);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
+ error = xfs_trans_roll(&tp, ip);
if (error)
goto out;
}
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
ASSERT(ip->i_d.di_nextents == 0);
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto error_unlock;
return 0;
error_trans_cancel:
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
error_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
} else {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
}
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
+ xfs_trans_cancel(tp);
return error;
}
__func__, error);
xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
}
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
if (error)
xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
__func__, error);
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
__func__, error);
*/
STATIC int
xfs_ifree_cluster(
- xfs_inode_t *free_ip,
- xfs_trans_t *tp,
- xfs_ino_t inum)
+ xfs_inode_t *free_ip,
+ xfs_trans_t *tp,
+ struct xfs_icluster *xic)
{
xfs_mount_t *mp = free_ip->i_mount;
int blks_per_cluster;
int inodes_per_cluster;
int nbufs;
int i, j;
+ int ioffset;
xfs_daddr_t blkno;
xfs_buf_t *bp;
xfs_inode_t *ip;
xfs_inode_log_item_t *iip;
xfs_log_item_t *lip;
struct xfs_perag *pag;
+ xfs_ino_t inum;
+ inum = xic->first_ino;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
blks_per_cluster = xfs_icluster_size_fsb(mp);
inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
nbufs = mp->m_ialloc_blks / blks_per_cluster;
for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) {
+ /*
+ * The allocation bitmap tells us which inodes of the chunk were
+ * physically allocated. Skip the cluster if an inode falls into
+ * a sparse region.
+ */
+ ioffset = inum - xic->first_ino;
+ if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
+ ASSERT(do_mod(ioffset, inodes_per_cluster) == 0);
+ continue;
+ }
+
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
XFS_INO_TO_AGBNO(mp, inum));
xfs_bmap_free_t *flist)
{
int error;
- int delete;
- xfs_ino_t first_ino;
+ struct xfs_icluster xic = { 0 };
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(ip->i_d.di_nlink == 0);
if (error)
return error;
- error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
+ error = xfs_difree(tp, ip->i_ino, flist, &xic);
if (error)
return error;
ip->i_d.di_gen++;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- if (delete)
- error = xfs_ifree_cluster(ip, tp, first_ino);
+ if (xic.deleted)
+ error = xfs_ifree_cluster(ip, tp, &xic);
return error;
}
int error = 0;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
- int cancel_flags;
int committed;
uint resblks;
tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
else
tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
/*
* We try to get the real space reservation first,
}
if (error) {
ASSERT(error != -ENOSPC);
- cancel_flags = 0;
goto out_trans_cancel;
}
/*
* If we're removing a directory perform some additional validation.
*/
- cancel_flags |= XFS_TRANS_ABORT;
if (is_dir) {
ASSERT(ip->i_d.di_nlink >= 2);
if (ip->i_d.di_nlink != 2) {
if (error)
goto out_bmap_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto std_return;
out_bmap_cancel:
xfs_bmap_cancel(&free_list);
out_trans_cancel:
- xfs_trans_cancel(tp, cancel_flags);
+ xfs_trans_cancel(tp);
std_return:
return error;
}
error = xfs_bmap_finish(&tp, free_list, &committed);
if (error) {
xfs_bmap_cancel(free_list);
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
return error;
}
- return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ return xfs_trans_commit(tp);
}
/*
out_trans_abort:
xfs_bmap_cancel(free_list);
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
return error;
}
int num_inodes = __XFS_SORT_INODES;
bool new_parent = (src_dp != target_dp);
bool src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
- int cancel_flags = 0;
int spaceres;
int error;
}
if (error)
goto out_trans_cancel;
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
/*
* Attach the dquots to the inodes
error = xfs_dir_createname(tp, target_dp, target_name,
src_ip->i_ino, &first_block,
&free_list, spaceres);
- if (error == -ENOSPC)
- goto out_bmap_cancel;
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
xfs_trans_ichgtime(tp, target_dp,
XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
if (new_parent && src_is_directory) {
error = xfs_bumplink(tp, target_dp);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
}
} else { /* target_ip != NULL */
/*
src_ip->i_ino,
&first_block, &free_list, spaceres);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
xfs_trans_ichgtime(tp, target_dp,
XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
*/
error = xfs_droplink(tp, target_ip);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
if (src_is_directory) {
/*
*/
error = xfs_droplink(tp, target_ip);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
}
} /* target_ip != NULL */
&first_block, &free_list, spaceres);
ASSERT(error != -EEXIST);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
}
/*
*/
error = xfs_droplink(tp, src_dp);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
}
/*
error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
&first_block, &free_list, spaceres);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
/*
* For whiteouts, we need to bump the link count on the whiteout inode.
ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0);
error = xfs_bumplink(tp, wip);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
error = xfs_iunlink_remove(tp, wip);
if (error)
- goto out_trans_abort;
+ goto out_bmap_cancel;
xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
/*
IRELE(wip);
return error;
-out_trans_abort:
- cancel_flags |= XFS_TRANS_ABORT;
out_bmap_cancel:
xfs_bmap_cancel(&free_list);
out_trans_cancel:
- xfs_trans_cancel(tp, cancel_flags);
+ xfs_trans_cancel(tp);
if (wip)
IRELE(wip);
return error;
ASSERT(ip->i_d.di_version > 1);
/* set *dip = inode's place in the buffer */
- dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
+ dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
ip->i_d.di_dmstate = state;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
return error;
}
return tp;
out_cancel:
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return ERR_PTR(error);
}
else
ip->i_d.di_extsize = 0;
- code = xfs_trans_commit(tp, 0);
+ code = xfs_trans_commit(tp);
/*
* Release any dquot(s) the inode had kept before chown.
return code;
error_trans_cancel:
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
error_free_dquots:
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(pdqp);
error = xfs_ioctl_setattr_xflags(tp, ip, &fa);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
goto out_drop_write;
}
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
out_drop_write:
mnt_drop_write_file(filp);
return error;
* Check for running out of space, note: need lock to return
*/
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error)
goto out_bmap_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto out_unlock;
xfs_bmap_cancel(&free_list);
xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
out_trans_cancel:
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
goto out_unlock;
}
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
nres, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (error)
goto trans_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto error0;
trans_cancel:
xfs_bmap_cancel(&free_list);
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
error0:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
resblks, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
if (error)
goto error_on_bmapi_transaction;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error)
return error;
error_on_bmapi_transaction:
xfs_bmap_cancel(&free_list);
- xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
+ xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return 0;
out_trans_cancel:
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
out_dqrele:
xfs_qm_dqrele(udqp);
struct xfs_trans *tp;
int error;
uint lock_flags = 0;
- uint commit_flags = 0;
bool did_zeroing = false;
trace_xfs_setattr(ip);
* to hope that the caller sees ENOMEM and retries the truncate
* operation.
*/
- error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
+ if (IS_DAX(inode))
+ error = dax_truncate_page(inode, newsize, xfs_get_blocks_direct);
+ else
+ error = block_truncate_page(inode->i_mapping, newsize,
+ xfs_get_blocks);
if (error)
return error;
truncate_setsize(inode, newsize);
if (error)
goto out_trans_cancel;
- commit_flags = XFS_TRANS_RELEASE_LOG_RES;
lock_flags |= XFS_ILOCK_EXCL;
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
if (newsize <= oldsize) {
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
if (error)
- goto out_trans_abort;
+ goto out_trans_cancel;
/*
* Truncated "down", so we're removing references to old data
if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
out_unlock:
if (lock_flags)
xfs_iunlock(ip, lock_flags);
return error;
-out_trans_abort:
- commit_flags |= XFS_TRANS_ABORT;
out_trans_cancel:
- xfs_trans_cancel(tp, commit_flags);
+ xfs_trans_cancel(tp);
goto out_unlock;
}
tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
}
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
- return xfs_trans_commit(tp, 0);
+ return xfs_trans_commit(tp);
}
#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
struct inode *inode,
struct xfs_inode *ip)
{
- if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+ uint16_t flags = ip->i_d.di_flags;
+
+ inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC |
+ S_NOATIME | S_DAX);
+
+ if (flags & XFS_DIFLAG_IMMUTABLE)
inode->i_flags |= S_IMMUTABLE;
- else
- inode->i_flags &= ~S_IMMUTABLE;
- if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
+ if (flags & XFS_DIFLAG_APPEND)
inode->i_flags |= S_APPEND;
- else
- inode->i_flags &= ~S_APPEND;
- if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
+ if (flags & XFS_DIFLAG_SYNC)
inode->i_flags |= S_SYNC;
- else
- inode->i_flags &= ~S_SYNC;
- if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
+ if (flags & XFS_DIFLAG_NOATIME)
inode->i_flags |= S_NOATIME;
- else
- inode->i_flags &= ~S_NOATIME;
+ /* XXX: Also needs an on-disk per inode flag! */
+ if (ip->i_mount->m_flags & XFS_MOUNT_DAX)
+ inode->i_flags |= S_DAX;
}
/*
}
irec->ir_free |= xfs_inobt_maskn(0, idx);
- *icount = XFS_INODES_PER_CHUNK - irec->ir_freecount;
+ *icount = irec->ir_count - irec->ir_freecount;
}
return 0;
goto del_cursor;
if (icount) {
irbp->ir_startino = r.ir_startino;
+ irbp->ir_holemask = r.ir_holemask;
+ irbp->ir_count = r.ir_count;
irbp->ir_freecount = r.ir_freecount;
irbp->ir_free = r.ir_free;
irbp++;
* If this chunk has any allocated inodes, save it.
* Also start read-ahead now for this chunk.
*/
- if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
+ if (r.ir_freecount < r.ir_count) {
xfs_bulkstat_ichunk_ra(mp, agno, &r);
irbp->ir_startino = r.ir_startino;
+ irbp->ir_holemask = r.ir_holemask;
+ irbp->ir_count = r.ir_count;
irbp->ir_freecount = r.ir_freecount;
irbp->ir_free = r.ir_free;
irbp++;
- icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
+ icount += r.ir_count - r.ir_freecount;
}
error = xfs_btree_increment(cur, 0, &stat);
if (error || stat == 0) {
agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
buffer[bufidx].xi_startino =
XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
- buffer[bufidx].xi_alloccount =
- XFS_INODES_PER_CHUNK - r.ir_freecount;
+ buffer[bufidx].xi_alloccount = r.ir_count - r.ir_freecount;
buffer[bufidx].xi_allocmask = ~r.ir_free;
if (++bufidx == bcount) {
long written;
typedef signed long long int __int64_t;
typedef unsigned long long int __uint64_t;
-typedef __uint32_t inst_t; /* an instruction */
-
typedef __s64 xfs_off_t; /* <file offset> type */
typedef unsigned long long xfs_ino_t; /* <inode> type */
typedef __s64 xfs_daddr_t; /* <disk address> type */
-typedef char * xfs_caddr_t; /* <core address> type */
typedef __u32 xfs_dev_t;
typedef __u32 xfs_nlink_t;
-/* __psint_t is the same size as a pointer */
-#if (BITS_PER_LONG == 32)
-typedef __int32_t __psint_t;
-typedef __uint32_t __psunsigned_t;
-#elif (BITS_PER_LONG == 64)
-typedef __int64_t __psint_t;
-typedef __uint64_t __psunsigned_t;
-#else
-#error BITS_PER_LONG must be 32 or 64
-#endif
-
#include "xfs_types.h"
#include "kmem.h"
STATIC void
xlog_verify_dest_ptr(
struct xlog *log,
- char *ptr);
+ void *ptr);
STATIC void
xlog_verify_grant_tail(
struct xlog *log);
struct xfs_mount *mp,
struct xlog_ticket *ticket,
struct xlog_in_core **iclog,
- uint flags)
+ bool regrant)
{
struct xlog *log = mp->m_log;
xfs_lsn_t lsn = 0;
(((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
(xlog_commit_record(log, ticket, iclog, &lsn)))) {
lsn = (xfs_lsn_t) -1;
- if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
- flags |= XFS_LOG_REL_PERM_RESERV;
- }
+ regrant = false;
}
- if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 ||
- (flags & XFS_LOG_REL_PERM_RESERV)) {
+ if (!regrant) {
trace_xfs_log_done_nonperm(log, ticket);
/*
* request has been made to release a permanent reservation.
*/
xlog_ungrant_log_space(log, ticket);
- xfs_log_ticket_put(ticket);
} else {
trace_xfs_log_done_perm(log, ticket);
ticket->t_flags |= XLOG_TIC_INITED;
}
+ xfs_log_ticket_put(ticket);
return lsn;
}
iclog->ic_bp = bp;
iclog->ic_data = bp->b_addr;
#ifdef DEBUG
- log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
+ log->l_iclog_bak[i] = &iclog->ic_header;
#endif
head = &iclog->ic_header;
memset(head, 0, sizeof(xlog_rec_header_t));
int i, j, k;
int size = iclog->ic_offset + roundoff;
__be32 cycle_lsn;
- xfs_caddr_t dp;
+ char *dp;
cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);
void
xlog_verify_dest_ptr(
struct xlog *log,
- char *ptr)
+ void *ptr)
{
int i;
int good_ptr = 0;
xlog_op_header_t *ophead;
xlog_in_core_t *icptr;
xlog_in_core_2_t *xhdr;
- xfs_caddr_t ptr;
- xfs_caddr_t base_ptr;
- __psint_t field_offset;
+ void *base_ptr, *ptr, *p;
+ ptrdiff_t field_offset;
__uint8_t clientid;
int len, i, j, k, op_len;
int idx;
if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);
- ptr = (xfs_caddr_t) &iclog->ic_header;
- for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count;
- ptr += BBSIZE) {
+ base_ptr = ptr = &iclog->ic_header;
+ p = &iclog->ic_header;
+ for (ptr += BBSIZE; ptr < base_ptr + count; ptr += BBSIZE) {
if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
xfs_emerg(log->l_mp, "%s: unexpected magic num",
__func__);
/* check fields */
len = be32_to_cpu(iclog->ic_header.h_num_logops);
- ptr = iclog->ic_datap;
- base_ptr = ptr;
- ophead = (xlog_op_header_t *)ptr;
+ base_ptr = ptr = iclog->ic_datap;
+ ophead = ptr;
xhdr = iclog->ic_data;
for (i = 0; i < len; i++) {
- ophead = (xlog_op_header_t *)ptr;
+ ophead = ptr;
/* clientid is only 1 byte */
- field_offset = (__psint_t)
- ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr);
+ p = &ophead->oh_clientid;
+ field_offset = p - base_ptr;
if (!syncing || (field_offset & 0x1ff)) {
clientid = ophead->oh_clientid;
} else {
- idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap);
+ idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap);
if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
(unsigned long)field_offset);
/* check length */
- field_offset = (__psint_t)
- ((xfs_caddr_t)&(ophead->oh_len) - base_ptr);
+ p = &ophead->oh_len;
+ field_offset = p - base_ptr;
if (!syncing || (field_offset & 0x1ff)) {
op_len = be32_to_cpu(ophead->oh_len);
} else {
- idx = BTOBBT((__psint_t)&ophead->oh_len -
- (__psint_t)iclog->ic_datap);
+ idx = BTOBBT((uintptr_t)&ophead->oh_len -
+ (uintptr_t)iclog->ic_datap);
if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
#define XFS_LSN_CMP(x,y) _lsn_cmp(x,y)
-/*
- * Macros, structures, prototypes for interface to the log manager.
- */
-
-/*
- * Flags to xfs_log_done()
- */
-#define XFS_LOG_REL_PERM_RESERV 0x1
-
/*
* Flags to xfs_log_force()
*
xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
struct xlog_ticket *ticket,
struct xlog_in_core **iclog,
- uint flags);
+ bool regrant);
int _xfs_log_force(struct xfs_mount *mp,
uint flags,
int *log_forced);
void xfs_log_ticket_put(struct xlog_ticket *ticket);
void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_lsn_t *commit_lsn, int flags);
+ xfs_lsn_t *commit_lsn, bool regrant);
bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
void xfs_log_work_queue(struct xfs_mount *mp);
spin_unlock(&cil->xc_push_lock);
/* xfs_log_done always frees the ticket on error. */
- commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
+ commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, false);
if (commit_lsn == -1)
goto out_abort;
struct xfs_mount *mp,
struct xfs_trans *tp,
xfs_lsn_t *commit_lsn,
- int flags)
+ bool regrant)
{
struct xlog *log = mp->m_log;
struct xfs_cil *cil = log->l_cilp;
- int log_flags = 0;
-
- if (flags & XFS_TRANS_RELEASE_LOG_RES)
- log_flags = XFS_LOG_REL_PERM_RESERV;
/* lock out background commit */
down_read(&cil->xc_ctx_lock);
if (commit_lsn)
*commit_lsn = tp->t_commit_lsn;
- xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
+ xfs_log_done(mp, tp->t_ticket, NULL, regrant);
xfs_trans_unreserve_and_mod_sb(tp);
/*
* the log items. This affects (at least) processing of stale buffers,
* inodes and EFIs.
*/
- xfs_trans_free_items(tp, tp->t_commit_lsn, 0);
+ xfs_trans_free_items(tp, tp->t_commit_lsn, false);
xlog_cil_push_background(log);
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
- char *l_iclog_bak[XLOG_MAX_ICLOGS];
+ void *l_iclog_bak[XLOG_MAX_ICLOGS];
#endif
};
* Return the address of the start of the given block number's data
* in a log buffer. The buffer covers a log sector-aligned region.
*/
-STATIC xfs_caddr_t
+STATIC char *
xlog_align(
struct xlog *log,
xfs_daddr_t blk_no,
xfs_daddr_t blk_no,
int nbblks,
struct xfs_buf *bp,
- xfs_caddr_t *offset)
+ char **offset)
{
int error;
xfs_daddr_t blk_no, /* block to read from */
int nbblks, /* blocks to read */
struct xfs_buf *bp,
- xfs_caddr_t offset)
+ char *offset)
{
- xfs_caddr_t orig_offset = bp->b_addr;
+ char *orig_offset = bp->b_addr;
int orig_len = BBTOB(bp->b_length);
int error, error2;
xfs_daddr_t *last_blk,
uint cycle)
{
- xfs_caddr_t offset;
+ char *offset;
xfs_daddr_t mid_blk;
xfs_daddr_t end_blk;
uint mid_cycle;
uint cycle;
xfs_buf_t *bp;
xfs_daddr_t bufblks;
- xfs_caddr_t buf = NULL;
+ char *buf = NULL;
int error = 0;
/*
{
xfs_daddr_t i;
xfs_buf_t *bp;
- xfs_caddr_t offset = NULL;
+ char *offset = NULL;
xlog_rec_header_t *head = NULL;
int error = 0;
int smallmem = 0;
xfs_daddr_t *return_head_blk)
{
xfs_buf_t *bp;
- xfs_caddr_t offset;
+ char *offset;
xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
int num_scan_bblks;
uint first_half_cycle, last_half_cycle;
{
xlog_rec_header_t *rhead;
xlog_op_header_t *op_head;
- xfs_caddr_t offset = NULL;
+ char *offset = NULL;
xfs_buf_t *bp;
int error, i, found;
xfs_daddr_t umount_data_blk;
xfs_daddr_t *blk_no)
{
xfs_buf_t *bp;
- xfs_caddr_t offset;
+ char *offset;
uint first_cycle, last_cycle;
xfs_daddr_t new_blk, last_blk, start_blk;
xfs_daddr_t num_scan_bblks;
STATIC void
xlog_add_record(
struct xlog *log,
- xfs_caddr_t buf,
+ char *buf,
int cycle,
int block,
int tail_cycle,
int tail_cycle,
int tail_block)
{
- xfs_caddr_t offset;
+ char *offset;
xfs_buf_t *bp;
int balign, ealign;
int sectbb = log->l_sectBBsize;
return -EFSCORRUPTED;
}
- buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
- next_unlinked_offset);
+ buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset);
*buffer_nextp = *logged_nextp;
/*
* have to leave the inode in a consistent state for whoever
* reads it next....
*/
- xfs_dinode_calc_crc(mp, (struct xfs_dinode *)
+ xfs_dinode_calc_crc(mp,
xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
}
xfs_buf_t *bp;
xfs_dinode_t *dip;
int len;
- xfs_caddr_t src;
- xfs_caddr_t dest;
+ char *src;
+ char *dest;
int error;
int attr_index;
uint fields;
goto out_release;
}
ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
- dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);
+ dip = xfs_buf_offset(bp, in_f->ilf_boffset);
/*
* Make sure the place we're flushing out to really looks
return error;
ASSERT(bp);
- ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset);
+ ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
/*
* If the dquot has an LSN in it, recover the dquot only if it's less
return -EINVAL;
}
- /* existing allocation is fixed value */
- ASSERT(count == mp->m_ialloc_inos);
- ASSERT(length == mp->m_ialloc_blks);
- if (count != mp->m_ialloc_inos ||
- length != mp->m_ialloc_blks) {
- xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2");
+ /*
+ * The inode chunk is either full or sparse and we only support
+ * m_ialloc_min_blks sized sparse allocations at this time.
+ */
+ if (length != mp->m_ialloc_blks &&
+ length != mp->m_ialloc_min_blks) {
+ xfs_warn(log->l_mp,
+ "%s: unsupported chunk length", __FUNCTION__);
+ return -EINVAL;
+ }
+
+ /* verify inode count is consistent with extent length */
+ if ((count >> mp->m_sb.sb_inopblog) != length) {
+ xfs_warn(log->l_mp,
+ "%s: inconsistent inode count and chunk length",
+ __FUNCTION__);
return -EINVAL;
}
XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0))
return 0;
- xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length,
- be32_to_cpu(icl->icl_gen));
+ xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, length,
+ be32_to_cpu(icl->icl_gen));
return 0;
}
xlog_recover_add_to_cont_trans(
struct xlog *log,
struct xlog_recover *trans,
- xfs_caddr_t dp,
+ char *dp,
int len)
{
xlog_recover_item_t *item;
- xfs_caddr_t ptr, old_ptr;
+ char *ptr, *old_ptr;
int old_len;
if (list_empty(&trans->r_itemq)) {
/* finish copying rest of trans header */
xlog_recover_add_item(&trans->r_itemq);
- ptr = (xfs_caddr_t) &trans->r_theader +
+ ptr = (char *)&trans->r_theader +
sizeof(xfs_trans_header_t) - len;
memcpy(ptr, dp, len);
return 0;
xlog_recover_add_to_trans(
struct xlog *log,
struct xlog_recover *trans,
- xfs_caddr_t dp,
+ char *dp,
int len)
{
xfs_inode_log_format_t *in_f; /* any will do */
xlog_recover_item_t *item;
- xfs_caddr_t ptr;
+ char *ptr;
if (!len)
return 0;
xlog_recovery_process_trans(
struct xlog *log,
struct xlog_recover *trans,
- xfs_caddr_t dp,
+ char *dp,
unsigned int len,
unsigned int flags,
int pass)
struct hlist_head rhash[],
struct xlog_rec_header *rhead,
struct xlog_op_header *ohead,
- xfs_caddr_t dp,
- xfs_caddr_t end,
+ char *dp,
+ char *end,
int pass)
{
struct xlog_recover *trans;
struct xlog *log,
struct hlist_head rhash[],
struct xlog_rec_header *rhead,
- xfs_caddr_t dp,
+ char *dp,
int pass)
{
struct xlog_op_header *ohead;
- xfs_caddr_t end;
+ char *end;
int num_logops;
int error;
}
set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
return error;
abort_error:
- xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
return error;
}
xfs_trans_log_buf(tp, agibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
if (error)
goto out_error;
return;
out_abort:
- xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
out_error:
xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
return;
STATIC int
xlog_unpack_data_crc(
struct xlog_rec_header *rhead,
- xfs_caddr_t dp,
+ char *dp,
struct xlog *log)
{
__le32 crc;
STATIC int
xlog_unpack_data(
struct xlog_rec_header *rhead,
- xfs_caddr_t dp,
+ char *dp,
struct xlog *log)
{
int i, j, k;
{
xlog_rec_header_t *rhead;
xfs_daddr_t blk_no;
- xfs_caddr_t offset;
+ char *offset;
xfs_buf_t *hbp, *dbp;
int error = 0, h_size;
int bblks, split_bblks;
mp->m_inode_cluster_size = new_size;
}
+ /*
+ * If enabled, sparse inode chunk alignment is expected to match the
+ * cluster size. Full inode chunk alignment must match the chunk size,
+ * but that is checked on sb read verification...
+ */
+ if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
+ mp->m_sb.sb_spino_align !=
+ XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {
+ xfs_warn(mp,
+ "Sparse inode block alignment (%u) must match cluster size (%llu).",
+ mp->m_sb.sb_spino_align,
+ XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));
+ error = -EINVAL;
+ goto out_remove_uuid;
+ }
+
/*
* Set inode alignment fields
*/
__uint64_t m_flags; /* global mount flags */
int m_ialloc_inos; /* inodes in inode allocation */
int m_ialloc_blks; /* blocks in inode allocation */
+ int m_ialloc_min_blks;/* min blocks in sparse inode
+ * allocation */
int m_inoalign_mask;/* mask sb_inoalignmt if used */
uint m_qflags; /* quota status flags */
struct xfs_trans_resv m_resv; /* precomputed res values */
allocator */
#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */
+#define XFS_MOUNT_DAX (1ULL << 62) /* TEST ONLY! */
+
/*
* Default minimum read and write sizes.
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
goto out_drop_iolock;
}
}
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
out_drop_iolock:
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create,
XFS_QM_QINOCREATE_SPACE_RES(mp), 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
&committed);
if (error) {
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
- XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
return error;
}
}
spin_unlock(&mp->m_sb_lock);
xfs_log_sb(tp);
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
xfs_alert(mp, "%s failed (error %d)!", __func__, error);
tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
goto out_put;
}
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
if (error) {
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
- XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
goto out_unlock;
}
ASSERT(ip->i_d.di_nextents == 0);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_setqlim, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
goto out_rele;
}
dqp->dq_flags |= XFS_DQ_DIRTY;
xfs_trans_log_dquot(tp, dqp);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
out_rele:
xfs_qm_dqrele(dqp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
* We don't care about quotoff's performance.
*/
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
- return error;
+ return xfs_trans_commit(tp);
}
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
goto out;
}
* We don't care about quotoff's performance.
*/
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
if (error)
goto out;
typedef struct xfs_dqtrx {
struct xfs_dquot *qt_dquot; /* the dquot this refers to */
ulong qt_blk_res; /* blks reserved on a dquot */
- ulong qt_blk_res_used; /* blks used from the reservation */
ulong qt_ino_res; /* inode reserved on a dquot */
ulong qt_ino_res_used; /* inodes used from the reservation */
long qt_bcount_delta; /* dquot blk count changes */
* Allocate space to the file, as necessary.
*/
while (oblocks < nblocks) {
- int cancelflags = 0;
xfs_trans_t *tp;
tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC);
resblks, 0);
if (error)
goto error_cancel;
- cancelflags = XFS_TRANS_RELEASE_LOG_RES;
/*
* Lock the inode.
*/
* Allocate blocks to the bitmap file.
*/
nmap = 1;
- cancelflags |= XFS_TRANS_ABORT;
error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
XFS_BMAPI_METADATA, &firstblock,
resblks, &map, &nmap, &flist);
error = xfs_bmap_finish(&tp, &flist, &committed);
if (error)
goto error_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto error;
/*
* Now we need to clear the allocated blocks.
* Do this one block per transaction, to keep it simple.
*/
- cancelflags = 0;
for (bno = map.br_startoff, fsbno = map.br_startblock;
bno < map.br_startoff + map.br_blockcount;
bno++, fsbno++) {
if (bp == NULL) {
error = -EIO;
error_cancel:
- xfs_trans_cancel(tp, cancelflags);
+ xfs_trans_cancel(tp);
goto error;
}
memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
/*
* Commit the transaction.
*/
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
if (error)
goto error;
}
bmbno < nrbmblocks;
bmbno++) {
xfs_trans_t *tp;
- int cancelflags = 0;
*nmp = *mp;
nsbp = &nmp->m_sb;
mp->m_rbmip->i_d.di_size =
nsbp->sb_rbmblocks * nsbp->sb_blocksize;
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
- cancelflags |= XFS_TRANS_ABORT;
/*
* Get the summary inode into the transaction.
*/
nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno);
if (error) {
error_cancel:
- xfs_trans_cancel(tp, cancelflags);
+ xfs_trans_cancel(tp);
break;
}
/*
mp->m_rsumlevels = nrsumlevels;
mp->m_rsumsize = nrsumsize;
- error = xfs_trans_commit(tp, 0);
+ error = xfs_trans_commit(tp);
if (error)
break;
}
#define MNTOPT_DISCARD "discard" /* Discard unused blocks */
#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */
+#define MNTOPT_DAX "dax" /* Enable direct access to bdev pages */
+
/*
* Table driven mount option parser.
*
mp->m_flags |= XFS_MOUNT_DISCARD;
} else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
mp->m_flags &= ~XFS_MOUNT_DISCARD;
+#ifdef CONFIG_FS_DAX
+ } else if (!strcmp(this_char, MNTOPT_DAX)) {
+ mp->m_flags |= XFS_MOUNT_DAX;
+#endif
} else {
xfs_warn(mp, "unknown mount option [%s].", this_char);
return -EINVAL;
}
struct proc_xfs_info {
- int flag;
- char *str;
+ uint64_t flag;
+ char *str;
};
STATIC int
{ XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
{ XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
{ XFS_MOUNT_SMALL_INUMS, "," MNTOPT_32BITINODE },
+ { XFS_MOUNT_DAX, "," MNTOPT_DAX },
{ 0, NULL }
};
static struct proc_xfs_info xfs_info_unset[] = {
if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
sb->s_flags |= MS_I_VERSION;
+ if (mp->m_flags & XFS_MOUNT_DAX) {
+ xfs_warn(mp,
+ "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+ if (sb->s_blocksize != PAGE_SIZE) {
+ xfs_alert(mp,
+ "Filesystem block size invalid for DAX Turning DAX off.");
+ mp->m_flags &= ~XFS_MOUNT_DAX;
+ } else if (!sb->s_bdev->bd_disk->fops->direct_access) {
+ xfs_alert(mp,
+ "Block device does not support DAX Turning DAX off.");
+ mp->m_flags &= ~XFS_MOUNT_DAX;
+ }
+ }
+
error = xfs_mountfs(mp);
if (error)
goto out_filestream_unmount;
cur_chunk += sizeof(struct xfs_dsymlink_hdr);
}
- memcpy(link + offset, bp->b_addr, byte_cnt);
+ memcpy(link + offset, cur_chunk, byte_cnt);
pathlen -= byte_cnt;
offset += byte_cnt;
struct xfs_bmap_free free_list;
xfs_fsblock_t first_block;
bool unlock_dp_on_error = false;
- uint cancel_flags;
int committed;
xfs_fileoff_t first_fsb;
xfs_filblks_t fs_blocks;
return error;
tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
/*
* The symlink will fit into the inode data fork?
* There can't be any attributes so we get the whole variable part.
resblks = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0);
}
- if (error) {
- cancel_flags = 0;
+ if (error)
goto out_trans_cancel;
- }
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
unlock_dp_on_error = true;
if (error)
goto out_bmap_cancel;
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error)
goto out_release_inode;
out_bmap_cancel:
xfs_bmap_cancel(&free_list);
- cancel_flags |= XFS_TRANS_ABORT;
out_trans_cancel:
- xfs_trans_cancel(tp, cancel_flags);
+ xfs_trans_cancel(tp);
out_release_inode:
/*
* Wait until after the current transaction is aborted to finish the
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error) {
- xfs_trans_cancel(tp, 0);
+ xfs_trans_cancel(tp);
return error;
}
/*
* Commit the transaction containing extent freeing and EFDs.
*/
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ error = xfs_trans_commit(tp);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
goto error_unlock;
error_bmap_cancel:
xfs_bmap_cancel(&free_list);
error_trans_cancel:
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_trans_cancel(tp);
error_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
__entry->blocks, __entry->shift, __entry->writeio_blocks)
)
+TRACE_EVENT(xfs_irec_merge_pre,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
+ uint16_t holemask, xfs_agino_t nagino, uint16_t nholemask),
+ TP_ARGS(mp, agno, agino, holemask, nagino, nholemask),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agino_t, agino)
+ __field(uint16_t, holemask)
+ __field(xfs_agino_t, nagino)
+ __field(uint16_t, nholemask)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->agino = agino;
+ __entry->holemask = holemask;
+ __entry->nagino = nagino;
+ __entry->nholemask = holemask;
+ ),
+ TP_printk("dev %d:%d agno %d inobt (%u:0x%x) new (%u:0x%x)",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+ __entry->agino, __entry->holemask, __entry->nagino,
+ __entry->nholemask)
+)
+
+TRACE_EVENT(xfs_irec_merge_post,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
+ uint16_t holemask),
+ TP_ARGS(mp, agno, agino, holemask),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agino_t, agino)
+ __field(uint16_t, holemask)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->agino = agino;
+ __entry->holemask = holemask;
+ ),
+ TP_printk("dev %d:%d agno %d inobt (%u:0x%x)", MAJOR(__entry->dev),
+ MINOR(__entry->dev), __entry->agno, __entry->agino,
+ __entry->holemask)
+)
+
#define DEFINE_IREF_EVENT(name) \
DEFINE_EVENT(xfs_iref_class, name, \
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
* blocks. Locks and log items, however, are no inherited. They must
* be added to the new transaction explicitly.
*/
-xfs_trans_t *
+STATIC xfs_trans_t *
xfs_trans_dup(
xfs_trans_t *tp)
{
*/
undo_log:
if (resp->tr_logres > 0) {
- int log_flags;
-
- if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
- log_flags = XFS_LOG_REL_PERM_RESERV;
- } else {
- log_flags = 0;
- }
- xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags);
+ xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, false);
tp->t_ticket = NULL;
tp->t_log_res = 0;
tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
xfs_trans_free_items(
struct xfs_trans *tp,
xfs_lsn_t commit_lsn,
- int flags)
+ bool abort)
{
struct xfs_log_item_desc *lidp, *next;
if (commit_lsn != NULLCOMMITLSN)
lip->li_ops->iop_committing(lip, commit_lsn);
- if (flags & XFS_TRANS_ABORT)
+ if (abort)
lip->li_flags |= XFS_LI_ABORTED;
lip->li_ops->iop_unlock(lip);
* have already been unlocked as if the commit had succeeded.
* Do not reference the transaction structure after this call.
*/
-int
-xfs_trans_commit(
+static int
+__xfs_trans_commit(
struct xfs_trans *tp,
- uint flags)
+ bool regrant)
{
struct xfs_mount *mp = tp->t_mountp;
xfs_lsn_t commit_lsn = -1;
int error = 0;
- int log_flags = 0;
int sync = tp->t_flags & XFS_TRANS_SYNC;
- /*
- * Determine whether this commit is releasing a permanent
- * log reservation or not.
- */
- if (flags & XFS_TRANS_RELEASE_LOG_RES) {
- ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
- log_flags = XFS_LOG_REL_PERM_RESERV;
- }
-
/*
* If there is nothing to be logged by the transaction,
* then unlock all of the items associated with the
xfs_trans_apply_sb_deltas(tp);
xfs_trans_apply_dquot_deltas(tp);
- xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
+ xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_trans_free(tp);
*/
xfs_trans_unreserve_and_mod_dquots(tp);
if (tp->t_ticket) {
- commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
+ commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, regrant);
if (commit_lsn == -1 && !error)
error = -EIO;
}
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
- xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0);
+ xfs_trans_free_items(tp, NULLCOMMITLSN, !!error);
xfs_trans_free(tp);
XFS_STATS_INC(xs_trans_empty);
return error;
}
+int
+xfs_trans_commit(
+ struct xfs_trans *tp)
+{
+ return __xfs_trans_commit(tp, false);
+}
+
/*
* Unlock all of the transaction's items and free the transaction.
* The transaction must not have modified any of its items, because
*/
void
xfs_trans_cancel(
- xfs_trans_t *tp,
- int flags)
+ struct xfs_trans *tp)
{
- int log_flags;
- xfs_mount_t *mp = tp->t_mountp;
+ struct xfs_mount *mp = tp->t_mountp;
+ bool dirty = (tp->t_flags & XFS_TRANS_DIRTY);
- /*
- * See if the caller is being too lazy to figure out if
- * the transaction really needs an abort.
- */
- if ((flags & XFS_TRANS_ABORT) && !(tp->t_flags & XFS_TRANS_DIRTY))
- flags &= ~XFS_TRANS_ABORT;
/*
* See if the caller is relying on us to shut down the
* filesystem. This happens in paths where we detect
* corruption and decide to give up.
*/
- if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) {
+ if (dirty && !XFS_FORCED_SHUTDOWN(mp)) {
XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
}
#ifdef DEBUG
- if (!(flags & XFS_TRANS_ABORT) && !XFS_FORCED_SHUTDOWN(mp)) {
+ if (!dirty && !XFS_FORCED_SHUTDOWN(mp)) {
struct xfs_log_item_desc *lidp;
list_for_each_entry(lidp, &tp->t_items, lid_trans)
xfs_trans_unreserve_and_mod_sb(tp);
xfs_trans_unreserve_and_mod_dquots(tp);
- if (tp->t_ticket) {
- if (flags & XFS_TRANS_RELEASE_LOG_RES) {
- ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
- log_flags = XFS_LOG_REL_PERM_RESERV;
- } else {
- log_flags = 0;
- }
- xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
- }
+ if (tp->t_ticket)
+ xfs_log_done(mp, tp->t_ticket, NULL, false);
/* mark this thread as no longer being in a transaction */
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
- xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
+ xfs_trans_free_items(tp, NULLCOMMITLSN, dirty);
xfs_trans_free(tp);
}
/*
* Roll from one trans in the sequence of PERMANENT transactions to
* the next: permanent transactions are only flushed out when
- * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon
+ * committed with xfs_trans_commit(), but we still want as soon
* as possible to let chunks of it go to the log. So we commit the
* chunk we've been working on and get a new transaction to continue.
*/
* Ensure that the inode is always logged.
*/
trans = *tpp;
- xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
+ if (dp)
+ xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
/*
* Copy the critical parameters from one trans to the next.
* is in progress. The caller takes the responsibility to cancel
* the duplicate transaction that gets returned.
*/
- error = xfs_trans_commit(trans, 0);
+ error = __xfs_trans_commit(trans, true);
if (error)
return error;
trans = *tpp;
- /*
- * transaction commit worked ok so we can drop the extra ticket
- * reference that we gained in xfs_trans_dup()
- */
- xfs_log_ticket_put(trans->t_ticket);
-
-
/*
* Reserve space in the log for th next transaction.
* This also pushes items in the "AIL", the list of logged items,
if (error)
return error;
- xfs_trans_ijoin(trans, dp, 0);
+ if (dp)
+ xfs_trans_ijoin(trans, dp, 0);
return 0;
}
* XFS transaction mechanism exported interfaces that are
* actually macros.
*/
-#define xfs_trans_get_log_res(tp) ((tp)->t_log_res)
-#define xfs_trans_get_log_count(tp) ((tp)->t_log_count)
#define xfs_trans_get_block_res(tp) ((tp)->t_blk_res)
#define xfs_trans_set_sync(tp) ((tp)->t_flags |= XFS_TRANS_SYNC)
*/
xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint);
xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t);
-xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
int xfs_trans_reserve(struct xfs_trans *, struct xfs_trans_res *,
uint, uint);
void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
struct xfs_efd_log_item *,
xfs_fsblock_t,
xfs_extlen_t);
-int xfs_trans_commit(xfs_trans_t *, uint flags);
+int xfs_trans_commit(struct xfs_trans *);
int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
-void xfs_trans_cancel(xfs_trans_t *, int);
+void xfs_trans_cancel(xfs_trans_t *);
int xfs_trans_ail_init(struct xfs_mount *);
void xfs_trans_ail_destroy(struct xfs_mount *);
{
struct xfs_log_item *lip = cur->item;
- if ((__psint_t)lip & 1)
+ if ((uintptr_t)lip & 1)
lip = xfs_ail_min(ailp);
if (lip)
cur->item = xfs_ail_next(ailp, lip);
list_for_each_entry(cur, &ailp->xa_cursors, list) {
if (cur->item == lip)
cur->item = (struct xfs_log_item *)
- ((__psint_t)cur->item | 1);
+ ((uintptr_t)cur->item | 1);
}
}
* find the place in the AIL where the items belong.
*/
lip = cur ? cur->item : NULL;
- if (!lip || (__psint_t) lip & 1)
+ if (!lip || (uintptr_t)lip & 1)
lip = __xfs_trans_ail_cursor_last(ailp, lsn);
/*
xfs_trans_t *ntp)
{
xfs_dqtrx_t *oq, *nq;
- int i,j;
+ int i, j;
xfs_dqtrx_t *oqa, *nqa;
+ ulong blk_res_used;
if (!otp->t_dqinfo)
return;
* Because the quota blk reservation is carried forward,
* it is also necessary to carry forward the DQ_DIRTY flag.
*/
- if(otp->t_flags & XFS_TRANS_DQ_DIRTY)
+ if (otp->t_flags & XFS_TRANS_DQ_DIRTY)
ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) {
oqa = otp->t_dqinfo->dqs[j];
nqa = ntp->t_dqinfo->dqs[j];
for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+ blk_res_used = 0;
+
if (oqa[i].qt_dquot == NULL)
break;
oq = &oqa[i];
nq = &nqa[i];
+ if (oq->qt_blk_res && oq->qt_bcount_delta > 0)
+ blk_res_used = oq->qt_bcount_delta;
+
nq->qt_dquot = oq->qt_dquot;
nq->qt_bcount_delta = nq->qt_icount_delta = 0;
nq->qt_rtbcount_delta = 0;
/*
* Transfer whatever is left of the reservations.
*/
- nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used;
- oq->qt_blk_res = oq->qt_blk_res_used;
+ nq->qt_blk_res = oq->qt_blk_res - blk_res_used;
+ oq->qt_blk_res = blk_res_used;
nq->qt_rtblk_res = oq->qt_rtblk_res -
oq->qt_rtblk_res_used;
* disk blocks used.
*/
case XFS_TRANS_DQ_BCOUNT:
- if (qtrx->qt_blk_res && delta > 0) {
- qtrx->qt_blk_res_used += (ulong)delta;
- ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
- }
qtrx->qt_bcount_delta += delta;
break;
* reservation that a transaction structure knows of.
*/
if (qtrx->qt_blk_res != 0) {
- if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
- if (qtrx->qt_blk_res >
- qtrx->qt_blk_res_used)
+ ulong blk_res_used = 0;
+
+ if (qtrx->qt_bcount_delta > 0)
+ blk_res_used = qtrx->qt_bcount_delta;
+
+ if (qtrx->qt_blk_res != blk_res_used) {
+ if (qtrx->qt_blk_res > blk_res_used)
dqp->q_res_bcount -= (xfs_qcnt_t)
(qtrx->qt_blk_res -
- qtrx->qt_blk_res_used);
+ blk_res_used);
else
dqp->q_res_bcount -= (xfs_qcnt_t)
- (qtrx->qt_blk_res_used -
+ (blk_res_used -
qtrx->qt_blk_res);
}
} else {
void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
void xfs_trans_del_item(struct xfs_log_item *);
void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
- int flags);
+ bool abort);
void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
return fwnode && fwnode->type == FWNODE_ACPI;
}
-static inline struct acpi_device *acpi_node(struct fwnode_handle *fwnode)
+static inline struct acpi_device *to_acpi_node(struct fwnode_handle *fwnode)
{
return is_acpi_node(fwnode) ?
container_of(fwnode, struct acpi_device, fwnode) : NULL;
{
return acpi_backlight_vendor;
}
-static void acpi_video_set_dmi_backlight_type(enum acpi_backlight_type type)
+static inline void acpi_video_set_dmi_backlight_type(enum acpi_backlight_type type)
{
}
#endif
#endif
#ifdef CONFIG_SMP
+
+#ifndef smp_mb
#define smp_mb() mb()
+#endif
+
+#ifndef smp_rmb
#define smp_rmb() rmb()
+#endif
+
+#ifndef smp_wmb
#define smp_wmb() wmb()
+#endif
+
+#ifndef smp_read_barrier_depends
#define smp_read_barrier_depends() read_barrier_depends()
-#else
+#endif
+
+#else /* !CONFIG_SMP */
+
+#ifndef smp_mb
#define smp_mb() barrier()
+#endif
+
+#ifndef smp_rmb
#define smp_rmb() barrier()
+#endif
+
+#ifndef smp_wmb
#define smp_wmb() barrier()
+#endif
+
+#ifndef smp_read_barrier_depends
#define smp_read_barrier_depends() do { } while (0)
#endif
+#endif /* CONFIG_SMP */
+
#ifndef smp_store_mb
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#endif
return adev ? adev->handle : NULL;
}
-#define ACPI_COMPANION(dev) acpi_node((dev)->fwnode)
+#define ACPI_COMPANION(dev) to_acpi_node((dev)->fwnode)
#define ACPI_COMPANION_SET(dev, adev) set_primary_fwnode(dev, (adev) ? \
acpi_fwnode_handle(adev) : NULL)
#define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev))
return false;
}
-static inline struct acpi_device *acpi_node(struct fwnode_handle *fwnode)
+static inline struct acpi_device *to_acpi_node(struct fwnode_handle *fwnode)
{
return NULL;
}
(volatile typeof(x) *)&(x); })
#define ACCESS_ONCE(x) (*__ACCESS_ONCE(x))
+/**
+ * lockless_dereference() - safely load a pointer for later dereference
+ * @p: The pointer to load
+ *
+ * Similar to rcu_dereference(), but for situations where the pointed-to
+ * object's lifetime is managed by something other than RCU. That
+ * "something other" might be reference counting or simple immortality.
+ */
+#define lockless_dereference(p) \
+({ \
+ typeof(p) _________p1 = READ_ONCE(p); \
+ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
+ (_________p1); \
+})
+
/* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */
#ifdef CONFIG_KPROBES
# define __kprobes __attribute__((__section__(".kprobes.text")))
struct buffer_head *bh_result, int create);
typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
ssize_t bytes, void *private);
+typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate);
#define MAY_EXEC 0x00000001
#define MAY_WRITE 0x00000002
int dax_clear_blocks(struct inode *, sector_t block, long size);
int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
int dax_truncate_page(struct inode *, loff_t from, get_block_t);
-int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
+int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
+ dax_iodone_t);
+int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
+ dax_iodone_t);
int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
-#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb)
+#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod)
+#define __dax_mkwrite(vma, vmf, gb, iod) __dax_fault(vma, vmf, gb, iod)
#ifdef CONFIG_BLOCK
typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
IRQCHIP_EOI_THREADED = (1 << 6),
};
-/* This include will go away once we isolated irq_desc usage to core code */
#include <linux/irqdesc.h>
/*
/*
* Core internal functions to deal with irq descriptors
- *
- * This include will move to kernel/irq once we cleaned up the tree.
- * For now it's included from <linux/irq.h>
*/
struct irq_affinity_notify;
#endif
}
+static inline unsigned int irq_desc_get_irq(struct irq_desc *desc)
+{
+ return desc->irq_data.irq;
+}
+
static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc)
{
return &desc->irq_data;
desc->name = name;
}
+/**
+ * irq_set_handler_locked - Set irq handler from a locked region
+ * @data: Pointer to the irq_data structure which identifies the irq
+ * @handler: Flow control handler function for this interrupt
+ *
+ * Sets the handler in the irq descriptor associated to @data.
+ *
+ * Must be called with irq_desc locked and valid parameters. Typical
+ * call site is the irq_set_type() callback.
+ */
+static inline void irq_set_handler_locked(struct irq_data *data,
+ irq_flow_handler_t handler)
+{
+ struct irq_desc *desc = irq_data_to_desc(data);
+
+ desc->handle_irq = handler;
+}
+
+/**
+ * irq_set_chip_handler_name_locked - Set chip, handler and name from a locked region
+ * @data: Pointer to the irq_data structure for which the chip is set
+ * @chip: Pointer to the new irq chip
+ * @handler: Flow control handler function for this interrupt
+ * @name: Name of the interrupt
+ *
+ * Replace the irq chip at the proper hierarchy level in @data and
+ * sets the handler and name in the associated irq descriptor.
+ *
+ * Must be called with irq_desc locked and valid parameters.
+ */
+static inline void
+irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip,
+ irq_flow_handler_t handler, const char *name)
+{
+ struct irq_desc *desc = irq_data_to_desc(data);
+
+ desc->handle_irq = handler;
+ desc->name = name;
+ data->chip = chip;
+}
+
static inline int irq_balancing_disabled(unsigned int irq)
{
struct irq_desc *desc;
; \
else
-#ifdef CONFIG_SMP
-#define irq_node(irq) (irq_get_irq_data(irq)->node)
-#else
-#define irq_node(irq) 0
-#endif
-
# define for_each_active_irq(irq) \
for (irq = irq_get_next_irq(0); irq < nr_irqs; \
irq = irq_get_next_irq(irq + 1))
#endif
/* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */
-#define VERIFY_OCTAL_PERMISSIONS(perms) \
- (BUILD_BUG_ON_ZERO((perms) < 0) + \
- BUILD_BUG_ON_ZERO((perms) > 0777) + \
- /* User perms >= group perms >= other perms */ \
- BUILD_BUG_ON_ZERO(((perms) >> 6) < (((perms) >> 3) & 7)) + \
- BUILD_BUG_ON_ZERO((((perms) >> 3) & 7) < ((perms) & 7)) + \
- /* Other writable? Generally considered a bad idea. */ \
- BUILD_BUG_ON_ZERO((perms) & 2) + \
+#define VERIFY_OCTAL_PERMISSIONS(perms) \
+ (BUILD_BUG_ON_ZERO((perms) < 0) + \
+ BUILD_BUG_ON_ZERO((perms) > 0777) + \
+ /* USER_READABLE >= GROUP_READABLE >= OTHER_READABLE */ \
+ BUILD_BUG_ON_ZERO((((perms) >> 6) & 4) < (((perms) >> 3) & 4)) + \
+ BUILD_BUG_ON_ZERO((((perms) >> 3) & 4) < ((perms) & 4)) + \
+ /* USER_WRITABLE >= GROUP_WRITABLE */ \
+ BUILD_BUG_ON_ZERO((((perms) >> 6) & 2) < (((perms) >> 3) & 2)) + \
+ /* OTHER_WRITABLE? Generally considered a bad idea. */ \
+ BUILD_BUG_ON_ZERO((perms) & 2) + \
(perms))
#endif
#include <linux/moduleparam.h>
#include <linux/jump_label.h>
#include <linux/export.h>
+#include <linux/rbtree_latch.h>
#include <linux/percpu.h>
#include <asm/module.h>
MODULE_STATE_UNFORMED, /* Still setting it up. */
};
+struct module;
+
+struct mod_tree_node {
+ struct module *mod;
+ struct latch_tree_node node;
+};
+
struct module {
enum module_state state;
unsigned int num_syms;
/* Kernel parameters. */
+#ifdef CONFIG_SYSFS
+ struct mutex param_lock;
+#endif
struct kernel_param *kp;
unsigned int num_kp;
/* Startup function. */
int (*init)(void);
- /* If this is non-NULL, vfree after init() returns */
- void *module_init;
+ /*
+ * If this is non-NULL, vfree() after init() returns.
+ *
+ * Cacheline align here, such that:
+ * module_init, module_core, init_size, core_size,
+ * init_text_size, core_text_size and mtn_core::{mod,node[0]}
+ * are on the same cacheline.
+ */
+ void *module_init ____cacheline_aligned;
/* Here is the actual code + data, vfree'd on unload. */
void *module_core;
/* The size of the executable code in each section. */
unsigned int init_text_size, core_text_size;
+#ifdef CONFIG_MODULES_TREE_LOOKUP
+ /*
+ * We want mtn_core::{mod,node[0]} to be in the same cacheline as the
+ * above entries such that a regular lookup will only touch one
+ * cacheline.
+ */
+ struct mod_tree_node mtn_core;
+ struct mod_tree_node mtn_init;
+#endif
+
/* Size of RO sections of the module (text+rodata) */
unsigned int init_ro_size, core_ro_size;
ctor_fn_t *ctors;
unsigned int num_ctors;
#endif
-};
+} ____cacheline_aligned;
#ifndef MODULE_ARCH_INIT
#define MODULE_ARCH_INIT {}
#endif
bool unused;
};
-/* Search for an exported symbol by name. */
+/*
+ * Search for an exported symbol by name.
+ *
+ * Must be called with module_mutex held or preemption disabled.
+ */
const struct kernel_symbol *find_symbol(const char *name,
struct module **owner,
const unsigned long **crc,
bool gplok,
bool warn);
-/* Walk the exported symbol table */
+/*
+ * Walk the exported symbol table
+ *
+ * Must be called with module_mutex held or preemption disabled.
+ */
bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
struct module *owner,
void *data), void *data);
struct kernel_param {
const char *name;
+ struct module *mod;
const struct kernel_param_ops *ops;
- u16 perm;
+ const u16 perm;
s8 level;
u8 flags;
union {
*
* @perm is 0 if the the variable is not to appear in sysfs, or 0444
* for world-readable, 0644 for root-writable, etc. Note that if it
- * is writable, you may need to use kparam_block_sysfs_write() around
+ * is writable, you may need to use kernel_param_lock() around
* accesses (esp. charp, which can be kfreed when it changes).
*
* The @type is simply pasted to refer to a param_ops_##type and a
parameters. */
#define __module_param_call(prefix, name, ops, arg, perm, level, flags) \
/* Default value instead of permissions? */ \
- static const char __param_str_##name[] = prefix #name; \
+ static const char __param_str_##name[] = prefix #name; \
static struct kernel_param __moduleparam_const __param_##name \
__used \
__attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \
- = { __param_str_##name, ops, VERIFY_OCTAL_PERMISSIONS(perm), \
- level, flags, { arg } }
+ = { __param_str_##name, THIS_MODULE, ops, \
+ VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } }
/* Obsolete - use module_param_cb() */
#define module_param_call(name, set, get, arg, perm) \
- static struct kernel_param_ops __param_ops_##name = \
+ static const struct kernel_param_ops __param_ops_##name = \
{ .flags = 0, (void *)set, (void *)get }; \
__module_param_call(MODULE_PARAM_PREFIX, \
name, &__param_ops_##name, arg, \
return 0;
}
-/**
- * kparam_block_sysfs_write - make sure a parameter isn't written via sysfs.
- * @name: the name of the parameter
- *
- * There's no point blocking write on a paramter that isn't writable via sysfs!
- */
-#define kparam_block_sysfs_write(name) \
- do { \
- BUG_ON(!(__param_##name.perm & 0222)); \
- __kernel_param_lock(); \
- } while (0)
-
-/**
- * kparam_unblock_sysfs_write - allows sysfs to write to a parameter again.
- * @name: the name of the parameter
- */
-#define kparam_unblock_sysfs_write(name) \
- do { \
- BUG_ON(!(__param_##name.perm & 0222)); \
- __kernel_param_unlock(); \
- } while (0)
-
-/**
- * kparam_block_sysfs_read - make sure a parameter isn't read via sysfs.
- * @name: the name of the parameter
- *
- * This also blocks sysfs writes.
- */
-#define kparam_block_sysfs_read(name) \
- do { \
- BUG_ON(!(__param_##name.perm & 0444)); \
- __kernel_param_lock(); \
- } while (0)
-
-/**
- * kparam_unblock_sysfs_read - allows sysfs to read a parameter again.
- * @name: the name of the parameter
- */
-#define kparam_unblock_sysfs_read(name) \
- do { \
- BUG_ON(!(__param_##name.perm & 0444)); \
- __kernel_param_unlock(); \
- } while (0)
-
#ifdef CONFIG_SYSFS
-extern void __kernel_param_lock(void);
-extern void __kernel_param_unlock(void);
+extern void kernel_param_lock(struct module *mod);
+extern void kernel_param_unlock(struct module *mod);
#else
-static inline void __kernel_param_lock(void)
+static inline void kernel_param_lock(struct module *mod)
{
}
-static inline void __kernel_param_unlock(void)
+static inline void kernel_param_unlock(struct module *mod)
{
}
#endif
#define __param_check(name, p, type) \
static inline type __always_unused *__check_##name(void) { return(p); }
-extern struct kernel_param_ops param_ops_byte;
+extern const struct kernel_param_ops param_ops_byte;
extern int param_set_byte(const char *val, const struct kernel_param *kp);
extern int param_get_byte(char *buffer, const struct kernel_param *kp);
#define param_check_byte(name, p) __param_check(name, p, unsigned char)
-extern struct kernel_param_ops param_ops_short;
+extern const struct kernel_param_ops param_ops_short;
extern int param_set_short(const char *val, const struct kernel_param *kp);
extern int param_get_short(char *buffer, const struct kernel_param *kp);
#define param_check_short(name, p) __param_check(name, p, short)
-extern struct kernel_param_ops param_ops_ushort;
+extern const struct kernel_param_ops param_ops_ushort;
extern int param_set_ushort(const char *val, const struct kernel_param *kp);
extern int param_get_ushort(char *buffer, const struct kernel_param *kp);
#define param_check_ushort(name, p) __param_check(name, p, unsigned short)
-extern struct kernel_param_ops param_ops_int;
+extern const struct kernel_param_ops param_ops_int;
extern int param_set_int(const char *val, const struct kernel_param *kp);
extern int param_get_int(char *buffer, const struct kernel_param *kp);
#define param_check_int(name, p) __param_check(name, p, int)
-extern struct kernel_param_ops param_ops_uint;
+extern const struct kernel_param_ops param_ops_uint;
extern int param_set_uint(const char *val, const struct kernel_param *kp);
extern int param_get_uint(char *buffer, const struct kernel_param *kp);
#define param_check_uint(name, p) __param_check(name, p, unsigned int)
-extern struct kernel_param_ops param_ops_long;
+extern const struct kernel_param_ops param_ops_long;
extern int param_set_long(const char *val, const struct kernel_param *kp);
extern int param_get_long(char *buffer, const struct kernel_param *kp);
#define param_check_long(name, p) __param_check(name, p, long)
-extern struct kernel_param_ops param_ops_ulong;
+extern const struct kernel_param_ops param_ops_ulong;
extern int param_set_ulong(const char *val, const struct kernel_param *kp);
extern int param_get_ulong(char *buffer, const struct kernel_param *kp);
#define param_check_ulong(name, p) __param_check(name, p, unsigned long)
-extern struct kernel_param_ops param_ops_ullong;
+extern const struct kernel_param_ops param_ops_ullong;
extern int param_set_ullong(const char *val, const struct kernel_param *kp);
extern int param_get_ullong(char *buffer, const struct kernel_param *kp);
#define param_check_ullong(name, p) __param_check(name, p, unsigned long long)
-extern struct kernel_param_ops param_ops_charp;
+extern const struct kernel_param_ops param_ops_charp;
extern int param_set_charp(const char *val, const struct kernel_param *kp);
extern int param_get_charp(char *buffer, const struct kernel_param *kp);
#define param_check_charp(name, p) __param_check(name, p, char *)
/* We used to allow int as well as bool. We're taking that away! */
-extern struct kernel_param_ops param_ops_bool;
+extern const struct kernel_param_ops param_ops_bool;
extern int param_set_bool(const char *val, const struct kernel_param *kp);
extern int param_get_bool(char *buffer, const struct kernel_param *kp);
#define param_check_bool(name, p) __param_check(name, p, bool)
-extern struct kernel_param_ops param_ops_invbool;
+extern const struct kernel_param_ops param_ops_bool_enable_only;
+extern int param_set_bool_enable_only(const char *val,
+ const struct kernel_param *kp);
+/* getter is the same as for the regular bool */
+#define param_check_bool_enable_only param_check_bool
+
+extern const struct kernel_param_ops param_ops_invbool;
extern int param_set_invbool(const char *val, const struct kernel_param *kp);
extern int param_get_invbool(char *buffer, const struct kernel_param *kp);
#define param_check_invbool(name, p) __param_check(name, p, bool)
/* An int, which can only be set like a bool (though it shows as an int). */
-extern struct kernel_param_ops param_ops_bint;
+extern const struct kernel_param_ops param_ops_bint;
extern int param_set_bint(const char *val, const struct kernel_param *kp);
#define param_get_bint param_get_int
#define param_check_bint param_check_int
perm, -1, 0); \
__MODULE_PARM_TYPE(name, "array of " #type)
-extern struct kernel_param_ops param_array_ops;
+extern const struct kernel_param_ops param_array_ops;
-extern struct kernel_param_ops param_ops_string;
+extern const struct kernel_param_ops param_ops_string;
extern int param_set_copystring(const char *val, const struct kernel_param *);
extern int param_get_string(char *buffer, const struct kernel_param *kp);
return fwnode && fwnode->type == FWNODE_OF;
}
-static inline struct device_node *of_node(struct fwnode_handle *fwnode)
+static inline struct device_node *to_of_node(struct fwnode_handle *fwnode)
{
return fwnode ? container_of(fwnode, struct device_node, fwnode) : NULL;
}
return false;
}
-static inline struct device_node *of_node(struct fwnode_handle *fwnode)
+static inline struct device_node *to_of_node(struct fwnode_handle *fwnode)
{
return NULL;
}
#include <linux/kernel.h>
#include <linux/stddef.h>
+#include <linux/rcupdate.h>
struct rb_node {
unsigned long __rb_parent_color;
extern struct rb_node *rb_next_postorder(const struct rb_node *);
/* Fast replacement of a single node without remove/rebalance/add/rebalance */
-extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
struct rb_root *root);
-static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
- struct rb_node ** rb_link)
+static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
+ struct rb_node **rb_link)
{
node->__rb_parent_color = (unsigned long)parent;
node->rb_left = node->rb_right = NULL;
*rb_link = node;
}
+static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
+ struct rb_node **rb_link)
+{
+ node->__rb_parent_color = (unsigned long)parent;
+ node->rb_left = node->rb_right = NULL;
+
+ rcu_assign_pointer(*rb_link, node);
+}
+
#define rb_entry_safe(ptr, type, member) \
({ typeof(ptr) ____ptr = (ptr); \
____ptr ? rb_entry(____ptr, type, member) : NULL; \
{
if (parent) {
if (parent->rb_left == old)
- parent->rb_left = new;
+ WRITE_ONCE(parent->rb_left, new);
else
- parent->rb_right = new;
+ WRITE_ONCE(parent->rb_right, new);
} else
- root->rb_node = new;
+ WRITE_ONCE(root->rb_node, new);
}
extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
const struct rb_augment_callbacks *augment)
{
- struct rb_node *child = node->rb_right, *tmp = node->rb_left;
+ struct rb_node *child = node->rb_right;
+ struct rb_node *tmp = node->rb_left;
struct rb_node *parent, *rebalance;
unsigned long pc;
tmp = parent;
} else {
struct rb_node *successor = child, *child2;
+
tmp = child->rb_left;
if (!tmp) {
/*
*/
parent = successor;
child2 = successor->rb_right;
+
augment->copy(node, successor);
} else {
/*
successor = tmp;
tmp = tmp->rb_left;
} while (tmp);
- parent->rb_left = child2 = successor->rb_right;
- successor->rb_right = child;
+ child2 = successor->rb_right;
+ WRITE_ONCE(parent->rb_left, child2);
+ WRITE_ONCE(successor->rb_right, child);
rb_set_parent(child, successor);
+
augment->copy(node, successor);
augment->propagate(parent, successor);
}
- successor->rb_left = tmp = node->rb_left;
+ tmp = node->rb_left;
+ WRITE_ONCE(successor->rb_left, tmp);
rb_set_parent(tmp, successor);
pc = node->__rb_parent_color;
tmp = __rb_parent(pc);
__rb_change_child(node, successor, tmp, root);
+
if (child2) {
successor->__rb_parent_color = pc;
rb_set_parent_color(child2, parent, RB_BLACK);
--- /dev/null
+/*
+ * Latched RB-trees
+ *
+ * Copyright (C) 2015 Intel Corp., Peter Zijlstra <peterz@infradead.org>
+ *
+ * Since RB-trees have non-atomic modifications they're not immediately suited
+ * for RCU/lockless queries. Even though we made RB-tree lookups non-fatal for
+ * lockless lookups; we cannot guarantee they return a correct result.
+ *
+ * The simplest solution is a seqlock + RB-tree, this will allow lockless
+ * lookups; but has the constraint (inherent to the seqlock) that read sides
+ * cannot nest in write sides.
+ *
+ * If we need to allow unconditional lookups (say as required for NMI context
+ * usage) we need a more complex setup; this data structure provides this by
+ * employing the latch technique -- see @raw_write_seqcount_latch -- to
+ * implement a latched RB-tree which does allow for unconditional lookups by
+ * virtue of always having (at least) one stable copy of the tree.
+ *
+ * However, while we have the guarantee that there is at all times one stable
+ * copy, this does not guarantee an iteration will not observe modifications.
+ * What might have been a stable copy at the start of the iteration, need not
+ * remain so for the duration of the iteration.
+ *
+ * Therefore, this does require a lockless RB-tree iteration to be non-fatal;
+ * see the comment in lib/rbtree.c. Note however that we only require the first
+ * condition -- not seeing partial stores -- because the latch thing isolates
+ * us from loops. If we were to interrupt a modification the lookup would be
+ * pointed at the stable tree and complete while the modification was halted.
+ */
+
+#ifndef RB_TREE_LATCH_H
+#define RB_TREE_LATCH_H
+
+#include <linux/rbtree.h>
+#include <linux/seqlock.h>
+
+struct latch_tree_node {
+ struct rb_node node[2];
+};
+
+struct latch_tree_root {
+ seqcount_t seq;
+ struct rb_root tree[2];
+};
+
+/**
+ * latch_tree_ops - operators to define the tree order
+ * @less: used for insertion; provides the (partial) order between two elements.
+ * @comp: used for lookups; provides the order between the search key and an element.
+ *
+ * The operators are related like:
+ *
+ * comp(a->key,b) < 0 := less(a,b)
+ * comp(a->key,b) > 0 := less(b,a)
+ * comp(a->key,b) == 0 := !less(a,b) && !less(b,a)
+ *
+ * If these operators define a partial order on the elements we make no
+ * guarantee on which of the elements matching the key is found. See
+ * latch_tree_find().
+ */
+struct latch_tree_ops {
+ bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b);
+ int (*comp)(void *key, struct latch_tree_node *b);
+};
+
+static __always_inline struct latch_tree_node *
+__lt_from_rb(struct rb_node *node, int idx)
+{
+ return container_of(node, struct latch_tree_node, node[idx]);
+}
+
+static __always_inline void
+__lt_insert(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx,
+ bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b))
+{
+ struct rb_root *root = <r->tree[idx];
+ struct rb_node **link = &root->rb_node;
+ struct rb_node *node = <n->node[idx];
+ struct rb_node *parent = NULL;
+ struct latch_tree_node *ltp;
+
+ while (*link) {
+ parent = *link;
+ ltp = __lt_from_rb(parent, idx);
+
+ if (less(ltn, ltp))
+ link = &parent->rb_left;
+ else
+ link = &parent->rb_right;
+ }
+
+ rb_link_node_rcu(node, parent, link);
+ rb_insert_color(node, root);
+}
+
+static __always_inline void
+__lt_erase(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx)
+{
+ rb_erase(<n->node[idx], <r->tree[idx]);
+}
+
+static __always_inline struct latch_tree_node *
+__lt_find(void *key, struct latch_tree_root *ltr, int idx,
+ int (*comp)(void *key, struct latch_tree_node *node))
+{
+ struct rb_node *node = rcu_dereference_raw(ltr->tree[idx].rb_node);
+ struct latch_tree_node *ltn;
+ int c;
+
+ while (node) {
+ ltn = __lt_from_rb(node, idx);
+ c = comp(key, ltn);
+
+ if (c < 0)
+ node = rcu_dereference_raw(node->rb_left);
+ else if (c > 0)
+ node = rcu_dereference_raw(node->rb_right);
+ else
+ return ltn;
+ }
+
+ return NULL;
+}
+
+/**
+ * latch_tree_insert() - insert @node into the trees @root
+ * @node: nodes to insert
+ * @root: trees to insert @node into
+ * @ops: operators defining the node order
+ *
+ * It inserts @node into @root in an ordered fashion such that we can always
+ * observe one complete tree. See the comment for raw_write_seqcount_latch().
+ *
+ * The inserts use rcu_assign_pointer() to publish the element such that the
+ * tree structure is stored before we can observe the new @node.
+ *
+ * All modifications (latch_tree_insert, latch_tree_remove) are assumed to be
+ * serialized.
+ */
+static __always_inline void
+latch_tree_insert(struct latch_tree_node *node,
+ struct latch_tree_root *root,
+ const struct latch_tree_ops *ops)
+{
+ raw_write_seqcount_latch(&root->seq);
+ __lt_insert(node, root, 0, ops->less);
+ raw_write_seqcount_latch(&root->seq);
+ __lt_insert(node, root, 1, ops->less);
+}
+
+/**
+ * latch_tree_erase() - removes @node from the trees @root
+ * @node: nodes to remote
+ * @root: trees to remove @node from
+ * @ops: operators defining the node order
+ *
+ * Removes @node from the trees @root in an ordered fashion such that we can
+ * always observe one complete tree. See the comment for
+ * raw_write_seqcount_latch().
+ *
+ * It is assumed that @node will observe one RCU quiescent state before being
+ * reused of freed.
+ *
+ * All modifications (latch_tree_insert, latch_tree_remove) are assumed to be
+ * serialized.
+ */
+static __always_inline void
+latch_tree_erase(struct latch_tree_node *node,
+ struct latch_tree_root *root,
+ const struct latch_tree_ops *ops)
+{
+ raw_write_seqcount_latch(&root->seq);
+ __lt_erase(node, root, 0);
+ raw_write_seqcount_latch(&root->seq);
+ __lt_erase(node, root, 1);
+}
+
+/**
+ * latch_tree_find() - find the node matching @key in the trees @root
+ * @key: search key
+ * @root: trees to search for @key
+ * @ops: operators defining the node order
+ *
+ * Does a lockless lookup in the trees @root for the node matching @key.
+ *
+ * It is assumed that this is called while holding the appropriate RCU read
+ * side lock.
+ *
+ * If the operators define a partial order on the elements (there are multiple
+ * elements which have the same key value) it is undefined which of these
+ * elements will be found. Nor is it possible to iterate the tree to find
+ * further elements with the same key value.
+ *
+ * Returns: a pointer to the node matching @key or NULL.
+ */
+static __always_inline struct latch_tree_node *
+latch_tree_find(void *key, struct latch_tree_root *root,
+ const struct latch_tree_ops *ops)
+{
+ struct latch_tree_node *node;
+ unsigned int seq;
+
+ do {
+ seq = raw_read_seqcount_latch(&root->seq);
+ node = __lt_find(key, root, seq & 1, ops->comp);
+ } while (read_seqcount_retry(&root->seq, seq));
+
+ return node;
+}
+
+#endif /* RB_TREE_LATCH_H */
*/
#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v)
-/**
- * lockless_dereference() - safely load a pointer for later dereference
- * @p: The pointer to load
- *
- * Similar to rcu_dereference(), but for situations where the pointed-to
- * object's lifetime is managed by something other than RCU. That
- * "something other" might be reference counting or simple immortality.
- */
-#define lockless_dereference(p) \
-({ \
- typeof(p) _________p1 = READ_ONCE(p); \
- smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
- (_________p1); \
-})
-
/**
* rcu_assign_pointer() - assign to RCU-protected pointer
* @p: pointer to assign to
#include <linux/spinlock.h>
#include <linux/preempt.h>
#include <linux/lockdep.h>
+#include <linux/compiler.h>
#include <asm/processor.h>
/*
s->sequence++;
}
-/*
+static inline int raw_read_seqcount_latch(seqcount_t *s)
+{
+ return lockless_dereference(s->sequence);
+}
+
+/**
* raw_write_seqcount_latch - redirect readers to even/odd copy
* @s: pointer to seqcount_t
+ *
+ * The latch technique is a multiversion concurrency control method that allows
+ * queries during non-atomic modifications. If you can guarantee queries never
+ * interrupt the modification -- e.g. the concurrency is strictly between CPUs
+ * -- you most likely do not need this.
+ *
+ * Where the traditional RCU/lockless data structures rely on atomic
+ * modifications to ensure queries observe either the old or the new state the
+ * latch allows the same for non-atomic updates. The trade-off is doubling the
+ * cost of storage; we have to maintain two copies of the entire data
+ * structure.
+ *
+ * Very simply put: we first modify one copy and then the other. This ensures
+ * there is always one copy in a stable state, ready to give us an answer.
+ *
+ * The basic form is a data structure like:
+ *
+ * struct latch_struct {
+ * seqcount_t seq;
+ * struct data_struct data[2];
+ * };
+ *
+ * Where a modification, which is assumed to be externally serialized, does the
+ * following:
+ *
+ * void latch_modify(struct latch_struct *latch, ...)
+ * {
+ * smp_wmb(); <- Ensure that the last data[1] update is visible
+ * latch->seq++;
+ * smp_wmb(); <- Ensure that the seqcount update is visible
+ *
+ * modify(latch->data[0], ...);
+ *
+ * smp_wmb(); <- Ensure that the data[0] update is visible
+ * latch->seq++;
+ * smp_wmb(); <- Ensure that the seqcount update is visible
+ *
+ * modify(latch->data[1], ...);
+ * }
+ *
+ * The query will have a form like:
+ *
+ * struct entry *latch_query(struct latch_struct *latch, ...)
+ * {
+ * struct entry *entry;
+ * unsigned seq, idx;
+ *
+ * do {
+ * seq = lockless_dereference(latch->seq);
+ *
+ * idx = seq & 0x01;
+ * entry = data_query(latch->data[idx], ...);
+ *
+ * smp_rmb();
+ * } while (seq != latch->seq);
+ *
+ * return entry;
+ * }
+ *
+ * So during the modification, queries are first redirected to data[1]. Then we
+ * modify data[0]. When that is complete, we redirect queries back to data[0]
+ * and we can modify data[1].
+ *
+ * NOTE: The non-requirement for atomic modifications does _NOT_ include
+ * the publishing of new entries in the case where data is a dynamic
+ * data structure.
+ *
+ * An iteration might start in data[0] and get suspended long enough
+ * to miss an entire modification sequence, once it resumes it might
+ * observe the new entry.
+ *
+ * NOTE: When data is a dynamic data structure; one should use regular RCU
+ * patterns to manage the lifetimes of the objects within.
*/
static inline void raw_write_seqcount_latch(seqcount_t *s)
{
#include <linux/slab.h>
#include <linux/atomic.h>
#include <net/neighbour.h>
+#include <net/sock.h>
#define AX25_T1CLAMPLO 1
#define AX25_T1CLAMPHI (30 * HZ)
atomic_t refcount;
} ax25_cb;
-#define ax25_sk(__sk) ((ax25_cb *)(__sk)->sk_protinfo)
+struct ax25_sock {
+ struct sock sk;
+ struct ax25_cb *cb;
+};
+
+static inline struct ax25_sock *ax25_sk(const struct sock *sk)
+{
+ return (struct ax25_sock *) sk;
+}
+
+static inline struct ax25_cb *sk_to_ax25(const struct sock *sk)
+{
+ return ax25_sk(sk)->cb;
+}
#define ax25_for_each(__ax25, list) \
hlist_for_each_entry(__ax25, list, ax25_node)
* @sk_incoming_cpu: record cpu processing incoming packets
* @sk_txhash: computed flow hash for use on transmit
* @sk_filter: socket filtering instructions
- * @sk_protinfo: private area, net family specific, when not using slab
* @sk_timer: sock cleanup timer
* @sk_stamp: time stamp of last packet received
* @sk_tsflags: SO_TIMESTAMPING socket options
const struct cred *sk_peer_cred;
long sk_rcvtimeo;
long sk_sndtimeo;
- void *sk_protinfo;
struct timer_list sk_timer;
ktime_t sk_stamp;
u16 sk_tsflags;
TP_ARGS(wq)
);
-#define show_oper_type(type) \
- __print_symbolic(type, \
- { BTRFS_QGROUP_OPER_ADD_EXCL, "OPER_ADD_EXCL" }, \
- { BTRFS_QGROUP_OPER_ADD_SHARED, "OPER_ADD_SHARED" }, \
- { BTRFS_QGROUP_OPER_SUB_EXCL, "OPER_SUB_EXCL" }, \
- { BTRFS_QGROUP_OPER_SUB_SHARED, "OPER_SUB_SHARED" })
-
-DECLARE_EVENT_CLASS(btrfs_qgroup_oper,
-
- TP_PROTO(struct btrfs_qgroup_operation *oper),
-
- TP_ARGS(oper),
-
- TP_STRUCT__entry(
- __field( u64, ref_root )
- __field( u64, bytenr )
- __field( u64, num_bytes )
- __field( u64, seq )
- __field( int, type )
- __field( u64, elem_seq )
- ),
-
- TP_fast_assign(
- __entry->ref_root = oper->ref_root;
- __entry->bytenr = oper->bytenr,
- __entry->num_bytes = oper->num_bytes;
- __entry->seq = oper->seq;
- __entry->type = oper->type;
- __entry->elem_seq = oper->elem.seq;
- ),
-
- TP_printk("ref_root = %llu, bytenr = %llu, num_bytes = %llu, "
- "seq = %llu, elem.seq = %llu, type = %s",
- (unsigned long long)__entry->ref_root,
- (unsigned long long)__entry->bytenr,
- (unsigned long long)__entry->num_bytes,
- (unsigned long long)__entry->seq,
- (unsigned long long)__entry->elem_seq,
- show_oper_type(__entry->type))
-);
-
-DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_account,
-
- TP_PROTO(struct btrfs_qgroup_operation *oper),
-
- TP_ARGS(oper)
-);
-
-DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_record_ref,
-
- TP_PROTO(struct btrfs_qgroup_operation *oper),
-
- TP_ARGS(oper)
-);
-
#endif /* _TRACE_BTRFS_H */
/* This part must be outside protection */
#define _UAPI_LINUX_IN_H
#include <linux/types.h>
+#include <linux/libc-compat.h>
#include <linux/socket.h>
+#if __UAPI_DEF_IN_IPPROTO
/* Standard well-defined IP protocols. */
enum {
IPPROTO_IP = 0, /* Dummy protocol for TCP */
#define IPPROTO_RAW IPPROTO_RAW
IPPROTO_MAX
};
+#endif
-
+#if __UAPI_DEF_IN_ADDR
/* Internet address. */
struct in_addr {
__be32 s_addr;
};
+#endif
#define IP_TOS 1
#define IP_TTL 2
/* Request struct for multicast socket ops */
+#if __UAPI_DEF_IP_MREQ
struct ip_mreq {
struct in_addr imr_multiaddr; /* IP multicast address of group */
struct in_addr imr_interface; /* local IP address of interface */
#define GROUP_FILTER_SIZE(numsrc) \
(sizeof(struct group_filter) - sizeof(struct __kernel_sockaddr_storage) \
+ (numsrc) * sizeof(struct __kernel_sockaddr_storage))
+#endif
+#if __UAPI_DEF_IN_PKTINFO
struct in_pktinfo {
int ipi_ifindex;
struct in_addr ipi_spec_dst;
struct in_addr ipi_addr;
};
+#endif
/* Structure describing an Internet (IP) socket address. */
+#if __UAPI_DEF_SOCKADDR_IN
#define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */
struct sockaddr_in {
__kernel_sa_family_t sin_family; /* Address family */
sizeof(unsigned short int) - sizeof(struct in_addr)];
};
#define sin_zero __pad /* for BSD UNIX comp. -FvK */
+#endif
-
+#if __UAPI_DEF_IN_CLASS
/*
* Definitions of the bits in an Internet address integer.
* On subnets, host and network parts are found according
#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */
#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */
#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */
-
+#endif
/* <asm/byteorder.h> contains the htonl type stuff.. */
#include <asm/byteorder.h>
/* GLIBC headers included first so don't define anything
* that would already be defined. */
+#define __UAPI_DEF_IN_ADDR 0
+#define __UAPI_DEF_IN_IPPROTO 0
+#define __UAPI_DEF_IN_PKTINFO 0
+#define __UAPI_DEF_IP_MREQ 0
+#define __UAPI_DEF_SOCKADDR_IN 0
+#define __UAPI_DEF_IN_CLASS 0
+
#define __UAPI_DEF_IN6_ADDR 0
/* The exception is the in6_addr macros which must be defined
* if the glibc code didn't define them. This guard matches
/* Linux headers included first, and we must define everything
* we need. The expectation is that glibc will check the
* __UAPI_DEF_* defines and adjust appropriately. */
+#define __UAPI_DEF_IN_ADDR 1
+#define __UAPI_DEF_IN_IPPROTO 1
+#define __UAPI_DEF_IN_PKTINFO 1
+#define __UAPI_DEF_IP_MREQ 1
+#define __UAPI_DEF_SOCKADDR_IN 1
+#define __UAPI_DEF_IN_CLASS 1
+
#define __UAPI_DEF_IN6_ADDR 1
/* We unconditionally define the in6_addr macros and glibc must
* coordinate. */
* that we need. */
#else /* !defined(__GLIBC__) */
+/* Definitions for in.h */
+#define __UAPI_DEF_IN_ADDR 1
+#define __UAPI_DEF_IN_IPPROTO 1
+#define __UAPI_DEF_IN_PKTINFO 1
+#define __UAPI_DEF_IP_MREQ 1
+#define __UAPI_DEF_SOCKADDR_IN 1
+#define __UAPI_DEF_IN_CLASS 1
+
/* Definitions for in6.h */
#define __UAPI_DEF_IN6_ADDR 1
#define __UAPI_DEF_IN6_ADDR_ALT 1
bool "Compress modules on installation"
depends on MODULES
help
- This option compresses the kernel modules when 'make
- modules_install' is run.
- The modules will be compressed either using gzip or xz depend on the
- choice made in "Compression algorithm".
+ Compresses kernel modules when 'make modules_install' is run; gzip or
+ xz depending on "Compression algorithm" below.
- module-init-tools has support for gzip format while kmod handle gzip
- and xz compressed modules.
+ module-init-tools MAY support gzip, and kmod MAY support gzip and xz.
- When a kernel module is installed from outside of the main kernel
- source and uses the Kbuild system for installing modules then that
- kernel module will also be compressed when it is installed.
+ Out-of-tree kernel modules installed using Kbuild will also be
+ compressed upon installation.
- This option provides little benefit when the modules are to be used inside
- an initrd or initramfs, it generally is more efficient to compress the whole
- initrd or initramfs instead.
+ Note: for modules inside an initrd or initramfs, it's more efficient
+ to compress the whole initrd or initramfs instead.
- This is fully compatible with signed modules while the signed module is
- compressed. module-init-tools or kmod handles decompression and provide to
- other layer the uncompressed but signed payload.
+ Note: This is fully compatible with signed modules.
+
+ If in doubt, say N.
choice
prompt "Compression algorithm"
endif # MODULES
+config MODULES_TREE_LOOKUP
+ def_bool y
+ depends on PERF_EVENTS || TRACING
+
config INIT_ALL_POSSIBLE
bool
help
--- /dev/null
+# global stuff - these enable us to allow some
+# of the not so generic stuff below for xen
+CONFIG_PARAVIRT=y
+CONFIG_NET=y
+CONFIG_NET_CORE=y
+CONFIG_NETDEVICES=y
+CONFIG_BLOCK=y
+CONFIG_WATCHDOG=y
+CONFIG_TARGET_CORE=y
+CONFIG_SCSI=y
+CONFIG_FB=y
+CONFIG_INPUT_MISC=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_TTY=y
+# Technically not required but otherwise produces
+# pretty useless systems starting from allnoconfig
+# You want TCP/IP and ELF binaries right?
+CONFIG_INET=y
+CONFIG_BINFMT_ELF=y
+# generic config
+CONFIG_XEN=y
+CONFIG_XEN_DOM0=y
+# backend drivers
+CONFIG_XEN_BACKEND=y
+CONFIG_XEN_BLKDEV_BACKEND=m
+CONFIG_XEN_NETDEV_BACKEND=m
+CONFIG_HVC_XEN=y
+CONFIG_XEN_WDT=m
+CONFIG_XEN_SCSI_BACKEND=m
+# frontend drivers
+CONFIG_XEN_FBDEV_FRONTEND=m
+CONFIG_HVC_XEN_FRONTEND=y
+CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m
+CONFIG_XEN_SCSI_FRONTEND=m
+# others
+CONFIG_XEN_BALLOON=y
+CONFIG_XEN_SCRUB_PAGES=y
+CONFIG_XEN_DEV_EVTCHN=m
+CONFIG_XEN_BLKDEV_FRONTEND=m
+CONFIG_XEN_NETDEV_FRONTEND=m
+CONFIG_XENFS=m
+CONFIG_XEN_COMPAT_XENFS=y
+CONFIG_XEN_SYS_HYPERVISOR=y
+CONFIG_XEN_XENBUS_FRONTEND=y
+CONFIG_XEN_GNTDEV=m
+CONFIG_XEN_GRANT_DEV_ALLOC=m
+CONFIG_SWIOTLB_XEN=y
+CONFIG_XEN_PRIVCMD=m
continue;
key = iterk;
- if (__module_address(iter->key) == mod) {
+ if (within_module(iter->key, mod)) {
/*
* Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH.
*/
key = (struct static_key *)(unsigned long)iter->key;
- if (__module_address(iter->key) == mod)
+ if (within_module(iter->key, mod))
continue;
prev = &key->next;
{
struct jump_entry *stop = __stop___jump_table;
struct jump_entry *entry = jump_label_get_entries(key);
-
#ifdef CONFIG_MODULES
- struct module *mod = __module_address((unsigned long)key);
+ struct module *mod;
__jump_label_mod_update(key, enable);
+ preempt_disable();
+ mod = __module_address((unsigned long)key);
if (mod)
stop = mod->jump_entries + mod->num_jump_entries;
+ preempt_enable();
#endif
/* if there are no users, entry can be NULL */
if (entry)
DEFINE_MUTEX(module_mutex);
EXPORT_SYMBOL_GPL(module_mutex);
static LIST_HEAD(modules);
-#ifdef CONFIG_KGDB_KDB
-struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
-#endif /* CONFIG_KGDB_KDB */
-#ifdef CONFIG_MODULE_SIG
-#ifdef CONFIG_MODULE_SIG_FORCE
-static bool sig_enforce = true;
-#else
-static bool sig_enforce = false;
+#ifdef CONFIG_MODULES_TREE_LOOKUP
+
+/*
+ * Use a latched RB-tree for __module_address(); this allows us to use
+ * RCU-sched lookups of the address from any context.
+ *
+ * Because modules have two address ranges: init and core, we need two
+ * latch_tree_nodes entries. Therefore we need the back-pointer from
+ * mod_tree_node.
+ *
+ * Because init ranges are short lived we mark them unlikely and have placed
+ * them outside the critical cacheline in struct module.
+ *
+ * This is conditional on PERF_EVENTS || TRACING because those can really hit
+ * __module_address() hard by doing a lot of stack unwinding; potentially from
+ * NMI context.
+ */
-static int param_set_bool_enable_only(const char *val,
- const struct kernel_param *kp)
+static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n)
{
- int err;
- bool test;
- struct kernel_param dummy_kp = *kp;
+ struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
+ struct module *mod = mtn->mod;
- dummy_kp.arg = &test;
+ if (unlikely(mtn == &mod->mtn_init))
+ return (unsigned long)mod->module_init;
- err = param_set_bool(val, &dummy_kp);
- if (err)
- return err;
+ return (unsigned long)mod->module_core;
+}
+
+static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n)
+{
+ struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
+ struct module *mod = mtn->mod;
- /* Don't let them unset it once it's set! */
- if (!test && sig_enforce)
- return -EROFS;
+ if (unlikely(mtn == &mod->mtn_init))
+ return (unsigned long)mod->init_size;
+
+ return (unsigned long)mod->core_size;
+}
+
+static __always_inline bool
+mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b)
+{
+ return __mod_tree_val(a) < __mod_tree_val(b);
+}
+
+static __always_inline int
+mod_tree_comp(void *key, struct latch_tree_node *n)
+{
+ unsigned long val = (unsigned long)key;
+ unsigned long start, end;
+
+ start = __mod_tree_val(n);
+ if (val < start)
+ return -1;
+
+ end = start + __mod_tree_size(n);
+ if (val >= end)
+ return 1;
- if (test)
- sig_enforce = true;
return 0;
}
-static const struct kernel_param_ops param_ops_bool_enable_only = {
- .flags = KERNEL_PARAM_OPS_FL_NOARG,
- .set = param_set_bool_enable_only,
- .get = param_get_bool,
+static const struct latch_tree_ops mod_tree_ops = {
+ .less = mod_tree_less,
+ .comp = mod_tree_comp,
};
-#define param_check_bool_enable_only param_check_bool
+static struct mod_tree_root {
+ struct latch_tree_root root;
+ unsigned long addr_min;
+ unsigned long addr_max;
+} mod_tree __cacheline_aligned = {
+ .addr_min = -1UL,
+};
+
+#define module_addr_min mod_tree.addr_min
+#define module_addr_max mod_tree.addr_max
+
+static noinline void __mod_tree_insert(struct mod_tree_node *node)
+{
+ latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops);
+}
+
+static void __mod_tree_remove(struct mod_tree_node *node)
+{
+ latch_tree_erase(&node->node, &mod_tree.root, &mod_tree_ops);
+}
+
+/*
+ * These modifications: insert, remove_init and remove; are serialized by the
+ * module_mutex.
+ */
+static void mod_tree_insert(struct module *mod)
+{
+ mod->mtn_core.mod = mod;
+ mod->mtn_init.mod = mod;
+
+ __mod_tree_insert(&mod->mtn_core);
+ if (mod->init_size)
+ __mod_tree_insert(&mod->mtn_init);
+}
+
+static void mod_tree_remove_init(struct module *mod)
+{
+ if (mod->init_size)
+ __mod_tree_remove(&mod->mtn_init);
+}
+
+static void mod_tree_remove(struct module *mod)
+{
+ __mod_tree_remove(&mod->mtn_core);
+ mod_tree_remove_init(mod);
+}
+
+static struct module *mod_find(unsigned long addr)
+{
+ struct latch_tree_node *ltn;
+
+ ltn = latch_tree_find((void *)addr, &mod_tree.root, &mod_tree_ops);
+ if (!ltn)
+ return NULL;
+
+ return container_of(ltn, struct mod_tree_node, node)->mod;
+}
+
+#else /* MODULES_TREE_LOOKUP */
+
+static unsigned long module_addr_min = -1UL, module_addr_max = 0;
+
+static void mod_tree_insert(struct module *mod) { }
+static void mod_tree_remove_init(struct module *mod) { }
+static void mod_tree_remove(struct module *mod) { }
+
+static struct module *mod_find(unsigned long addr)
+{
+ struct module *mod;
+
+ list_for_each_entry_rcu(mod, &modules, list) {
+ if (within_module(addr, mod))
+ return mod;
+ }
+
+ return NULL;
+}
+
+#endif /* MODULES_TREE_LOOKUP */
+
+/*
+ * Bounds of module text, for speeding up __module_address.
+ * Protected by module_mutex.
+ */
+static void __mod_update_bounds(void *base, unsigned int size)
+{
+ unsigned long min = (unsigned long)base;
+ unsigned long max = min + size;
+
+ if (min < module_addr_min)
+ module_addr_min = min;
+ if (max > module_addr_max)
+ module_addr_max = max;
+}
+
+static void mod_update_bounds(struct module *mod)
+{
+ __mod_update_bounds(mod->module_core, mod->core_size);
+ if (mod->init_size)
+ __mod_update_bounds(mod->module_init, mod->init_size);
+}
+
+#ifdef CONFIG_KGDB_KDB
+struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
+#endif /* CONFIG_KGDB_KDB */
+
+static void module_assert_mutex(void)
+{
+ lockdep_assert_held(&module_mutex);
+}
+
+static void module_assert_mutex_or_preempt(void)
+{
+#ifdef CONFIG_LOCKDEP
+ if (unlikely(!debug_locks))
+ return;
+
+ WARN_ON(!rcu_read_lock_sched_held() &&
+ !lockdep_is_held(&module_mutex));
+#endif
+}
+
+static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE);
+#ifndef CONFIG_MODULE_SIG_FORCE
module_param(sig_enforce, bool_enable_only, 0644);
#endif /* !CONFIG_MODULE_SIG_FORCE */
-#endif /* CONFIG_MODULE_SIG */
/* Block module loading/unloading? */
int modules_disabled = 0;
static BLOCKING_NOTIFIER_HEAD(module_notify_list);
-/* Bounds of module allocation, for speeding __module_address.
- * Protected by module_mutex. */
-static unsigned long module_addr_min = -1UL, module_addr_max = 0;
-
int register_module_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_register(&module_notify_list, nb);
#endif
};
+ module_assert_mutex_or_preempt();
+
if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
return true;
{
struct module *mod;
+ module_assert_mutex();
+
list_for_each_entry(mod, &modules, list) {
if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
continue;
{
const unsigned long *crc;
- /* Since this should be found in kernel (which can't be removed),
- * no locking is necessary. */
+ /*
+ * Since this should be found in kernel (which can't be removed), no
+ * locking is necessary -- use preempt_disable() to placate lockdep.
+ */
+ preempt_disable();
if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL,
- &crc, true, false))
+ &crc, true, false)) {
+ preempt_enable();
BUG();
+ }
+ preempt_enable();
return check_version(sechdrs, versindex,
VMLINUX_SYMBOL_STR(module_layout), mod, crc,
NULL);
mod_kobject_put(mod);
}
+static void init_param_lock(struct module *mod)
+{
+ mutex_init(&mod->param_lock);
+}
#else /* !CONFIG_SYSFS */
static int mod_sysfs_setup(struct module *mod,
{
}
+static void init_param_lock(struct module *mod)
+{
+}
#endif /* CONFIG_SYSFS */
static void mod_sysfs_teardown(struct module *mod)
mutex_lock(&module_mutex);
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
+ mod_tree_remove(mod);
/* Remove this module from bug list, this uses list_del_rcu */
module_bug_cleanup(mod);
- /* Wait for RCU synchronizing before releasing mod->list and buglist. */
- synchronize_rcu();
+ /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */
+ synchronize_sched();
mutex_unlock(&module_mutex);
/* This may be NULL, but that's OK */
return vmalloc_exec(size);
}
-static void *module_alloc_update_bounds(unsigned long size)
-{
- void *ret = module_alloc(size);
-
- if (ret) {
- mutex_lock(&module_mutex);
- /* Update module bounds. */
- if ((unsigned long)ret < module_addr_min)
- module_addr_min = (unsigned long)ret;
- if ((unsigned long)ret + size > module_addr_max)
- module_addr_max = (unsigned long)ret + size;
- mutex_unlock(&module_mutex);
- }
- return ret;
-}
-
#ifdef CONFIG_DEBUG_KMEMLEAK
static void kmemleak_load_module(const struct module *mod,
const struct load_info *info)
void *ptr;
/* Do the allocs. */
- ptr = module_alloc_update_bounds(mod->core_size);
+ ptr = module_alloc(mod->core_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. Just mark it as not being a
mod->module_core = ptr;
if (mod->init_size) {
- ptr = module_alloc_update_bounds(mod->init_size);
+ ptr = module_alloc(mod->init_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. This block doesn't need to be
mod->symtab = mod->core_symtab;
mod->strtab = mod->core_strtab;
#endif
+ mod_tree_remove_init(mod);
unset_module_init_ro_nx(mod);
module_arch_freeing_init(mod);
mod->module_init = NULL;
mod->init_text_size = 0;
/*
* We want to free module_init, but be aware that kallsyms may be
- * walking this with preempt disabled. In all the failure paths,
- * we call synchronize_rcu/synchronize_sched, but we don't want
- * to slow down the success path, so use actual RCU here.
+ * walking this with preempt disabled. In all the failure paths, we
+ * call synchronize_sched(), but we don't want to slow down the success
+ * path, so use actual RCU here.
*/
- call_rcu(&freeinit->rcu, do_free_init);
+ call_rcu_sched(&freeinit->rcu, do_free_init);
mutex_unlock(&module_mutex);
wake_up_all(&module_wq);
err = -EEXIST;
goto out;
}
+ mod_update_bounds(mod);
list_add_rcu(&mod->list, &modules);
+ mod_tree_insert(mod);
err = 0;
out:
if (err)
goto unlink_mod;
+ init_param_lock(mod);
+
/* Now we've got everything in the final locations, we can
* find optional sections. */
err = find_module_sections(mod, info);
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
wake_up_all(&module_wq);
- /* Wait for RCU synchronizing before releasing mod->list. */
- synchronize_rcu();
+ /* Wait for RCU-sched synchronizing before releasing mod->list. */
+ synchronize_sched();
mutex_unlock(&module_mutex);
free_module:
/* Free lock-classes; relies on the preceding sync_rcu() */
char **modname,
char *namebuf)
{
- struct module *mod;
const char *ret = NULL;
+ struct module *mod;
preempt_disable();
- list_for_each_entry_rcu(mod, &modules, list) {
- if (mod->state == MODULE_STATE_UNFORMED)
- continue;
- if (within_module(addr, mod)) {
- if (modname)
- *modname = mod->name;
- ret = get_ksymbol(mod, addr, size, offset);
- break;
- }
+ mod = __module_address(addr);
+ if (mod) {
+ if (modname)
+ *modname = mod->name;
+ ret = get_ksymbol(mod, addr, size, offset);
}
/* Make a copy in here where it's safe */
if (ret) {
ret = namebuf;
}
preempt_enable();
+
return ret;
}
unsigned int i;
int ret;
+ module_assert_mutex();
+
list_for_each_entry(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if (addr < module_addr_min || addr > module_addr_max)
return NULL;
- list_for_each_entry_rcu(mod, &modules, list) {
+ module_assert_mutex_or_preempt();
+
+ mod = mod_find(addr);
+ if (mod) {
+ BUG_ON(!within_module(addr, mod));
if (mod->state == MODULE_STATE_UNFORMED)
- continue;
- if (within_module(addr, mod))
- return mod;
+ mod = NULL;
}
- return NULL;
+ return mod;
}
EXPORT_SYMBOL_GPL(__module_address);
#include <linux/slab.h>
#include <linux/ctype.h>
-/* Protects all parameters, and incidentally kmalloced_param list. */
+#ifdef CONFIG_SYSFS
+/* Protects all built-in parameters, modules use their own param_lock */
static DEFINE_MUTEX(param_lock);
+/* Use the module's mutex, or if built-in use the built-in mutex */
+#ifdef CONFIG_MODULES
+#define KPARAM_MUTEX(mod) ((mod) ? &(mod)->param_lock : ¶m_lock)
+#else
+#define KPARAM_MUTEX(mod) (¶m_lock)
+#endif
+
+static inline void check_kparam_locked(struct module *mod)
+{
+ BUG_ON(!mutex_is_locked(KPARAM_MUTEX(mod)));
+}
+#else
+static inline void check_kparam_locked(struct module *mod)
+{
+}
+#endif /* !CONFIG_SYSFS */
+
/* This just allows us to keep track of which parameters are kmalloced. */
struct kmalloced_param {
struct list_head list;
char val[];
};
static LIST_HEAD(kmalloced_params);
+static DEFINE_SPINLOCK(kmalloced_params_lock);
static void *kmalloc_parameter(unsigned int size)
{
if (!p)
return NULL;
+ spin_lock(&kmalloced_params_lock);
list_add(&p->list, &kmalloced_params);
+ spin_unlock(&kmalloced_params_lock);
+
return p->val;
}
{
struct kmalloced_param *p;
+ spin_lock(&kmalloced_params_lock);
list_for_each_entry(p, &kmalloced_params, list) {
if (p->val == param) {
list_del(&p->list);
break;
}
}
+ spin_unlock(&kmalloced_params_lock);
}
static char dash2underscore(char c)
return -EINVAL;
pr_debug("handling %s with %p\n", param,
params[i].ops->set);
- mutex_lock(¶m_lock);
+ kernel_param_lock(params[i].mod);
param_check_unsafe(¶ms[i]);
err = params[i].ops->set(val, ¶ms[i]);
- mutex_unlock(¶m_lock);
+ kernel_param_unlock(params[i].mod);
return err;
}
}
return scnprintf(buffer, PAGE_SIZE, format, \
*((type *)kp->arg)); \
} \
- struct kernel_param_ops param_ops_##name = { \
+ const struct kernel_param_ops param_ops_##name = { \
.set = param_set_##name, \
.get = param_get_##name, \
}; \
maybe_kfree_parameter(*((char **)arg));
}
-struct kernel_param_ops param_ops_charp = {
+const struct kernel_param_ops param_ops_charp = {
.set = param_set_charp,
.get = param_get_charp,
.free = param_free_charp,
}
EXPORT_SYMBOL(param_get_bool);
-struct kernel_param_ops param_ops_bool = {
+const struct kernel_param_ops param_ops_bool = {
.flags = KERNEL_PARAM_OPS_FL_NOARG,
.set = param_set_bool,
.get = param_get_bool,
};
EXPORT_SYMBOL(param_ops_bool);
+int param_set_bool_enable_only(const char *val, const struct kernel_param *kp)
+{
+ int err = 0;
+ bool new_value;
+ bool orig_value = *(bool *)kp->arg;
+ struct kernel_param dummy_kp = *kp;
+
+ dummy_kp.arg = &new_value;
+
+ err = param_set_bool(val, &dummy_kp);
+ if (err)
+ return err;
+
+ /* Don't let them unset it once it's set! */
+ if (!new_value && orig_value)
+ return -EROFS;
+
+ if (new_value)
+ err = param_set_bool(val, kp);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(param_set_bool_enable_only);
+
+const struct kernel_param_ops param_ops_bool_enable_only = {
+ .flags = KERNEL_PARAM_OPS_FL_NOARG,
+ .set = param_set_bool_enable_only,
+ .get = param_get_bool,
+};
+EXPORT_SYMBOL_GPL(param_ops_bool_enable_only);
+
/* This one must be bool. */
int param_set_invbool(const char *val, const struct kernel_param *kp)
{
}
EXPORT_SYMBOL(param_get_invbool);
-struct kernel_param_ops param_ops_invbool = {
+const struct kernel_param_ops param_ops_invbool = {
.set = param_set_invbool,
.get = param_get_invbool,
};
int param_set_bint(const char *val, const struct kernel_param *kp)
{
- struct kernel_param boolkp;
+ /* Match bool exactly, by re-using it. */
+ struct kernel_param boolkp = *kp;
bool v;
int ret;
- /* Match bool exactly, by re-using it. */
- boolkp = *kp;
boolkp.arg = &v;
ret = param_set_bool(val, &boolkp);
}
EXPORT_SYMBOL(param_set_bint);
-struct kernel_param_ops param_ops_bint = {
+const struct kernel_param_ops param_ops_bint = {
.flags = KERNEL_PARAM_OPS_FL_NOARG,
.set = param_set_bint,
.get = param_get_int,
EXPORT_SYMBOL(param_ops_bint);
/* We break the rule and mangle the string. */
-static int param_array(const char *name,
+static int param_array(struct module *mod,
+ const char *name,
const char *val,
unsigned int min, unsigned int max,
void *elem, int elemsize,
/* nul-terminate and parse */
save = val[len];
((char *)val)[len] = '\0';
- BUG_ON(!mutex_is_locked(¶m_lock));
+ check_kparam_locked(mod);
ret = set(val, &kp);
if (ret != 0)
const struct kparam_array *arr = kp->arr;
unsigned int temp_num;
- return param_array(kp->name, val, 1, arr->max, arr->elem,
+ return param_array(kp->mod, kp->name, val, 1, arr->max, arr->elem,
arr->elemsize, arr->ops->set, kp->level,
arr->num ?: &temp_num);
}
{
int i, off, ret;
const struct kparam_array *arr = kp->arr;
- struct kernel_param p;
+ struct kernel_param p = *kp;
- p = *kp;
for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) {
if (i)
buffer[off++] = ',';
p.arg = arr->elem + arr->elemsize * i;
- BUG_ON(!mutex_is_locked(¶m_lock));
+ check_kparam_locked(p.mod);
ret = arr->ops->get(buffer + off, &p);
if (ret < 0)
return ret;
arr->ops->free(arr->elem + arr->elemsize * i);
}
-struct kernel_param_ops param_array_ops = {
+const struct kernel_param_ops param_array_ops = {
.set = param_array_set,
.get = param_array_get,
.free = param_array_free,
}
EXPORT_SYMBOL(param_get_string);
-struct kernel_param_ops param_ops_string = {
+const struct kernel_param_ops param_ops_string = {
.set = param_set_copystring,
.get = param_get_string,
};
if (!attribute->param->ops->get)
return -EPERM;
- mutex_lock(¶m_lock);
+ kernel_param_lock(mk->mod);
count = attribute->param->ops->get(buf, attribute->param);
- mutex_unlock(¶m_lock);
+ kernel_param_unlock(mk->mod);
if (count > 0) {
strcat(buf, "\n");
++count;
/* sysfs always hands a nul-terminated string in buf. We rely on that. */
static ssize_t param_attr_store(struct module_attribute *mattr,
- struct module_kobject *km,
+ struct module_kobject *mk,
const char *buf, size_t len)
{
int err;
if (!attribute->param->ops->set)
return -EPERM;
- mutex_lock(¶m_lock);
+ kernel_param_lock(mk->mod);
param_check_unsafe(attribute->param);
err = attribute->param->ops->set(buf, attribute->param);
- mutex_unlock(¶m_lock);
+ kernel_param_unlock(mk->mod);
if (!err)
return len;
return err;
#endif
#ifdef CONFIG_SYSFS
-void __kernel_param_lock(void)
+void kernel_param_lock(struct module *mod)
{
- mutex_lock(¶m_lock);
+ mutex_lock(KPARAM_MUTEX(mod));
}
-EXPORT_SYMBOL(__kernel_param_lock);
-void __kernel_param_unlock(void)
+void kernel_param_unlock(struct module *mod)
{
- mutex_unlock(¶m_lock);
+ mutex_unlock(KPARAM_MUTEX(mod));
}
-EXPORT_SYMBOL(__kernel_param_unlock);
+
+EXPORT_SYMBOL(kernel_param_lock);
+EXPORT_SYMBOL(kernel_param_unlock);
/*
* add_sysfs_param - add a parameter to sysfs
mk = locate_module_kobject(vattr->module_name);
if (mk) {
err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr);
+ WARN_ON_ONCE(err);
kobject_uevent(&mk->kobj, KOBJ_ADD);
kobject_put(&mk->kobj);
}
config DPM_WATCHDOG_TIMEOUT
int "Watchdog timeout in seconds"
range 1 120
- default 12
+ default 60
depends on DPM_WATCHDOG
config PM_TRACE
error = disable_nonboot_cpus();
if (error)
- goto Platform_finish;
+ goto Enable_cpus;
local_irq_disable();
syscore_suspend();
Power_up:
syscore_resume();
local_irq_enable();
+
+ Enable_cpus:
enable_nonboot_cpus();
Platform_finish:
obj-$(CONFIG_TIMER_STATS) += timer_stats.o
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
-
-$(obj)/time.o: $(objtree)/include/config/
* We want to use this from any context including NMI and tracing /
* instrumenting the timekeeping code itself.
*
- * So we handle this differently than the other timekeeping accessor
- * functions which retry when the sequence count has changed. The
- * update side does:
- *
- * smp_wmb(); <- Ensure that the last base[1] update is visible
- * tkf->seq++;
- * smp_wmb(); <- Ensure that the seqcount update is visible
- * update(tkf->base[0], tkr);
- * smp_wmb(); <- Ensure that the base[0] update is visible
- * tkf->seq++;
- * smp_wmb(); <- Ensure that the seqcount update is visible
- * update(tkf->base[1], tkr);
- *
- * The reader side does:
- *
- * do {
- * seq = tkf->seq;
- * smp_rmb();
- * idx = seq & 0x01;
- * now = now(tkf->base[idx]);
- * smp_rmb();
- * } while (seq != tkf->seq)
- *
- * As long as we update base[0] readers are forced off to
- * base[1]. Once base[0] is updated readers are redirected to base[0]
- * and the base[1] update takes place.
+ * Employ the latch technique; see @raw_write_seqcount_latch.
*
* So if a NMI hits the update of base[0] then it will use base[1]
* which is still consistent. In the worst case this can result is a
u64 now;
do {
- seq = raw_read_seqcount(&tkf->seq);
+ seq = raw_read_seqcount_latch(&tkf->seq);
tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
} while (read_seqcount_retry(&tkf->seq, seq));
BUG_ON(cpu_online(cpu));
old_base = per_cpu_ptr(&tvec_bases, cpu);
- new_base = this_cpu_ptr(&tvec_bases);
+ new_base = get_cpu_ptr(&tvec_bases);
/*
* The caller is globally serialized and nobody else
* takes two locks at once, deadlock is not possible.
spin_unlock(&old_base->lock);
spin_unlock_irq(&new_base->lock);
+ put_cpu_ptr(&tvec_bases);
}
static int timer_cpu_notify(struct notifier_block *self,
module_param_named(disable_numa, wq_disable_numa, bool, 0444);
/* see the comment above the definition of WQ_POWER_EFFICIENT */
-#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT
-static bool wq_power_efficient = true;
-#else
-static bool wq_power_efficient;
-#endif
-
+static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
module_param_named(power_efficient, wq_power_efficient, bool, 0444);
static bool wq_numa_enabled; /* unbound NUMA affinity enabled */
struct module *mod;
const struct bug_entry *bug = NULL;
- rcu_read_lock();
+ rcu_read_lock_sched();
list_for_each_entry_rcu(mod, &module_bug_list, bug_list) {
unsigned i;
}
bug = NULL;
out:
- rcu_read_unlock();
+ rcu_read_unlock_sched();
return bug;
}
char *secstrings;
unsigned int i;
+ lockdep_assert_held(&module_mutex);
+
mod->bug_table = NULL;
mod->num_bugs = 0;
void module_bug_cleanup(struct module *mod)
{
+ lockdep_assert_held(&module_mutex);
list_del_rcu(&mod->bug_list);
}
kfree(devpath);
return error;
}
+EXPORT_SYMBOL_GPL(kobject_move);
/**
* kobject_del - unlink kobject from hierarchy.
* parentheses and have some accompanying text comment.
*/
+/*
+ * Notes on lockless lookups:
+ *
+ * All stores to the tree structure (rb_left and rb_right) must be done using
+ * WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the
+ * tree structure as seen in program order.
+ *
+ * These two requirements will allow lockless iteration of the tree -- not
+ * correct iteration mind you, tree rotations are not atomic so a lookup might
+ * miss entire subtrees.
+ *
+ * But they do guarantee that any such traversal will only see valid elements
+ * and that it will indeed complete -- does not get stuck in a loop.
+ *
+ * It also guarantees that if the lookup returns an element it is the 'correct'
+ * one. But not returning an element does _NOT_ mean it's not present.
+ *
+ * NOTE:
+ *
+ * Stores to __rb_parent_color are not important for simple lookups so those
+ * are left undone as of now. Nor did I check for loops involving parent
+ * pointers.
+ */
+
static inline void rb_set_black(struct rb_node *rb)
{
rb->__rb_parent_color |= RB_BLACK;
* This still leaves us in violation of 4), the
* continuation into Case 3 will fix that.
*/
- parent->rb_right = tmp = node->rb_left;
- node->rb_left = parent;
+ tmp = node->rb_left;
+ WRITE_ONCE(parent->rb_right, tmp);
+ WRITE_ONCE(node->rb_left, parent);
if (tmp)
rb_set_parent_color(tmp, parent,
RB_BLACK);
* / \
* n U
*/
- gparent->rb_left = tmp; /* == parent->rb_right */
- parent->rb_right = gparent;
+ WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */
+ WRITE_ONCE(parent->rb_right, gparent);
if (tmp)
rb_set_parent_color(tmp, gparent, RB_BLACK);
__rb_rotate_set_parents(gparent, parent, root, RB_RED);
tmp = parent->rb_left;
if (node == tmp) {
/* Case 2 - right rotate at parent */
- parent->rb_left = tmp = node->rb_right;
- node->rb_right = parent;
+ tmp = node->rb_right;
+ WRITE_ONCE(parent->rb_left, tmp);
+ WRITE_ONCE(node->rb_right, parent);
if (tmp)
rb_set_parent_color(tmp, parent,
RB_BLACK);
}
/* Case 3 - left rotate at gparent */
- gparent->rb_right = tmp; /* == parent->rb_left */
- parent->rb_left = gparent;
+ WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */
+ WRITE_ONCE(parent->rb_left, gparent);
if (tmp)
rb_set_parent_color(tmp, gparent, RB_BLACK);
__rb_rotate_set_parents(gparent, parent, root, RB_RED);
* / \ / \
* Sl Sr N Sl
*/
- parent->rb_right = tmp1 = sibling->rb_left;
- sibling->rb_left = parent;
+ tmp1 = sibling->rb_left;
+ WRITE_ONCE(parent->rb_right, tmp1);
+ WRITE_ONCE(sibling->rb_left, parent);
rb_set_parent_color(tmp1, parent, RB_BLACK);
__rb_rotate_set_parents(parent, sibling, root,
RB_RED);
* \
* Sr
*/
- sibling->rb_left = tmp1 = tmp2->rb_right;
- tmp2->rb_right = sibling;
- parent->rb_right = tmp2;
+ tmp1 = tmp2->rb_right;
+ WRITE_ONCE(sibling->rb_left, tmp1);
+ WRITE_ONCE(tmp2->rb_right, sibling);
+ WRITE_ONCE(parent->rb_right, tmp2);
if (tmp1)
rb_set_parent_color(tmp1, sibling,
RB_BLACK);
* / \ / \
* (sl) sr N (sl)
*/
- parent->rb_right = tmp2 = sibling->rb_left;
- sibling->rb_left = parent;
+ tmp2 = sibling->rb_left;
+ WRITE_ONCE(parent->rb_right, tmp2);
+ WRITE_ONCE(sibling->rb_left, parent);
rb_set_parent_color(tmp1, sibling, RB_BLACK);
if (tmp2)
rb_set_parent(tmp2, parent);
sibling = parent->rb_left;
if (rb_is_red(sibling)) {
/* Case 1 - right rotate at parent */
- parent->rb_left = tmp1 = sibling->rb_right;
- sibling->rb_right = parent;
+ tmp1 = sibling->rb_right;
+ WRITE_ONCE(parent->rb_left, tmp1);
+ WRITE_ONCE(sibling->rb_right, parent);
rb_set_parent_color(tmp1, parent, RB_BLACK);
__rb_rotate_set_parents(parent, sibling, root,
RB_RED);
break;
}
/* Case 3 - right rotate at sibling */
- sibling->rb_right = tmp1 = tmp2->rb_left;
- tmp2->rb_left = sibling;
- parent->rb_left = tmp2;
+ tmp1 = tmp2->rb_left;
+ WRITE_ONCE(sibling->rb_right, tmp1);
+ WRITE_ONCE(tmp2->rb_left, sibling);
+ WRITE_ONCE(parent->rb_left, tmp2);
if (tmp1)
rb_set_parent_color(tmp1, sibling,
RB_BLACK);
sibling = tmp2;
}
/* Case 4 - left rotate at parent + color flips */
- parent->rb_left = tmp2 = sibling->rb_right;
- sibling->rb_right = parent;
+ tmp2 = sibling->rb_right;
+ WRITE_ONCE(parent->rb_left, tmp2);
+ WRITE_ONCE(sibling->rb_right, parent);
rb_set_parent_color(tmp1, sibling, RB_BLACK);
if (tmp2)
rb_set_parent(tmp2, parent);
}
}
-static void new_kmalloc_cache(int idx, unsigned long flags)
+static void __init new_kmalloc_cache(int idx, unsigned long flags)
{
kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
kmalloc_info[idx].size, flags);
static void ax25_free_sock(struct sock *sk)
{
- ax25_cb_put(ax25_sk(sk));
+ ax25_cb_put(sk_to_ax25(sk));
}
/*
while ((skb = skb_dequeue(&ax25->sk->sk_receive_queue)) != NULL) {
if (skb->sk != ax25->sk) {
/* A pending connection */
- ax25_cb *sax25 = ax25_sk(skb->sk);
+ ax25_cb *sax25 = sk_to_ax25(skb->sk);
/* Queue the unaccepted socket for death */
sock_orphan(skb->sk);
return -EFAULT;
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
switch (optname) {
case AX25_WINDOW:
length = min_t(unsigned int, maxlen, sizeof(int));
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
switch (optname) {
case AX25_WINDOW:
static struct proto ax25_proto = {
.name = "AX25",
.owner = THIS_MODULE,
- .obj_size = sizeof(struct sock),
+ .obj_size = sizeof(struct ax25_sock),
};
static int ax25_create(struct net *net, struct socket *sock, int protocol,
if (sk == NULL)
return -ENOMEM;
- ax25 = sk->sk_protinfo = ax25_create_cb();
+ ax25 = ax25_sk(sk)->cb = ax25_create_cb();
if (!ax25) {
sk_free(sk);
return -ENOMEM;
sk->sk_state = TCP_ESTABLISHED;
sock_copy_flags(sk, osk);
- oax25 = ax25_sk(osk);
+ oax25 = sk_to_ax25(osk);
ax25->modulus = oax25->modulus;
ax25->backoff = oax25->backoff;
}
}
- sk->sk_protinfo = ax25;
+ ax25_sk(sk)->cb = ax25;
sk->sk_destruct = ax25_free_sock;
ax25->sk = sk;
sock_hold(sk);
sock_orphan(sk);
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
if (sk->sk_type == SOCK_SEQPACKET) {
switch (ax25->state) {
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
if (!sock_flag(sk, SOCK_ZAPPED)) {
err = -EINVAL;
goto out;
struct sockaddr *uaddr, int addr_len, int flags)
{
struct sock *sk = sock->sk;
- ax25_cb *ax25 = ax25_sk(sk), *ax25t;
+ ax25_cb *ax25 = sk_to_ax25(sk), *ax25t;
struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
ax25_digi *digi = NULL;
int ct = 0, err = 0;
memset(fsa, 0, sizeof(*fsa));
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
if (peer != 0) {
if (sk->sk_state != TCP_ESTABLISHED) {
return -EINVAL;
lock_sock(sk);
- ax25 = ax25_sk(sk);
+ ax25 = sk_to_ax25(sk);
if (sock_flag(sk, SOCK_ZAPPED)) {
err = -EADDRNOTAVAIL;
if (skb == NULL)
goto out;
- if (!ax25_sk(sk)->pidincl)
+ if (!sk_to_ax25(sk)->pidincl)
skb_pull(skb, 1); /* Remove PID */
skb_reset_transport_header(skb);
case SIOCAX25GETINFO:
case SIOCAX25GETINFOOLD: {
- ax25_cb *ax25 = ax25_sk(sk);
+ ax25_cb *ax25 = sk_to_ax25(sk);
struct ax25_info_struct ax25_info;
ax25_info.t1 = ax25->t1 / HZ;
return 0;
}
- ax25 = ax25_sk(make);
+ ax25 = sk_to_ax25(make);
skb_set_owner_r(skb, make);
skb_queue_head(&sk->sk_receive_queue, skb);
struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_keyid *key_keyid;
- u8 ip_proto;
+ u8 ip_proto = 0;
if (!data) {
data = skb->data;
static void __sk_free(struct sock *sk)
{
- if (unlikely(sock_diag_has_destroy_listeners(sk)))
+ if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
sock_diag_broadcast_destroy(sk);
else
sk_destruct(sk);
static void sock_def_destruct(struct sock *sk)
{
- kfree(sk->sk_protinfo);
}
void sk_send_sigurg(struct sock *sk)
clear_promisc:
if (dev->flags & IFF_PROMISC)
- dev_set_promiscuity(master, 0);
+ dev_set_promiscuity(master, -1);
clear_allmulti:
if (dev->flags & IFF_ALLMULTI)
dev_set_allmulti(master, -1);
nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
goto nla_put_failure;
if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
- in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev);
+ in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev);
if (in_dev &&
IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
rtm->rtm_flags |= RTNH_F_DEAD;
rtnh->rtnh_flags = nh->nh_flags & 0xFF;
if (nh->nh_flags & RTNH_F_LINKDOWN) {
- in_dev = __in_dev_get_rcu(nh->nh_dev);
+ in_dev = __in_dev_get_rtnl(nh->nh_dev);
if (in_dev &&
IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
rtnh->rtnh_flags |= RTNH_F_DEAD;
const struct rate_control_ops *ops;
const char *alg_name;
- kparam_block_sysfs_write(ieee80211_default_rc_algo);
+ kernel_param_lock(THIS_MODULE);
if (!name)
alg_name = ieee80211_default_rc_algo;
else
/* try built-in one if specific alg requested but not found */
if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT))
ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT);
- kparam_unblock_sysfs_write(ieee80211_default_rc_algo);
+ kernel_param_unlock(THIS_MODULE);
return ops;
}
[TCA_FLOWER_KEY_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
[TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 },
- [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 },
- [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_UDP_SRC] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_UDP_DST] = { .type = NLA_U16 },
};
static void fl_set_key_val(struct nlattr **tb,
return err;
no_route:
kfree_skb(nskb);
- IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
+
+ if (asoc)
+ IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
/* FIXME: Returning the 'err' will effect all the associations
* associated with a socket, although only one of the paths of the
if (sp->subscribe.sctp_data_io_event)
sctp_ulpevent_read_sndrcvinfo(event, msg);
-#if 0
- /* FIXME: we should be calling IP/IPv6 layers. */
- if (sk->sk_protinfo.af_inet.cmsg_flags)
- ip_cmsg_recv(msg, skb);
-#endif
-
err = copied;
/* If skb's length exceeds the user's buffer, update the skb and
#define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int);
-static struct kernel_param_ops param_ops_hashtbl_sz = {
+static const struct kernel_param_ops param_ops_hashtbl_sz = {
.set = param_set_hashtbl_sz,
.get = param_get_hashtbl_sz,
};
RPC_MAX_RESVPORT);
}
-static struct kernel_param_ops param_ops_portnr = {
+static const struct kernel_param_ops param_ops_portnr = {
.set = param_set_portnr,
.get = param_get_uint,
};
RPC_MAX_SLOT_TABLE);
}
-static struct kernel_param_ops param_ops_slot_table_size = {
+static const struct kernel_param_ops param_ops_slot_table_size = {
.set = param_set_slot_table_size,
.get = param_get_uint,
};
RPC_MAX_SLOT_TABLE_LIMIT);
}
-static struct kernel_param_ops param_ops_max_slot_table_size = {
+static const struct kernel_param_ops param_ops_max_slot_table_size = {
.set = param_set_max_slot_table_size,
.get = param_get_uint,
};
tipc_bclink_lock(net);
tipc_nmap_remove(&tn->bclink->bcast_nodes, addr);
+
+ /* Last node? => reset backlog queue */
+ if (!tn->bclink->bcast_nodes.count)
+ tipc_link_purge_backlog(&tn->bclink->link);
+
tipc_bclink_unlock(net);
}
l_ptr->reasm_buf = NULL;
}
-static void tipc_link_purge_backlog(struct tipc_link *l)
+void tipc_link_purge_backlog(struct tipc_link *l)
{
__skb_queue_purge(&l->backlogq);
l->backlog[TIPC_LOW_IMPORTANCE].len = 0;
int tipc_link_is_up(struct tipc_link *l_ptr);
int tipc_link_is_active(struct tipc_link *l_ptr);
void tipc_link_purge_queues(struct tipc_link *l_ptr);
+void tipc_link_purge_backlog(struct tipc_link *l);
void tipc_link_reset_all(struct tipc_node *node);
void tipc_link_reset(struct tipc_link *l_ptr);
int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
kvmconfig: kvm_guest.config
@:
+PHONY += xenconfig
+xenconfig: xen.config
+ @:
+
PHONY += tinyconfig
tinyconfig:
$(Q)$(MAKE) -f $(srctree)/Makefile allnoconfig tiny.config
@echo ' randconfig - New config with random answer to all options'
@echo ' listnewconfig - List new options'
@echo ' olddefconfig - Same as silentoldconfig but sets new symbols to their default value'
- @echo ' kvmconfig - Enable additional options for guest kernel support'
+ @echo ' kvmconfig - Enable additional options for kvm guest kernel support'
+ @echo ' xenconfig - Enable additional options for xen dom0 and guest kernel support'
@echo ' tinyconfig - Configure the tiniest possible kernel'
# lxdialog stuff
#define EM_MICROBLAZE 189
#endif
+#ifndef EM_ARCV2
+#define EM_ARCV2 195
+#endif
+
static int fd_map; /* File descriptor for file being modified. */
static int mmap_failed; /* Boolean flag. */
static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */
custom_sort = sort_relative_table;
break;
case EM_ARCOMPACT:
+ case EM_ARCV2:
case EM_ARM:
case EM_AARCH64:
case EM_MICROBLAZE:
static int param_set_aabool(const char *val, const struct kernel_param *kp);
static int param_get_aabool(char *buffer, const struct kernel_param *kp);
#define param_check_aabool param_check_bool
-static struct kernel_param_ops param_ops_aabool = {
+static const struct kernel_param_ops param_ops_aabool = {
.flags = KERNEL_PARAM_OPS_FL_NOARG,
.set = param_set_aabool,
.get = param_get_aabool
static int param_set_aauint(const char *val, const struct kernel_param *kp);
static int param_get_aauint(char *buffer, const struct kernel_param *kp);
#define param_check_aauint param_check_uint
-static struct kernel_param_ops param_ops_aauint = {
+static const struct kernel_param_ops param_ops_aauint = {
.set = param_set_aauint,
.get = param_get_aauint
};
static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp);
static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp);
#define param_check_aalockpolicy param_check_bool
-static struct kernel_param_ops param_ops_aalockpolicy = {
+static const struct kernel_param_ops param_ops_aalockpolicy = {
.flags = KERNEL_PARAM_OPS_FL_NOARG,
.set = param_set_aalockpolicy,
.get = param_get_aalockpolicy
return 0;
}
-static struct kernel_param_ops param_ops_bufsize = {
+static const struct kernel_param_ops param_ops_bufsize = {
.set = param_set_bufsize,
.get = param_get_uint,
};
sid.iface = SNDRV_CTL_ELEM_IFACE_CARD;
strlcpy(sid.name, name, sizeof(sid.name));
- while (snd_ctl_find_id(card, &sid))
+ while (snd_ctl_find_id(card, &sid)) {
sid.index++;
+ /* reset numid; otherwise snd_ctl_find_id() hits this again */
+ sid.numid = 0;
+ }
return sid.index;
}
static int init_info_for_card(struct snd_card *card)
{
- int err;
struct snd_info_entry *entry;
entry = snd_info_create_card_entry(card, "id", card->proc_root);
if (!entry) {
dev_dbg(card->dev, "unable to create card entry\n");
- return err;
+ return -ENOMEM;
}
entry->c.text.read = snd_card_id_read;
card->proc_id = entry;
#ifdef CONFIG_PM
static int param_set_xint(const char *val, const struct kernel_param *kp);
-static struct kernel_param_ops param_ops_xint = {
+static const struct kernel_param_ops param_ops_xint = {
.set = param_set_xint,
.get = param_get_int,
};
{ PCI_DEVICE(0x1022, 0x780d),
.driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB },
/* ATI HDMI */
+ { PCI_DEVICE(0x1002, 0x1308),
+ .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
{ PCI_DEVICE(0x1002, 0x793b),
.driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI },
{ PCI_DEVICE(0x1002, 0x7919),
.driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI },
{ PCI_DEVICE(0x1002, 0x970f),
.driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI },
+ { PCI_DEVICE(0x1002, 0x9840),
+ .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
{ PCI_DEVICE(0x1002, 0xaa00),
.driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI },
{ PCI_DEVICE(0x1002, 0xaa08),
static void eld_proc_free(struct hdmi_spec_per_pin *per_pin)
{
- if (!per_pin->codec->bus->shutdown && per_pin->proc_entry) {
+ if (!per_pin->codec->bus->shutdown) {
snd_info_free_entry(per_pin->proc_entry);
per_pin->proc_entry = NULL;
}
ALC269_FIXUP_LIFEBOOK,
ALC269_FIXUP_LIFEBOOK_EXTMIC,
ALC269_FIXUP_LIFEBOOK_HP_PIN,
+ ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT,
ALC269_FIXUP_AMIC,
ALC269_FIXUP_DMIC,
ALC269VB_FIXUP_AMIC,
ALC269_FIXUP_DELL3_MIC_NO_PRESENCE,
ALC269_FIXUP_HEADSET_MODE,
ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC,
+ ALC269_FIXUP_ASPIRE_HEADSET_MIC,
ALC269_FIXUP_ASUS_X101_FUNC,
ALC269_FIXUP_ASUS_X101_VERB,
ALC269_FIXUP_ASUS_X101,
ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC,
ALC293_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC292_FIXUP_TPT440_DOCK,
+ ALC292_FIXUP_TPT440_DOCK2,
ALC283_FIXUP_BXBT2807_MIC,
ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED,
ALC282_FIXUP_ASPIRE_V5_PINS,
ALC288_FIXUP_DELL_HEADSET_MODE,
ALC288_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC288_FIXUP_DELL_XPS_13_GPIO6,
+ ALC288_FIXUP_DELL_XPS_13,
+ ALC288_FIXUP_DISABLE_AAMIX,
ALC292_FIXUP_DELL_E7X,
ALC292_FIXUP_DISABLE_AAMIX,
ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
{ }
},
},
+ [ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc269_fixup_pincfg_no_hp_to_lineout,
+ },
[ALC269_FIXUP_AMIC] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
.type = HDA_FIXUP_FUNC,
.v.func = alc_fixup_headset_mode_no_hp_mic,
},
+ [ALC269_FIXUP_ASPIRE_HEADSET_MIC] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x01a1913c }, /* headset mic w/o jack detect */
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MODE,
+ },
[ALC286_FIXUP_SONY_MIC_NO_PRESENCE] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
.chain_id = ALC269_FIXUP_HEADSET_MODE
},
[ALC292_FIXUP_TPT440_DOCK] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc269_fixup_pincfg_no_hp_to_lineout,
+ .chained = true,
+ .chain_id = ALC292_FIXUP_TPT440_DOCK2
+ },
+ [ALC292_FIXUP_TPT440_DOCK2] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
{ 0x16, 0x21211010 }, /* dock headphone */
.chained = true,
.chain_id = ALC288_FIXUP_DELL1_MIC_NO_PRESENCE
},
+ [ALC288_FIXUP_DISABLE_AAMIX] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_disable_aamix,
+ .chained = true,
+ .chain_id = ALC288_FIXUP_DELL_XPS_13_GPIO6
+ },
+ [ALC288_FIXUP_DELL_XPS_13] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_dell_xps13,
+ .chained = true,
+ .chain_id = ALC288_FIXUP_DISABLE_AAMIX
+ },
[ALC292_FIXUP_DISABLE_AAMIX] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc_fixup_disable_aamix,
+ .chained = true,
+ .chain_id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE
},
[ALC292_FIXUP_DELL_E7X] = {
.type = HDA_FIXUP_FUNC,
SND_PCI_QUIRK(0x1025, 0x029b, "Acer 1810TZ", ALC269_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x1025, 0x0349, "Acer AOD260", ALC269_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x1025, 0x047c, "Acer AC700", ALC269_FIXUP_ACER_AC700),
+ SND_PCI_QUIRK(0x1025, 0x072d, "Acer Aspire V5-571G", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x0740, "Acer AO725", ALC271_FIXUP_HP_GATE_MIC_JACK),
SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK),
SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
SND_PCI_QUIRK(0x1028, 0x05f6, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK),
SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK),
+ SND_PCI_QUIRK(0x1028, 0x062e, "Dell Latitude E7450", ALC292_FIXUP_DELL_E7X),
SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK),
SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
- SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC292_FIXUP_DELL_E7X),
+ SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC288_FIXUP_DELL_XPS_13),
SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ),
SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX),
SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK),
+ SND_PCI_QUIRK(0x10cf, 0x159f, "Lifebook E780", ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT),
SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN),
SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN),
SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
struct snd_ctl_elem_value *ucontrol)
{
struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
- ucontrol->value.enumerated.item[0] = codec->power_save_node;
+ struct via_spec *spec = codec->spec;
+
+ ucontrol->value.enumerated.item[0] = spec->gen.power_down_unused;
return 0;
}
struct via_spec *spec = codec->spec;
bool val = !!ucontrol->value.enumerated.item[0];
- if (val == codec->power_save_node)
+ if (val == spec->gen.power_down_unused)
return 0;
- codec->power_save_node = val;
+ /* codec->power_save_node = val; */ /* widget PM seems yet broken */
spec->gen.power_down_unused = val;
analog_low_current_mode(codec);
return 1;