diff --git a/Documentation/ABI/testing/configfs-usb-gadget-loopback b/Documentation/ABI/testing/configfs-usb-gadget-loopback index 9aae5bfb99088..06beefbcf061a 100644 --- a/Documentation/ABI/testing/configfs-usb-gadget-loopback +++ b/Documentation/ABI/testing/configfs-usb-gadget-loopback @@ -5,4 +5,4 @@ Description: The attributes: qlen - depth of loopback queue - bulk_buflen - buffer length + buflen - buffer length diff --git a/Documentation/ABI/testing/configfs-usb-gadget-sourcesink b/Documentation/ABI/testing/configfs-usb-gadget-sourcesink index 29477c319f61b..bc7ff731aa0cf 100644 --- a/Documentation/ABI/testing/configfs-usb-gadget-sourcesink +++ b/Documentation/ABI/testing/configfs-usb-gadget-sourcesink @@ -9,4 +9,4 @@ Description: isoc_maxpacket - 0 - 1023 (fs), 0 - 1024 (hs/ss) isoc_mult - 0..2 (hs/ss only) isoc_maxburst - 0..15 (ss only) - qlen - buffer length + buflen - buffer length diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy index d0d0c578324c7..0a378a88217a4 100644 --- a/Documentation/ABI/testing/ima_policy +++ b/Documentation/ABI/testing/ima_policy @@ -20,17 +20,19 @@ Description: action: measure | dont_measure | appraise | dont_appraise | audit condition:= base | lsm [option] base: [[func=] [mask=] [fsmagic=] [fsuuid=] [uid=] - [fowner]] + [euid=] [fowner=]] lsm: [[subj_user=] [subj_role=] [subj_type=] [obj_user=] [obj_role=] [obj_type=]] option: [[appraise_type=]] [permit_directio] base: func:= [BPRM_CHECK][MMAP_CHECK][FILE_CHECK][MODULE_CHECK] [FIRMWARE_CHECK] - mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC] + mask:= [[^]MAY_READ] [[^]MAY_WRITE] [[^]MAY_APPEND] + [[^]MAY_EXEC] fsmagic:= hex value fsuuid:= file system UUID (e.g 8bcbe394-4f13-4144-be8e-5aa9ea2ce2f6) uid:= decimal value + euid:= decimal value fowner:=decimal value lsm: are LSM specific option: appraise_type:= [imasig] @@ -49,11 +51,25 @@ Description: dont_measure fsmagic=0x01021994 dont_appraise fsmagic=0x01021994 # RAMFS_MAGIC - dont_measure fsmagic=0x858458f6 dont_appraise fsmagic=0x858458f6 + # DEVPTS_SUPER_MAGIC + dont_measure fsmagic=0x1cd1 + dont_appraise fsmagic=0x1cd1 + # BINFMTFS_MAGIC + dont_measure fsmagic=0x42494e4d + dont_appraise fsmagic=0x42494e4d # SECURITYFS_MAGIC dont_measure fsmagic=0x73636673 dont_appraise fsmagic=0x73636673 + # SELINUX_MAGIC + dont_measure fsmagic=0xf97cff8c + dont_appraise fsmagic=0xf97cff8c + # CGROUP_SUPER_MAGIC + dont_measure fsmagic=0x27e0eb + dont_appraise fsmagic=0x27e0eb + # NSFS_MAGIC + dont_measure fsmagic=0x6e736673 + dont_appraise fsmagic=0x6e736673 measure func=BPRM_CHECK measure func=FILE_MMAP mask=MAY_EXEC @@ -70,10 +86,6 @@ Description: Examples of LSM specific definitions: SELinux: - # SELINUX_MAGIC - dont_measure fsmagic=0xf97cff8c - dont_appraise fsmagic=0xf97cff8c - dont_measure obj_type=var_log_t dont_appraise obj_type=var_log_t dont_measure obj_type=auditd_log_t diff --git a/Documentation/ABI/testing/sysfs-ata b/Documentation/ABI/testing/sysfs-ata index 0a932155cbbaf..9231daef3813e 100644 --- a/Documentation/ABI/testing/sysfs-ata +++ b/Documentation/ABI/testing/sysfs-ata @@ -90,6 +90,17 @@ gscr 130: SATA_PMP_GSCR_SII_GPIO Only valid if the device is a PM. +trim + + Shows the DSM TRIM mode currently used by the device. 
Valid + values are: + unsupported: Drive does not support DSM TRIM + unqueued: Drive supports unqueued DSM TRIM only + queued: Drive supports queued DSM TRIM + forced_unqueued: Drive's queued DSM support is known to be + buggy and only unqueued TRIM commands + are sent + spdn_cnt Number of times libata decided to lower the speed of link due to errors. diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index 3befcb19f4141..1fbdd79d16240 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -1165,10 +1165,8 @@ Description: object is near the sensor, usually by observing reflectivity of infrared or ultrasound emitted. Often these sensors are unit less and as such conversion - to SI units is not possible. Where it is, the units should - be meters. If such a conversion is not possible, the reported - values should behave in the same way as a distance, i.e. lower - values indicate something is closer to the sensor. + to SI units is not possible. Higher proximity measurements + indicate closer objects, and vice versa. What: /sys/.../iio:deviceX/in_illuminance_input What: /sys/.../iio:deviceX/in_illuminance_raw diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt index 0f7afb2bb442e..aef8cc5a677bd 100644 --- a/Documentation/DMA-API-HOWTO.txt +++ b/Documentation/DMA-API-HOWTO.txt @@ -25,13 +25,18 @@ physical addresses. These are the addresses in /proc/iomem. The physical address is not directly useful to a driver; it must use ioremap() to map the space and produce a virtual address. -I/O devices use a third kind of address: a "bus address" or "DMA address". -If a device has registers at an MMIO address, or if it performs DMA to read -or write system memory, the addresses used by the device are bus addresses. -In some systems, bus addresses are identical to CPU physical addresses, but -in general they are not. IOMMUs and host bridges can produce arbitrary +I/O devices use a third kind of address: a "bus address". If a device has +registers at an MMIO address, or if it performs DMA to read or write system +memory, the addresses used by the device are bus addresses. In some +systems, bus addresses are identical to CPU physical addresses, but in +general they are not. IOMMUs and host bridges can produce arbitrary mappings between physical and bus addresses. +From a device's point of view, DMA uses the bus address space, but it may +be restricted to a subset of that space. For example, even if a system +supports 64-bit addresses for main memory and PCI BARs, it may use an IOMMU +so devices only need to use 32-bit DMA addresses. + Here's a picture and some examples: CPU CPU Bus @@ -72,11 +77,11 @@ can use virtual address X to access the buffer, but the device itself cannot because DMA doesn't go through the CPU virtual memory system. In some simple systems, the device can do DMA directly to physical address -Y. But in many others, there is IOMMU hardware that translates bus +Y. But in many others, there is IOMMU hardware that translates DMA addresses to physical addresses, e.g., it translates Z to Y. This is part of the reason for the DMA API: the driver can give a virtual address X to an interface like dma_map_single(), which sets up any required IOMMU -mapping and returns the bus address Z. The driver then tells the device to +mapping and returns the DMA address Z. The driver then tells the device to do DMA to Z, and the IOMMU maps it to the buffer at address Y in system RAM.
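A minimal sketch of the flow just described (illustrative only, not part of this patch; the function and buffer names are invented, while dma_map_single(), dma_mapping_error() and dma_unmap_single() are the real APIs):

    #include <linux/dma-mapping.h>

    static int start_rx_dma(struct device *dev, void *buf, size_t size)
    {
            /* Map virtual address X; the value returned is the DMA address Z */
            dma_addr_t handle = dma_map_single(dev, buf, size, DMA_FROM_DEVICE);

            if (dma_mapping_error(dev, handle))
                    return -ENOMEM; /* never program a failed mapping into a device */

            /* ... tell the device to DMA to 'handle', wait for completion ... */

            dma_unmap_single(dev, handle, size, DMA_FROM_DEVICE);
            return 0;
    }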
@@ -98,7 +103,7 @@ First of all, you should make sure #include <linux/dma-mapping.h> is in your driver, which provides the definition of dma_addr_t. This type -can hold any valid DMA or bus address for the platform and should be used +can hold any valid DMA address for the platform and should be used everywhere you hold a DMA address returned from the DMA mapping functions. What memory is DMA'able? @@ -316,7 +321,7 @@ There are two types of DMA mappings: Think of "consistent" as "synchronous" or "coherent". The current default is to return consistent memory in the low 32 - bits of the bus space. However, for future compatibility you should + bits of the DMA space. However, for future compatibility you should set the consistent mask even if this default is fine for your driver. @@ -403,7 +408,7 @@ dma_alloc_coherent() returns two values: the virtual address which you can use to access it from the CPU and dma_handle which you pass to the card. -The CPU virtual address and the DMA bus address are both +The CPU virtual address and the DMA address are both guaranteed to be aligned to the smallest PAGE_SIZE order which is greater than or equal to the requested size. This invariant exists (for example) to guarantee that if you allocate a chunk @@ -645,8 +650,8 @@ PLEASE NOTE: The 'nents' argument to the dma_unmap_sg call must be dma_map_sg call. Every dma_map_{single,sg}() call should have its dma_unmap_{single,sg}() -counterpart, because the bus address space is a shared resource and -you could render the machine unusable by consuming all bus addresses. +counterpart, because the DMA address space is a shared resource and +you could render the machine unusable by consuming all DMA addresses. If you need to use the same streaming DMA region multiple times and touch the data in between the DMA transfers, the buffer needs to be synced diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 52088408668a8..7eba542eff7c8 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -18,10 +18,10 @@ Part I - dma_ API To get the dma_ API, you must #include <linux/dma-mapping.h>. This provides dma_addr_t and the interfaces described below. -A dma_addr_t can hold any valid DMA or bus address for the platform. It -can be given to a device to use as a DMA source or target. A CPU cannot -reference a dma_addr_t directly because there may be translation between -its physical address space and the bus address space. +A dma_addr_t can hold any valid DMA address for the platform. It can be +given to a device to use as a DMA source or target. A CPU cannot reference +a dma_addr_t directly because there may be translation between its physical +address space and the DMA address space. Part Ia - Using large DMA-coherent buffers ------------------------------------------ @@ -42,7 +42,7 @@ It returns a pointer to the allocated region (in the processor's virtual address space) or NULL if the allocation failed. It also returns a <dma_handle> which may be cast to an unsigned integer the -same width as the bus and given to the device as the bus address base of +same width as the bus and given to the device as the DMA address base of the region. Note: consistent memory can be expensive on some platforms, and the @@ -193,7 +193,7 @@ dma_map_single(struct device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) Maps a piece of processor virtual memory so it can be accessed by the -device and returns the bus address of the memory. +device and returns the DMA address of the memory.
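Putting the pieces above together, a driver typically declares its addressing capability once at probe time and then allocates a coherent region. A hedged sketch (all foo_* names and the ring size are invented; dma_set_mask_and_coherent() and dma_alloc_coherent() are the real helpers):

    #include <linux/dma-mapping.h>

    #define FOO_RING_BYTES 4096 /* invented size */

    struct foo_priv {
            void *ring;             /* CPU virtual address */
            dma_addr_t ring_dma;    /* DMA address programmed into the device */
    };

    static int foo_probe_dma(struct device *dev, struct foo_priv *priv)
    {
            /* Declare 32-bit DMA addressing, per the mask discussion above */
            if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)))
                    return -EIO;

            priv->ring = dma_alloc_coherent(dev, FOO_RING_BYTES,
                                            &priv->ring_dma, GFP_KERNEL);
            if (!priv->ring)
                    return -ENOMEM;

            return 0;
    }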
The direction for both APIs may be converted freely by casting. However the dma_ API uses a strongly typed enumerator for its @@ -212,20 +212,20 @@ contiguous piece of memory. For this reason, memory to be mapped by this API should be obtained from sources which guarantee it to be physically contiguous (like kmalloc). -Further, the bus address of the memory must be within the +Further, the DMA address of the memory must be within the dma_mask of the device (the dma_mask is a bit mask of the -addressable region for the device, i.e., if the bus address of -the memory ANDed with the dma_mask is still equal to the bus +addressable region for the device, i.e., if the DMA address of +the memory ANDed with the dma_mask is still equal to the DMA address, then the device can perform DMA to the memory). To ensure that the memory allocated by kmalloc is within the dma_mask, the driver may specify various platform-dependent flags to restrict -the bus address range of the allocation (e.g., on x86, GFP_DMA -guarantees to be within the first 16MB of available bus addresses, +the DMA address range of the allocation (e.g., on x86, GFP_DMA +guarantees to be within the first 16MB of available DMA addresses, as required by ISA devices). Note also that the above constraints on physical contiguity and dma_mask may not apply if the platform has an IOMMU (a device which -maps an I/O bus address to a physical memory address). However, to be +maps an I/O DMA address to a physical memory address). However, to be portable, device driver writers may *not* assume that such an IOMMU exists. @@ -296,7 +296,7 @@ reduce current DMA mapping usage or delay and try again later). dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) -Returns: the number of bus address segments mapped (this may be shorter +Returns: the number of DMA address segments mapped (this may be shorter than <nents> passed in if some elements of the scatter/gather list are physically or virtually adjacent and an IOMMU maps them with a single entry). @@ -340,7 +340,7 @@ must be the same as those passed in to the scatter/gather mapping API. Note: <nents> must be the number you passed in, *not* the number of -bus address entries returned. +DMA address entries returned. void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, @@ -507,7 +507,7 @@ it's asked for coherent memory for this device. phys_addr is the CPU physical address to which the memory is currently assigned (this will be ioremapped so the CPU can access the region). -device_addr is the bus address the device needs to be programmed +device_addr is the DMA address the device needs to be programmed with to actually address this memory (this will be handed out as the dma_addr_t in dma_alloc_coherent()). diff --git a/Documentation/HOWTO b/Documentation/HOWTO index 93aa8604630e7..21152d397b88e 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO @@ -218,16 +218,16 @@ The development process Linux kernel development process currently consists of a few different main kernel "branches" and lots of different subsystem-specific kernel branches.
These different branches are: - main 3.x kernel tree - 3.x.y -stable kernel tree - 3.x -git kernel patches + - main 4.x kernel tree + - 4.x.y -stable kernel tree + - 4.x -git kernel patches - subsystem specific kernel trees and patches - - the 3.x -next kernel tree for integration tests + - the 4.x -next kernel tree for integration tests -3.x kernel tree +4.x kernel tree ----------------- -3.x kernels are maintained by Linus Torvalds, and can be found on -kernel.org in the pub/linux/kernel/v3.x/ directory. Its development +4.x kernels are maintained by Linus Torvalds, and can be found on +kernel.org in the pub/linux/kernel/v4.x/ directory. Its development process is as follows: - As soon as a new kernel is released a two weeks window is open, during this period of time maintainers can submit big diffs to @@ -262,20 +262,20 @@ mailing list about kernel releases: released according to perceived bug status, not according to a preconceived timeline." -3.x.y -stable kernel tree +4.x.y -stable kernel tree --------------------------- Kernels with 3-part versions are -stable kernels. They contain relatively small and critical fixes for security problems or significant -regressions discovered in a given 3.x kernel. +regressions discovered in a given 4.x kernel. This is the recommended branch for users who want the most recent stable kernel and are not interested in helping test development/experimental versions. -If no 3.x.y kernel is available, then the highest numbered 3.x +If no 4.x.y kernel is available, then the highest numbered 4.x kernel is the current stable kernel. -3.x.y are maintained by the "stable" team <stable@vger.kernel.org>, and +4.x.y are maintained by the "stable" team <stable@vger.kernel.org>, and are released as needs dictate. The normal release period is approximately two weeks, but it can be longer if there are no pressing problems. A security-related problem, instead, can cause a release to happen almost @@ -285,7 +285,7 @@ The file Documentation/stable_kernel_rules.txt in the kernel tree documents what kinds of changes are acceptable for the -stable tree, and how the release process works. -3.x -git patches +4.x -git patches ------------------ These are daily snapshots of Linus' kernel tree which are managed in a git repository (hence the name.) These patches are usually released @@ -317,9 +317,9 @@ revisions to it, and maintainers can mark patches as under review, accepted, or rejected. Most of these patchwork sites are listed at http://patchwork.kernel.org/. -3.x -next kernel tree for integration tests +4.x -next kernel tree for integration tests --------------------------------------------- -Before updates from subsystem trees are merged into the mainline 3.x +Before updates from subsystem trees are merged into the mainline 4.x tree, they need to be integration-tested. For this purpose, a special testing repository exists into which virtually all subsystem trees are pulled on an almost daily basis: diff --git a/Documentation/devicetree/bindings/clock/keystone-pll.txt b/Documentation/devicetree/bindings/clock/keystone-pll.txt index 225990f79b7c5..47570d2072159 100644 --- a/Documentation/devicetree/bindings/clock/keystone-pll.txt +++ b/Documentation/devicetree/bindings/clock/keystone-pll.txt @@ -15,8 +15,8 @@ Required properties: - compatible : shall be "ti,keystone,main-pll-clock" or "ti,keystone,pll-clock" - clocks : parent clock phandle - reg - pll control0 and pll multiplier registers -- reg-names : control and multiplier.
The multiplier is applicable only for - main pll clock +- reg-names : control, multiplier and post-divider. The multiplier and + post-divider registers are applicable only for main pll clock - fixed-postdiv : fixed post divider value. If absent, use clkod register bits for postdiv @@ -25,8 +25,8 @@ Example: #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; fixed-postdiv = <2>; }; diff --git a/Documentation/devicetree/bindings/mfd/mfd.txt b/Documentation/devicetree/bindings/mfd/mfd.txt new file mode 100644 index 0000000000000..af9d6931a1a25 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/mfd.txt @@ -0,0 +1,41 @@ +Multi-Function Devices (MFD) + +These devices comprise a nexus for heterogeneous hardware blocks containing +more than one non-unique yet varying hardware functionality. + +A typical MFD can be: + +- A mixed signal ASIC on an external bus, sometimes a PMIC (Power Management + Integrated Circuit) that is manufactured in a lower technology node (rough + silicon) that handles analog drivers for things like audio amplifiers, LED + drivers, level shifters, PHY (physical interfaces to things like USB or + ethernet), regulators etc. + +- A range of memory registers containing "miscellaneous system registers" also + known as a system controller "syscon" or any other memory range containing a + mix of unrelated hardware devices. + +Optional properties: + +- compatible : "simple-mfd" - this signifies that the operating system should + consider all subnodes of the MFD device as separate devices akin to how + "simple-bus" indicates when to see subnodes as children for a simple + memory-mapped bus. For more complex devices, when the nexus driver has to + probe registers to figure out what child devices exist etc, this should not + be used. In the latter case the child devices will be determined by the + operating system. + +Example: + +foo@1000 { + compatible = "syscon", "simple-mfd"; + reg = <0x01000 0x1000>; + + led@08.0 { + compatible = "register-bit-led"; + offset = <0x08>; + mask = <0x01>; + label = "myled"; + default-state = "on"; + }; +}; diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt index 41b3f3f864e84..5d88f37480b6a 100644 --- a/Documentation/devicetree/bindings/net/ethernet.txt +++ b/Documentation/devicetree/bindings/net/ethernet.txt @@ -25,7 +25,11 @@ The following properties are common to the Ethernet controllers: flow control thresholds. - tx-fifo-depth: the size of the controller's transmit fifo in bytes. This is used for components that can have configurable fifo sizes. +- managed: string, specifies the PHY management type. Supported values are: + "auto", "in-band-status". "auto" is the default; it uses MDIO for + management if fixed-link is not specified. Child nodes of the Ethernet controller are typically the individual PHY devices connected via the MDIO bus (sometimes the MDIO bus controller is separate). They are described in the phy.txt file in this same directory. +For non-MDIO PHY management see fixed-link.txt.
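As a rough illustration of how a MAC driver could consume the "managed" property added above (of_property_read_string() is the real OF helper; the surrounding function is invented):

    #include <linux/of.h>
    #include <linux/string.h>

    /* Invented helper: true if the node requests in-band status */
    static bool foo_wants_inband_status(struct device_node *np)
    {
            const char *managed;

            /* a missing property means the default, "auto" */
            if (of_property_read_string(np, "managed", &managed))
                    return false;

            return strcmp(managed, "in-band-status") == 0;
    }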
diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt index 750d577e8083e..f5a8ca29aff06 100644 --- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt +++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt @@ -1,7 +1,7 @@ * Marvell Armada 370 / Armada XP Ethernet Controller (NETA) Required properties: -- compatible: should be "marvell,armada-370-neta". +- compatible: "marvell,armada-370-neta" or "marvell,armada-xp-neta". - reg: address and length of the register set for the device. - interrupts: interrupt for the device - phy: See ethernet.txt file in the same directory. diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-370-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-370-pinctrl.txt index adda2a8d1d529..e357b020861d6 100644 --- a/Documentation/devicetree/bindings/pinctrl/marvell,armada-370-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-370-pinctrl.txt @@ -92,5 +92,5 @@ mpp61 61 gpo, dev(wen1), uart1(txd), audio(rclk) mpp62 62 gpio, dev(a2), uart1(cts), tdm(drx), pcie(clkreq0), audio(mclk), uart0(cts) mpp63 63 gpo, spi0(sck), tclk -mpp64 64 gpio, spi0(miso), spi0-1(cs1) -mpp65 65 gpio, spi0(mosi), spi0-1(cs2) +mpp64 64 gpio, spi0(miso), spi0(cs1) +mpp65 65 gpio, spi0(mosi), spi0(cs2) diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-375-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-375-pinctrl.txt index 7de0cda4a3791..bedbe42c8c0ac 100644 --- a/Documentation/devicetree/bindings/pinctrl/marvell,armada-375-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-375-pinctrl.txt @@ -22,8 +22,8 @@ mpp5 5 gpio, dev(ad7), spi0(cs2), spi1(cs2) mpp6 6 gpio, dev(ad0), led(p1), audio(rclk) mpp7 7 gpio, dev(ad1), ptp(clk), led(p2), audio(extclk) mpp8 8 gpio, dev (bootcs), spi0(cs0), spi1(cs0) -mpp9 9 gpio, nf(wen), spi0(sck), spi1(sck) -mpp10 10 gpio, nf(ren), dram(vttctrl), led(c1) +mpp9 9 gpio, spi0(sck), spi1(sck), nand(we) +mpp10 10 gpio, dram(vttctrl), led(c1), nand(re) mpp11 11 gpio, dev(a0), led(c2), audio(sdo) mpp12 12 gpio, dev(a1), audio(bclk) mpp13 13 gpio, dev(readyn), pcie0(rstoutn), pcie1(rstoutn) diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-38x-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-38x-pinctrl.txt index b17c96849fc9e..4ac138aaaf879 100644 --- a/Documentation/devicetree/bindings/pinctrl/marvell,armada-38x-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-38x-pinctrl.txt @@ -27,15 +27,15 @@ mpp8 8 gpio, ge0(txd1), dev(ad10) mpp9 9 gpio, ge0(txd2), dev(ad11) mpp10 10 gpio, ge0(txd3), dev(ad12) mpp11 11 gpio, ge0(txctl), dev(ad13) -mpp12 12 gpio, ge0(rxd0), pcie0(rstout), pcie1(rstout) [1], spi0(cs1), dev(ad14) -mpp13 13 gpio, ge0(rxd1), pcie0(clkreq), pcie1(clkreq) [1], spi0(cs2), dev(ad15) -mpp14 14 gpio, ge0(rxd2), ptp(clk), m(vtt_ctrl), spi0(cs3), dev(wen1) -mpp15 15 gpio, ge0(rxd3), ge(mdc slave), pcie0(rstout), spi0(mosi), pcie1(rstout) [1] -mpp16 16 gpio, ge0(rxctl), ge(mdio slave), m(decc_err), spi0(miso), pcie0(clkreq) +mpp12 12 gpio, ge0(rxd0), pcie0(rstout), spi0(cs1), dev(ad14), pcie3(clkreq) +mpp13 13 gpio, ge0(rxd1), pcie0(clkreq), pcie1(clkreq) [1], spi0(cs2), dev(ad15), pcie2(clkreq) +mpp14 14 gpio, ge0(rxd2), ptp(clk), m(vtt_ctrl), spi0(cs3), dev(wen1), pcie3(clkreq) +mpp15 15 gpio, ge0(rxd3), ge(mdc slave), pcie0(rstout), 
spi0(mosi) +mpp16 16 gpio, ge0(rxctl), ge(mdio slave), m(decc_err), spi0(miso), pcie0(clkreq), pcie1(clkreq) [1] mpp17 17 gpio, ge0(rxclk), ptp(clk), ua1(rxd), spi0(sck), sata1(prsnt) -mpp18 18 gpio, ge0(rxerr), ptp(trig_gen), ua1(txd), spi0(cs0), pcie1(rstout) [1] -mpp19 19 gpio, ge0(col), ptp(event_req), pcie0(clkreq), sata1(prsnt), ua0(cts) -mpp20 20 gpio, ge0(txclk), ptp(clk), pcie1(rstout) [1], sata0(prsnt), ua0(rts) +mpp18 18 gpio, ge0(rxerr), ptp(trig_gen), ua1(txd), spi0(cs0) +mpp19 19 gpio, ge0(col), ptp(event_req), ge0(txerr), sata1(prsnt), ua0(cts) +mpp20 20 gpio, ge0(txclk), ptp(clk), sata0(prsnt), ua0(rts) mpp21 21 gpio, spi0(cs1), ge1(rxd0), sata0(prsnt), sd0(cmd), dev(bootcs) mpp22 22 gpio, spi0(mosi), dev(ad0) mpp23 23 gpio, spi0(sck), dev(ad2) @@ -58,23 +58,23 @@ mpp39 39 gpio, i2c1(sck), ge1(rxd2), ua0(cts), sd0(d1), dev(a2) mpp40 40 gpio, i2c1(sda), ge1(rxd3), ua0(rts), sd0(d2), dev(ad6) mpp41 41 gpio, ua1(rxd), ge1(rxctl), ua0(cts), spi1(cs3), dev(burst/last) mpp42 42 gpio, ua1(txd), ua0(rts), dev(ad7) -mpp43 43 gpio, pcie0(clkreq), m(vtt_ctrl), m(decc_err), pcie0(rstout), dev(clkout) -mpp44 44 gpio, sata0(prsnt), sata1(prsnt), sata2(prsnt) [2], sata3(prsnt) [3], pcie0(rstout) -mpp45 45 gpio, ref(clk_out0), pcie0(rstout), pcie1(rstout) [1], pcie2(rstout), pcie3(rstout) -mpp46 46 gpio, ref(clk_out1), pcie0(rstout), pcie1(rstout) [1], pcie2(rstout), pcie3(rstout) -mpp47 47 gpio, sata0(prsnt), sata1(prsnt), sata2(prsnt) [2], spi1(cs2), sata3(prsnt) [2] -mpp48 48 gpio, sata0(prsnt), m(vtt_ctrl), tdm2c(pclk), audio(mclk), sd0(d4) -mpp49 49 gpio, sata2(prsnt) [2], sata3(prsnt) [2], tdm2c(fsync), audio(lrclk), sd0(d5) -mpp50 50 gpio, pcie0(rstout), pcie1(rstout) [1], tdm2c(drx), audio(extclk), sd0(cmd) +mpp43 43 gpio, pcie0(clkreq), m(vtt_ctrl), m(decc_err), spi1(cs2), dev(clkout) +mpp44 44 gpio, sata0(prsnt), sata1(prsnt), sata2(prsnt) [2], sata3(prsnt) [3] +mpp45 45 gpio, ref(clk_out0), pcie0(rstout) +mpp46 46 gpio, ref(clk_out1), pcie0(rstout) +mpp47 47 gpio, sata0(prsnt), sata1(prsnt), sata2(prsnt) [2], sata3(prsnt) [2] +mpp48 48 gpio, sata0(prsnt), m(vtt_ctrl), tdm2c(pclk), audio(mclk), sd0(d4), pcie0(clkreq) +mpp49 49 gpio, sata2(prsnt) [2], sata3(prsnt) [2], tdm2c(fsync), audio(lrclk), sd0(d5), pcie1(clkreq) +mpp50 50 gpio, pcie0(rstout), tdm2c(drx), audio(extclk), sd0(cmd) mpp51 51 gpio, tdm2c(dtx), audio(sdo), m(decc_err) -mpp52 52 gpio, pcie0(rstout), pcie1(rstout) [1], tdm2c(intn), audio(sdi), sd0(d6) +mpp52 52 gpio, pcie0(rstout), tdm2c(intn), audio(sdi), sd0(d6) mpp53 53 gpio, sata1(prsnt), sata0(prsnt), tdm2c(rstn), audio(bclk), sd0(d7) -mpp54 54 gpio, sata0(prsnt), sata1(prsnt), pcie0(rstout), pcie1(rstout) [1], sd0(d3) +mpp54 54 gpio, sata0(prsnt), sata1(prsnt), pcie0(rstout), ge0(txerr), sd0(d3) mpp55 55 gpio, ua1(cts), ge(mdio), pcie1(clkreq) [1], spi1(cs1), sd0(d0) mpp56 56 gpio, ua1(rts), ge(mdc), m(decc_err), spi1(mosi) mpp57 57 gpio, spi1(sck), sd0(clk) mpp58 58 gpio, pcie1(clkreq) [1], i2c1(sck), pcie2(clkreq), spi1(miso), sd0(d1) -mpp59 59 gpio, pcie0(rstout), i2c1(sda), pcie1(rstout) [1], spi1(cs0), sd0(d2) +mpp59 59 gpio, pcie0(rstout), i2c1(sda), spi1(cs0), sd0(d2) [1]: only available on 88F6820 and 88F6828 [2]: only available on 88F6828 diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt index 373dbccd7ab0e..96e7744cab844 100644 --- a/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt +++ 
b/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt @@ -42,15 +42,15 @@ mpp20 20 gpio, ge0(rxd4), ge1(rxd2), lcd(d20), ptp(clk) mpp21 21 gpio, ge0(rxd5), ge1(rxd3), lcd(d21), mem(bat) mpp22 22 gpio, ge0(rxd6), ge1(rxctl), lcd(d22), sata0(prsnt) mpp23 23 gpio, ge0(rxd7), ge1(rxclk), lcd(d23), sata1(prsnt) -mpp24 24 gpio, lcd(hsync), sata1(prsnt), nf(bootcs-re), tdm(rst) -mpp25 25 gpio, lcd(vsync), sata0(prsnt), nf(bootcs-we), tdm(pclk) -mpp26 26 gpio, lcd(clk), tdm(fsync), vdd(cpu1-pd) +mpp24 24 gpio, lcd(hsync), sata1(prsnt), tdm(rst) +mpp25 25 gpio, lcd(vsync), sata0(prsnt), tdm(pclk) +mpp26 26 gpio, lcd(clk), tdm(fsync) mpp27 27 gpio, lcd(e), tdm(dtx), ptp(trig) mpp28 28 gpio, lcd(pwm), tdm(drx), ptp(evreq) -mpp29 29 gpio, lcd(ref-clk), tdm(int0), ptp(clk), vdd(cpu0-pd) +mpp29 29 gpio, lcd(ref-clk), tdm(int0), ptp(clk) mpp30 30 gpio, tdm(int1), sd0(clk) -mpp31 31 gpio, tdm(int2), sd0(cmd), vdd(cpu0-pd) -mpp32 32 gpio, tdm(int3), sd0(d0), vdd(cpu1-pd) +mpp31 31 gpio, tdm(int2), sd0(cmd) +mpp32 32 gpio, tdm(int3), sd0(d0) mpp33 33 gpio, tdm(int4), sd0(d1), mem(bat) mpp34 34 gpio, tdm(int5), sd0(d2), sata0(prsnt) mpp35 35 gpio, tdm(int6), sd0(d3), sata1(prsnt) @@ -58,21 +58,18 @@ mpp36 36 gpio, spi(mosi) mpp37 37 gpio, spi(miso) mpp38 38 gpio, spi(sck) mpp39 39 gpio, spi(cs0) -mpp40 40 gpio, spi(cs1), uart2(cts), lcd(vga-hsync), vdd(cpu1-pd), - pcie(clkreq0) +mpp40 40 gpio, spi(cs1), uart2(cts), lcd(vga-hsync), pcie(clkreq0) mpp41 41 gpio, spi(cs2), uart2(rts), lcd(vga-vsync), sata1(prsnt), pcie(clkreq1) -mpp42 42 gpio, uart2(rxd), uart0(cts), tdm(int7), tdm-1(timer), - vdd(cpu0-pd) -mpp43 43 gpio, uart2(txd), uart0(rts), spi(cs3), pcie(rstout), - vdd(cpu2-3-pd){1} +mpp42 42 gpio, uart2(rxd), uart0(cts), tdm(int7), tdm-1(timer) +mpp43 43 gpio, uart2(txd), uart0(rts), spi(cs3), pcie(rstout) mpp44 44 gpio, uart2(cts), uart3(rxd), spi(cs4), pcie(clkreq2), mem(bat) mpp45 45 gpio, uart2(rts), uart3(txd), spi(cs5), sata1(prsnt) mpp46 46 gpio, uart3(rts), uart1(rts), spi(cs6), sata0(prsnt) mpp47 47 gpio, uart3(cts), uart1(cts), spi(cs7), pcie(clkreq3), ref(clkout) -mpp48 48 gpio, tclk, dev(burst/last) +mpp48 48 gpio, dev(clkout), dev(burst/last) * Marvell Armada XP (mv78260 and mv78460 only) @@ -84,9 +81,9 @@ mpp51 51 gpio, dev(ad16) mpp52 52 gpio, dev(ad17) mpp53 53 gpio, dev(ad18) mpp54 54 gpio, dev(ad19) -mpp55 55 gpio, dev(ad20), vdd(cpu0-pd) -mpp56 56 gpio, dev(ad21), vdd(cpu1-pd) -mpp57 57 gpio, dev(ad22), vdd(cpu2-3-pd){1} +mpp55 55 gpio, dev(ad20) +mpp56 56 gpio, dev(ad21) +mpp57 57 gpio, dev(ad22) mpp58 58 gpio, dev(ad23) mpp59 59 gpio, dev(ad24) mpp60 60 gpio, dev(ad25) @@ -96,6 +93,3 @@ mpp63 63 gpio, dev(ad28) mpp64 64 gpio, dev(ad29) mpp65 65 gpio, dev(ad30) mpp66 66 gpio, dev(ad31) - -Notes: -* {1} vdd(cpu2-3-pd) only available on mv78460. diff --git a/Documentation/devicetree/bindings/spi/spi_pl022.txt b/Documentation/devicetree/bindings/spi/spi_pl022.txt index 22ed6797216d7..4d1673ca8cf80 100644 --- a/Documentation/devicetree/bindings/spi/spi_pl022.txt +++ b/Documentation/devicetree/bindings/spi/spi_pl022.txt @@ -4,9 +4,9 @@ Required properties: - compatible : "arm,pl022", "arm,primecell" - reg : Offset and length of the register set for the device - interrupts : Should contain SPI controller interrupt +- num-cs : total number of chipselects Optional properties: -- num-cs : total number of chipselects - cs-gpios : should specify GPIOs used for chipselects. The gpios will be referred to as reg = <index> in the SPI child nodes.
If unspecified, a single SPI device without a chip select can be used. diff --git a/Documentation/devicetree/bindings/usb/atmel-usb.txt b/Documentation/devicetree/bindings/usb/atmel-usb.txt index e180d56c75dbe..de773a00e2d47 100644 --- a/Documentation/devicetree/bindings/usb/atmel-usb.txt +++ b/Documentation/devicetree/bindings/usb/atmel-usb.txt @@ -60,9 +60,9 @@ Atmel High-Speed USB device controller Required properties: - compatible: Should be one of the following - "at91sam9rl-udc" - "at91sam9g45-udc" - "sama5d3-udc" + "atmel,at91sam9rl-udc" + "atmel,at91sam9g45-udc" + "atmel,sama5d3-udc" - reg: Address and length of the register set for the device - interrupts: Should contain usba interrupt - ep childnode: To specify the number of endpoints and their properties. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index c3b6b301d8b00..749b7bae0c00e 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -140,7 +140,8 @@ Table 1-1: Process specific entries in /proc stat Process status statm Process memory status information status Process status in human readable form - wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan + wchan Present with CONFIG_KALLSYMS=y: it shows the kernel function + symbol the task is blocked in - or "0" if not blocked. pagemap Page table stack Report full stack trace, enable via CONFIG_STACKTRACE smaps a extension based on maps, showing the memory consumption of @@ -309,7 +310,7 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7) blocked bitmap of blocked signals sigign bitmap of ignored signals sigcatch bitmap of caught signals - wchan address where process went to sleep + 0 (place holder, used to be the wchan address, use /proc/PID/wchan instead) 0 (place holder) 0 (place holder) exit_signal signal to send to parent thread on exit diff --git a/Documentation/hwmon/nct7904 b/Documentation/hwmon/nct7904 index 014f112e2a14e..57fffe33ebfcd 100644 --- a/Documentation/hwmon/nct7904 +++ b/Documentation/hwmon/nct7904 @@ -35,11 +35,11 @@ temp1_input Local temperature (1/1000 degree, temp[2-9]_input CPU temperatures (1/1000 degree, 0.125 degree resolution) -fan[1-4]_mode R/W, 0/1 for manual or SmartFan mode +pwm[1-4]_enable R/W, 1/2 for manual or SmartFan mode Setting SmartFan mode is supported only if it has been previously configured by BIOS (or configuration EEPROM) -fan[1-4]_pwm R/O in SmartFan mode, R/W in manual control mode +pwm[1-4] R/O in SmartFan mode, R/W in manual control mode The driver checks sensor control registers and does not export the sensors that are not enabled. Anyway, a sensor that is enabled may actually be not diff --git a/Documentation/input/alps.txt b/Documentation/input/alps.txt index c86f2f1ae4f6a..1fec1135791d9 100644 --- a/Documentation/input/alps.txt +++ b/Documentation/input/alps.txt @@ -119,8 +119,10 @@ ALPS Absolute Mode - Protocol Version 2 byte 5: 0 z6 z5 z4 z3 z2 z1 z0 Protocol Version 2 DualPoint devices send standard PS/2 mouse packets for -the DualPoint Stick. For non interleaved dualpoint devices the pointingstick -buttons get reported separately in the PSM, PSR and PSL bits. +the DualPoint Stick. The M, R and L bits signal the combined status of both +the pointingstick and touchpad buttons, except for Dell dualpoint devices +where the pointingstick buttons get reported separately in the PSM, PSR +and PSL bits. 
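For reference, the L/R/M bits of the stick's standard PS/2 packets sit in the usual positions (bits 0-2 of the first byte); a hypothetical decoding helper, not the driver's actual code:

    #include <linux/input.h>

    static void report_stick_buttons(struct input_dev *dev, u8 byte0)
    {
            /* standard PS/2 byte 0: bit 0 = L, bit 1 = R, bit 2 = M */
            input_report_key(dev, BTN_LEFT,   byte0 & 0x01);
            input_report_key(dev, BTN_RIGHT,  byte0 & 0x02);
            input_report_key(dev, BTN_MIDDLE, byte0 & 0x04);
            input_sync(dev);
    }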
Dualpoint device -- interleaved packet format --------------------------------------------- diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 74b6c6d972109..d2b1c40cb6667 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -952,6 +952,14 @@ When kbuild executes, the following steps are followed (roughly): $(KBUILD_ARFLAGS) set by the top level Makefile to "D" (deterministic mode) if this option is supported by $(AR). + ARCH_CPPFLAGS, ARCH_AFLAGS, ARCH_CFLAGS Override the kbuild defaults + + These variables are appended to the KBUILD_CPPFLAGS, + KBUILD_AFLAGS, and KBUILD_CFLAGS, respectively, after the + top-level Makefile has set any other flags. This provides a + means for an architecture to override the defaults. + + --- 6.2 Add prerequisites to archheaders: The archheaders: rule is used to generate header files that diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6726139bd2899..cd03a0faca8f9 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1398,7 +1398,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. The list of supported hash algorithms is defined in crypto/hash_info.h. - ima_tcb [IMA] + ima_policy= [IMA] + The builtin measurement policy to load during IMA + setup. Specifying "tcb" as the value measures all + programs exec'd, files mmap'd for exec, and all files + opened with the read mode bit set by either the + effective uid (euid=0) or uid=0. + Format: "tcb" + + ima_tcb [IMA] Deprecated. Use ima_policy= instead. Load a policy which meets the needs of the Trusted Computing Base. This means IMA will measure all programs exec'd, files mmap'd for exec, and all files diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 3e4c9b6b62b58..7fdba3fbf25a1 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1333,6 +1333,14 @@ accept_ra_from_local - BOOLEAN disabled if accept_ra_from_local is disabled on a specific interface. +accept_ra_min_hop_limit - INTEGER + Minimum hop limit Information in Router Advertisements. + + Hop limit Information in a Router Advertisement lower than this + value will be ignored. + + Default: 1 + accept_ra_pinfo - BOOLEAN Learn Prefix Information in Router Advertisement. diff --git a/Documentation/usb/gadget-testing.txt b/Documentation/usb/gadget-testing.txt index f45b2bf4b41de..820664af8f6a4 100644 --- a/Documentation/usb/gadget-testing.txt +++ b/Documentation/usb/gadget-testing.txt @@ -237,9 +237,7 @@ Testing the LOOPBACK function ----------------------------- device: run the gadget -host: test-usb - -http://www.linux-usb.org/usbtest/testusb.c +host: test-usb (tools/usb/testusb.c) 8. MASS STORAGE function ======================== @@ -588,9 +586,8 @@ Testing the SOURCESINK function ------------------------------- device: run the gadget -host: test-usb +host: test-usb (tools/usb/testusb.c) -http://www.linux-usb.org/usbtest/testusb.c 16.
UAC1 function ================= diff --git a/Makefile b/Makefile index f5c8983aeeb7f..39be1bbd373a3 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 0 +SUBLEVEL = 20 EXTRAVERSION = -NAME = Hurr durr I'ma sheep +NAME = Series 4800 # *DOCUMENTATION* # To see a list of typical targets execute "make help" @@ -783,10 +783,11 @@ endif include scripts/Makefile.kasan include scripts/Makefile.extrawarn -# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments -KBUILD_CPPFLAGS += $(KCPPFLAGS) -KBUILD_AFLAGS += $(KAFLAGS) -KBUILD_CFLAGS += $(KCFLAGS) +# Add any arch overrides and user supplied CPPFLAGS, AFLAGS and CFLAGS as the +# last assignments +KBUILD_CPPFLAGS += $(ARCH_CPPFLAGS) $(KCPPFLAGS) +KBUILD_AFLAGS += $(ARCH_AFLAGS) $(KAFLAGS) +KBUILD_CFLAGS += $(ARCH_CFLAGS) $(KCFLAGS) # Use --build-id when available. LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\ diff --git a/arch/arc/Makefile b/arch/arc/Makefile index db72fec0e160f..2f21e1e0ecf75 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -43,7 +43,8 @@ endif ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE # Generic build system uses -O2, we want -O3 -cflags-y += -O3 +# Note: No need to add to cflags-y as that happens anyway +ARCH_CFLAGS += -O3 endif # small data is default for elf32 tool-chain. If not usable, disable it diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 9917a45fc430d..20b7dc17979ea 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -43,6 +43,12 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ unsigned int temp; \ \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND themselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ __asm__ __volatile__( \ "1: llock %0, [%1] \n" \ " " #asm_op " %0, %0, %2 \n" \ @@ -52,6 +58,8 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ : "r"(&v->counter), "ir"(i) \ : "cc"); \ \ + smp_mb(); \ \ return temp; \ } @@ -105,6 +113,9 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ unsigned long flags; \ unsigned long temp; \ \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ atomic_ops_lock(flags); \ temp = v->counter; \ temp c_op i; \ @@ -142,9 +153,19 @@ ATOMIC_OP(and, &=, and) #define __atomic_add_unless(v, a, u) \ ({ \ int c, old; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND themselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ c = atomic_read(v); \ while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\ c = old; \ + \ + smp_mb(); \ + \ c; \ }) diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 4051e9525939f..dae03e66fa9ea 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -18,83 +18,49 @@ #include <linux/types.h> #include <linux/compiler.h> #include <asm/barrier.h> +#ifndef CONFIG_ARC_HAS_LLSC +#include <asm/smp.h> +#endif -/* - * Hardware assisted read-modify-write using ARC700 LLOCK/SCOND insns.
- * The Kconfig glue ensures that in SMP, this is only set if the container - * SoC/platform has cross-core coherent LLOCK/SCOND - */ #if defined(CONFIG_ARC_HAS_LLSC) -static inline void set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int temp; - - m += nr >> 5; - - /* - * ARC ISA micro-optimization: - * - * Instructions dealing with bitpos only consider lower 5 bits (0-31) - * e.g (x << 33) is handled like (x << 1) by ASL instruction - * (mem pointer still needs adjustment to point to next word) - * - * Hence the masking to clamp @nr arg can be elided in general. - * - * However if @nr is a constant (above assumed it in a register), - * and greater than 31, gcc can optimize away (x << 33) to 0, - * as overflow, given the 32-bit ISA. Thus masking needs to be done - * for constant @nr, but no code is generated due to const prop. - */ - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " bset %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); -} - -static inline void clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " bclr %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); -} - -static inline void change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; +/* + * Hardware assisted Atomic-R-M-W + */ - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " bxor %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); +#define BIT_OP(op, c_op, asm_op) \ +static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned int temp; \ + \ + m += nr >> 5; \ + \ + /* \ + * ARC ISA micro-optimization: \ + * \ + * Instructions dealing with bitpos only consider lower 5 bits \ + * e.g (x << 33) is handled like (x << 1) by ASL instruction \ + * (mem pointer still needs adjustment to point to next word) \ + * \ + * Hence the masking to clamp @nr arg can be elided in general. \ + * \ + * However if @nr is a constant (above assumed in a register), \ + * and greater than 31, gcc can optimize away (x << 33) to 0, \ + * as overflow, given the 32-bit ISA. Thus masking needs to be \ + * done for const @nr, but no code is generated due to gcc \ + * const prop. 
\ + */ \ + nr &= 0x1f; \ + \ + __asm__ __volatile__( \ + "1: llock %0, [%1] \n" \ + " " #asm_op " %0, %0, %2 \n" \ + " scond %0, [%1] \n" \ + " bnz 1b \n" \ + : "=&r"(temp) /* Early clobber, to prevent reg reuse */ \ + : "r"(m), /* Not "m": llock only supports reg direct addr mode */ \ + "ir"(nr) \ + : "cc"); \ } /* @@ -108,75 +74,37 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *m) * Since ARC lacks an equivalent h/w primitive, the bit is set unconditionally * and the old value of bit is returned */ -static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - __asm__ __volatile__( - "1: llock %0, [%2] \n" - " bset %1, %0, %3 \n" - " scond %1, [%2] \n" - " bnz 1b \n" - : "=&r"(old), "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); - - return (old & (1 << nr)) != 0; -} - -static inline int -test_and_clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int old, temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - __asm__ __volatile__( - "1: llock %0, [%2] \n" - " bclr %1, %0, %3 \n" - " scond %1, [%2] \n" - " bnz 1b \n" - : "=&r"(old), "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); - - return (old & (1 << nr)) != 0; -} - -static inline int -test_and_change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int old, temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - __asm__ __volatile__( - "1: llock %0, [%2] \n" - " bxor %1, %0, %3 \n" - " scond %1, [%2] \n" - " bnz 1b \n" - : "=&r"(old), "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); - - return (old & (1 << nr)) != 0; +#define TEST_N_BIT_OP(op, c_op, asm_op) \ +static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long old, temp; \ + \ + m += nr >> 5; \ + \ + nr &= 0x1f; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND themselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: llock %0, [%2] \n" \ + " " #asm_op " %1, %0, %3 \n" \ + " scond %1, [%2] \n" \ + " bnz 1b \n" \ + : "=&r"(old), "=&r"(temp) \ + : "r"(m), "ir"(nr) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return (old & (1 << nr)) != 0; \ } #else /* !CONFIG_ARC_HAS_LLSC */ -#include <asm/smp.h> - /* * Non hardware assisted Atomic-R-M-W * Locking would change to irq-disabling only (UP) and spinlocks (SMP) @@ -193,108 +121,37 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m) * at compile time) */ -static inline void set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - temp = *m; - *m = temp | (1UL << nr); - - bitops_unlock(flags); +#define BIT_OP(op, c_op, asm_op) \ +static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long temp, flags; \ + m += nr >> 5; \ + \ + /* \ + * spin lock/unlock provide the needed smp_mb() before/after \ + */ \ + bitops_lock(flags); \ + \ + temp = *m; \ + *m = temp c_op (1UL << (nr & 0x1f)); \ + \ + bitops_unlock(flags); \ } -static inline void clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - temp = *m; - *m = temp & ~(1UL << nr); - - bitops_unlock(flags); -} -static inline void change_bit(unsigned long nr, volatile unsigned long *m) -{ -
unsigned long temp, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - temp = *m; - *m = temp ^ (1UL << nr); - - bitops_unlock(flags); -} - -static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - old = *m; - *m = old | (1 << nr); - - bitops_unlock(flags); - - return (old & (1 << nr)) != 0; -} - -static inline int -test_and_clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - old = *m; - *m = old & ~(1 << nr); - - bitops_unlock(flags); - - return (old & (1 << nr)) != 0; -} - -static inline int -test_and_change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - old = *m; - *m = old ^ (1 << nr); - - bitops_unlock(flags); - - return (old & (1 << nr)) != 0; +#define TEST_N_BIT_OP(op, c_op, asm_op) \ +static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long old, flags; \ + m += nr >> 5; \ + \ + bitops_lock(flags); \ + \ + old = *m; \ + *m = old c_op (1UL << (nr & 0x1f)); \ + \ + bitops_unlock(flags); \ + \ + return (old & (1UL << (nr & 0x1f))) != 0; \ } #endif /* CONFIG_ARC_HAS_LLSC */ @@ -303,86 +160,45 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m) * Non atomic variants **************************************/ -static inline void __set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - temp = *m; - *m = temp | (1UL << nr); -} - -static inline void __clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - temp = *m; - *m = temp & ~(1UL << nr); -} - -static inline void __change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - temp = *m; - *m = temp ^ (1UL << nr); -} - -static inline int -__test_and_set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - old = *m; - *m = old | (1 << nr); - - return (old & (1 << nr)) != 0; +#define __BIT_OP(op, c_op, asm_op) \ +static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \ +{ \ + unsigned long temp; \ + m += nr >> 5; \ + \ + temp = *m; \ + *m = temp c_op (1UL << (nr & 0x1f)); \ } -static inline int -__test_and_clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - old = *m; - *m = old & ~(1 << nr); - - return (old & (1 << nr)) != 0; +#define __TEST_N_BIT_OP(op, c_op, asm_op) \ +static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long old; \ + m += nr >> 5; \ + \ + old = *m; \ + *m = old c_op (1UL << (nr & 0x1f)); \ + \ + return (old & (1UL << (nr & 0x1f))) != 0; \ } -static inline int -__test_and_change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - old = *m; - *m = old ^ (1 << nr); - - return (old & (1 << nr)) != 0; -} +#define BIT_OPS(op, 
c_op, asm_op) \ + \ + /* set_bit(), clear_bit(), change_bit() */ \ + BIT_OP(op, c_op, asm_op) \ + \ + /* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\ + TEST_N_BIT_OP(op, c_op, asm_op) \ + \ + /* __set_bit(), __clear_bit(), __change_bit() */ \ + __BIT_OP(op, c_op, asm_op) \ + \ + /* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\ + __TEST_N_BIT_OP(op, c_op, asm_op) + +BIT_OPS(set, |, bset) +BIT_OPS(clear, & ~, bclr) +BIT_OPS(change, ^, bxor) /* * This routine doesn't need to be atomic. @@ -394,10 +210,7 @@ test_bit(unsigned int nr, const volatile unsigned long *addr) addr += nr >> 5; - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - mask = 1 << nr; + mask = 1UL << (nr & 0x1f); return ((mask & *addr) != 0); } diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index 03cd6894855d6..44fd531f4d7b9 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -10,6 +10,8 @@ #define __ASM_ARC_CMPXCHG_H #include <linux/types.h> + +#include <asm/barrier.h> #include <asm/smp.h> #ifdef CONFIG_ARC_HAS_LLSC @@ -19,16 +21,25 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) { unsigned long prev; + /* + * Explicit full memory barrier needed before/after as + * LLOCK/SCOND themselves don't provide any such semantics + */ + smp_mb(); + __asm__ __volatile__( "1: llock %0, [%1] \n" " brne %0, %2, 2f \n" " scond %3, [%1] \n" " bnz 1b \n" "2: \n" - : "=&r"(prev) - : "r"(ptr), "ir"(expected), - "r"(new) /* can't be "ir". scond can't take limm for "b" */ - : "cc"); + : "=&r"(prev) /* Early clobber, to prevent reg reuse */ + : "r"(ptr), /* Not "m": llock only supports reg direct addr mode */ + "ir"(expected), + "r"(new) /* can't be "ir". scond can't take LIMM for "b" */ + : "cc", "memory"); /* so that gcc knows memory is being written here */ + + smp_mb(); return prev; } @@ -42,6 +53,9 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) int prev; volatile unsigned long *p = ptr; + /* + * spin lock/unlock provide the needed smp_mb() before/after + */ atomic_ops_lock(flags); prev = *p; if (prev == expected) @@ -77,12 +91,16 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, switch (size) { case 4: + smp_mb(); + __asm__ __volatile__( " ex %0, [%1] \n" : "+r"(val) : "r"(ptr) : "memory"); + smp_mb(); + return val; } return __xchg_bad_pointer(); diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 1bfeec2c0558c..2a58af7a2e3a4 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -63,7 +63,7 @@ struct callee_regs { long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; }; -#define instruction_pointer(regs) ((regs)->ret) +#define instruction_pointer(regs) (unsigned long)((regs)->ret) #define profile_pc(regs) instruction_pointer(regs) /* return 1 if user mode or 0 if kernel mode */ diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index b6a8c2dfbe6e4..e1651df6a93d5 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -22,24 +22,46 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + /* + * This smp_mb() is technically superfluous; we only need the one + * after the lock for providing the ACQUIRE semantics.
+ * However doing the "right" thing was regressing hackbench + * so keeping this, pending further investigation + */ + smp_mb(); + __asm__ __volatile__( "1: ex %0, [%1] \n" " breq %0, %2, 1b \n" : "+&r" (tmp) : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__) : "memory"); + + /* + * ACQUIRE barrier to ensure load/store after taking the lock + * don't "bleed-up" out of the critical section (leak-in is allowed) + * http://www.spinics.net/lists/kernel/msg2010409.html + * + * ARCv2 only has load-load, store-store and all-all barrier + * thus need the full all-all barrier + */ + smp_mb(); } static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + smp_mb(); + __asm__ __volatile__( "1: ex %0, [%1] \n" : "+r" (tmp) : "r"(&(lock->slock)) : "memory"); + smp_mb(); + return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__); } @@ -47,12 +69,22 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) { unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; + /* + * RELEASE barrier: given the instructions avail on ARCv2, full barrier + * is the only option + */ + smp_mb(); + __asm__ __volatile__( " ex %0, [%1] \n" : "+r" (tmp) : "r"(&(lock->slock)) : "memory"); + /* + * superfluous, but keeping for now - see pairing version in + * arch_spin_lock above + */ smp_mb(); } diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index fd2ec50102f20..57b58f52d825b 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -266,7 +266,6 @@ static int arc_pmu_add(struct perf_event *event, int flags) static int arc_pmu_device_probe(struct platform_device *pdev) { - struct arc_pmu *arc_pmu; struct arc_reg_pct_build pct_bcr; struct arc_reg_cc_build cc_bcr; int i, j, ret; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 45df48ba0b128..19f4cc634b0e8 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -538,6 +538,7 @@ config ARCH_ORION5X select MVEBU_MBUS select PCI select PLAT_ORION_LEGACY + select MULTI_IRQ_HANDLER help Support for the following Marvell Orion 5x series SoCs: Orion-1 (5181), Orion-VoIP (5181L), Orion-NAS (5182), diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 0c12ffb155a23..f775d7161ffb9 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -161,10 +161,9 @@ choice mobile SoCs in the Kona family of chips (e.g. bcm28155, bcm11351, etc...) 
- config DEBUG_BCM63XX + config DEBUG_BCM63XX_UART bool "Kernel low-level debugging on BCM63XX UART" depends on ARCH_BCM_63XX - select DEBUG_UART_BCM63XX config DEBUG_BERLIN_UART bool "Marvell Berlin SoC Debug UART" @@ -1304,7 +1303,7 @@ config DEBUG_LL_INCLUDE default "debug/vf.S" if DEBUG_VF_UART default "debug/vt8500.S" if DEBUG_VT8500_UART0 default "debug/zynq.S" if DEBUG_ZYNQ_UART0 || DEBUG_ZYNQ_UART1 - default "debug/bcm63xx.S" if DEBUG_UART_BCM63XX + default "debug/bcm63xx.S" if DEBUG_BCM63XX_UART default "debug/digicolor.S" if DEBUG_DIGICOLOR_UA0 default "mach/debug-macro.S" @@ -1320,10 +1319,6 @@ config DEBUG_UART_8250 ARCH_IOP33X || ARCH_IXP4XX || \ ARCH_LPC32XX || ARCH_MV78XX0 || ARCH_ORION5X || ARCH_RPC -# Compatibility options for BCM63xx -config DEBUG_UART_BCM63XX - def_bool ARCH_BCM_63XX - config DEBUG_UART_PHYS hex "Physical base address of debug UART" default 0x00100a00 if DEBUG_NETX_UART @@ -1415,7 +1410,7 @@ config DEBUG_UART_PHYS default 0xfffb0000 if DEBUG_OMAP1UART1 || DEBUG_OMAP7XXUART1 default 0xfffb0800 if DEBUG_OMAP1UART2 || DEBUG_OMAP7XXUART2 default 0xfffb9800 if DEBUG_OMAP1UART3 || DEBUG_OMAP7XXUART3 - default 0xfffe8600 if DEBUG_UART_BCM63XX + default 0xfffe8600 if DEBUG_BCM63XX_UART default 0xfffff700 if ARCH_IOP33X depends on ARCH_EP93XX || \ DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \ @@ -1427,7 +1422,7 @@ config DEBUG_UART_PHYS DEBUG_RCAR_GEN2_SCIF0 || DEBUG_RCAR_GEN2_SCIF2 || \ DEBUG_RMOBILE_SCIFA0 || DEBUG_RMOBILE_SCIFA1 || \ DEBUG_RMOBILE_SCIFA4 || DEBUG_S3C24XX_UART || \ - DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART || \ + DEBUG_BCM63XX_UART || DEBUG_ASM9260_UART || \ DEBUG_SIRFSOC_UART || DEBUG_DIGICOLOR_UA0 config DEBUG_UART_VIRT @@ -1466,7 +1461,7 @@ config DEBUG_UART_VIRT default 0xfb009000 if DEBUG_REALVIEW_STD_PORT default 0xfb10c000 if DEBUG_REALVIEW_PB1176_PORT default 0xfc40ab00 if DEBUG_BRCMSTB_UART - default 0xfcfe8600 if DEBUG_UART_BCM63XX + default 0xfcfe8600 if DEBUG_BCM63XX_UART default 0xfd000000 if ARCH_SPEAR3XX || ARCH_SPEAR6XX default 0xfd000000 if ARCH_SPEAR13XX default 0xfd012000 if ARCH_MV78XX0 @@ -1516,7 +1511,7 @@ config DEBUG_UART_VIRT DEBUG_UART_8250 || DEBUG_UART_PL01X || DEBUG_MESON_UARTAO || \ DEBUG_NETX_UART || \ DEBUG_QCOM_UARTDM || DEBUG_S3C24XX_UART || \ - DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART || \ + DEBUG_BCM63XX_UART || DEBUG_ASM9260_UART || \ DEBUG_SIRFSOC_UART || DEBUG_DIGICOLOR_UA0 config DEBUG_UART_8250_SHIFT diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 985227cbbd1bd..47f10e7ad1f6e 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -50,6 +50,14 @@ AS += -EL LD += -EL endif +# +# The Scalar Replacement of Aggregates (SRA) optimization pass in GCC 4.9 and +# later may result in code being generated that handles signed short and signed +# char struct members incorrectly. So disable it. +# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932) +# +KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) + # This selects which instruction set is used. 
# Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index bd245d34952d2..a0765e7ed6c7d 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -57,5 +57,5 @@ extern char * strstr(const char * s1, const char *s2); int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)) { - return decompress(input, len, NULL, NULL, output, NULL, error); + return __decompress(input, len, NULL, NULL, output, 0, NULL, error); } diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts index 7128fad991ac3..e8397879d0a7e 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts @@ -354,11 +354,12 @@ /* SMPS9 unused */ ldo1_reg: ldo1 { - /* VDD_SD */ + /* VDD_SD / VDDSHV8 */ regulator-name = "ldo1"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-boot-on; + regulator-always-on; }; ldo2_reg: ldo2 { @@ -544,6 +545,10 @@ phy-supply = <&ldousb_reg>; }; +&usb2_phy2 { + phy-supply = <&ldousb_reg>; +}; + &usb1 { dr_mode = "host"; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/armada-370-xp.dtsi b/arch/arm/boot/dts/armada-370-xp.dtsi index ec96f0b363465..06a2f2ae9d1e4 100644 --- a/arch/arm/boot/dts/armada-370-xp.dtsi +++ b/arch/arm/boot/dts/armada-370-xp.dtsi @@ -270,7 +270,6 @@ }; eth0: ethernet@70000 { - compatible = "marvell,armada-370-neta"; reg = <0x70000 0x4000>; interrupts = <8>; clocks = <&gateclk 4>; @@ -286,7 +285,6 @@ }; eth1: ethernet@74000 { - compatible = "marvell,armada-370-neta"; reg = <0x74000 0x4000>; interrupts = <10>; clocks = <&gateclk 3>; diff --git a/arch/arm/boot/dts/armada-370.dtsi b/arch/arm/boot/dts/armada-370.dtsi index 00b50db57c9c0..ca4257b2f77dc 100644 --- a/arch/arm/boot/dts/armada-370.dtsi +++ b/arch/arm/boot/dts/armada-370.dtsi @@ -307,6 +307,14 @@ dmacap,memset; }; }; + + ethernet@70000 { + compatible = "marvell,armada-370-neta"; + }; + + ethernet@74000 { + compatible = "marvell,armada-370-neta"; + }; }; }; }; diff --git a/arch/arm/boot/dts/armada-385-db-ap.dts b/arch/arm/boot/dts/armada-385-db-ap.dts index 7219ac3a3d900..9f730e8e9f872 100644 --- a/arch/arm/boot/dts/armada-385-db-ap.dts +++ b/arch/arm/boot/dts/armada-385-db-ap.dts @@ -46,7 +46,7 @@ / { model = "Marvell Armada 385 Access Point Development Board"; - compatible = "marvell,a385-db-ap", "marvell,armada385", "marvell,armada38x"; + compatible = "marvell,a385-db-ap", "marvell,armada385", "marvell,armada380"; chosen { stdout-path = "serial1:115200n8"; diff --git a/arch/arm/boot/dts/armada-388-gp.dts b/arch/arm/boot/dts/armada-388-gp.dts index 78514ab0b47ac..757ac079e7f23 100644 --- a/arch/arm/boot/dts/armada-388-gp.dts +++ b/arch/arm/boot/dts/armada-388-gp.dts @@ -288,16 +288,6 @@ gpio = <&expander0 4 GPIO_ACTIVE_HIGH>; }; - reg_usb2_1_vbus: v5-vbus1 { - compatible = "regulator-fixed"; - regulator-name = "v5.0-vbus1"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - enable-active-high; - regulator-always-on; - gpio = <&expander0 4 GPIO_ACTIVE_HIGH>; - }; - reg_sata0: pwr-sata0 { compatible = "regulator-fixed"; regulator-name = "pwr_en_sata0"; diff --git a/arch/arm/boot/dts/armada-xp-mv78260.dtsi b/arch/arm/boot/dts/armada-xp-mv78260.dtsi index 8479fdc9e9c24..c5fdc99f0dbeb 100644 --- a/arch/arm/boot/dts/armada-xp-mv78260.dtsi +++ 
b/arch/arm/boot/dts/armada-xp-mv78260.dtsi @@ -318,7 +318,7 @@ }; eth3: ethernet@34000 { - compatible = "marvell,armada-370-neta"; + compatible = "marvell,armada-xp-neta"; reg = <0x34000 0x4000>; interrupts = <14>; clocks = <&gateclk 1>; diff --git a/arch/arm/boot/dts/armada-xp-mv78460.dtsi b/arch/arm/boot/dts/armada-xp-mv78460.dtsi index 661d54c815802..0e24f1a38540e 100644 --- a/arch/arm/boot/dts/armada-xp-mv78460.dtsi +++ b/arch/arm/boot/dts/armada-xp-mv78460.dtsi @@ -356,7 +356,7 @@ }; eth3: ethernet@34000 { - compatible = "marvell,armada-370-neta"; + compatible = "marvell,armada-xp-neta"; reg = <0x34000 0x4000>; interrupts = <14>; clocks = <&gateclk 1>; diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi index 013d63f69e361..8fdd6d7c0ab12 100644 --- a/arch/arm/boot/dts/armada-xp.dtsi +++ b/arch/arm/boot/dts/armada-xp.dtsi @@ -177,7 +177,7 @@ }; eth2: ethernet@30000 { - compatible = "marvell,armada-370-neta"; + compatible = "marvell,armada-xp-neta"; reg = <0x30000 0x4000>; interrupts = <12>; clocks = <&gateclk 2>; @@ -220,6 +220,14 @@ }; }; + ethernet@70000 { + compatible = "marvell,armada-xp-neta"; + }; + + ethernet@74000 { + compatible = "marvell,armada-xp-neta"; + }; + xor@f0900 { compatible = "marvell,orion-xor"; reg = <0xF0900 0x100 diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts index c740e1a2a3a5c..4f29968076ceb 100644 --- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts @@ -98,7 +98,7 @@ phy0: ethernet-phy@1 { interrupt-parent = <&pioE>; - interrupts = <1 IRQ_TYPE_EDGE_FALLING>; + interrupts = <1 IRQ_TYPE_LEVEL_LOW>; reg = <1>; }; }; diff --git a/arch/arm/boot/dts/at91-sama5d4ek.dts b/arch/arm/boot/dts/at91-sama5d4ek.dts index 89ef4a540db58..d4d24a0814048 100644 --- a/arch/arm/boot/dts/at91-sama5d4ek.dts +++ b/arch/arm/boot/dts/at91-sama5d4ek.dts @@ -108,8 +108,8 @@ mmc0: mmc@f8000000 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_mmc0_clk_cmd_dat0 &pinctrl_mmc0_dat1_3 &pinctrl_mmc0_cd>; - slot@1 { - reg = <1>; + slot@0 { + reg = <0>; bus-width = <4>; cd-gpios = <&pioE 5 0>; }; @@ -141,8 +141,15 @@ }; macb0: ethernet@f8020000 { + pinctrl-0 = <&pinctrl_macb0_rmii &pinctrl_macb0_phy_irq>; phy-mode = "rmii"; status = "okay"; + + ethernet-phy@1 { + reg = <0x1>; + interrupt-parent = <&pioE>; + interrupts = <1 IRQ_TYPE_LEVEL_LOW>; + }; }; mmc1: mmc@fc000000 { @@ -174,6 +181,10 @@ pinctrl@fc06a000 { board { + pinctrl_macb0_phy_irq: macb0_phy_irq { + atmel,pins = + ; + }; pinctrl_mmc0_cd: mmc0_cd { atmel,pins = ; diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index 70e59c5ceb2f7..e54421176af8a 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -1148,7 +1148,7 @@ usb2: gadget@fff78000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,at91sam9g45-udc"; reg = <0x00600000 0x80000 0xfff78000 0x400>; interrupts = <27 IRQ_TYPE_LEVEL_HIGH 0>; diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 3aa56ae3410a5..3314a73037546 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -1062,7 +1062,7 @@ usb2: gadget@f803c000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,at91sam9g45-udc"; reg = <0x00500000 0x80000 0xf803c000 0x400>; interrupts = <23 IRQ_TYPE_LEVEL_HIGH 0>; diff --git 
a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts index aa465904f6cc4..096f68be99e2b 100644 --- a/arch/arm/boot/dts/dra7-evm.dts +++ b/arch/arm/boot/dts/dra7-evm.dts @@ -686,7 +686,8 @@ &dcan1 { status = "ok"; - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&dcan1_pins_default>; + pinctrl-names = "default", "sleep", "active"; + pinctrl-0 = <&dcan1_pins_sleep>; pinctrl-1 = <&dcan1_pins_sleep>; + pinctrl-2 = <&dcan1_pins_default>; }; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index f03a091cd0766..dfcc0dd637e5b 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -116,7 +116,7 @@ ranges = <0 0x2000 0x2000>; scm_conf: scm_conf@0 { - compatible = "syscon"; + compatible = "syscon", "simple-bus"; reg = <0x0 0x1400>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts index ce0390f081d92..6b05f6a0ba84a 100644 --- a/arch/arm/boot/dts/dra72-evm.dts +++ b/arch/arm/boot/dts/dra72-evm.dts @@ -497,9 +497,10 @@ &dcan1 { status = "ok"; - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&dcan1_pins_default>; + pinctrl-names = "default", "sleep", "active"; + pinctrl-0 = <&dcan1_pins_sleep>; pinctrl-1 = <&dcan1_pins_sleep>; + pinctrl-2 = <&dcan1_pins_default>; }; &qspi { diff --git a/arch/arm/boot/dts/exynos3250-rinato.dts b/arch/arm/boot/dts/exynos3250-rinato.dts index 0b9906880c0c7..75aba40c69e13 100644 --- a/arch/arm/boot/dts/exynos3250-rinato.dts +++ b/arch/arm/boot/dts/exynos3250-rinato.dts @@ -181,7 +181,7 @@ display-timings { timing-0 { - clock-frequency = <0>; + clock-frequency = <4600000>; hactive = <320>; vactive = <320>; hfront-porch = <1>; diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts index 146e71118a72b..a0ec8bff83ddf 100644 --- a/arch/arm/boot/dts/exynos5420-peach-pit.dts +++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts @@ -915,6 +915,11 @@ }; }; +&pmu_system_controller { + assigned-clocks = <&pmu_system_controller 0>; + assigned-clock-parents = <&clock CLK_FIN_PLL>; +}; + &rtc { status = "okay"; clocks = <&clock CLK_RTC>, <&max77802 MAX77802_CLK_32K_AP>; diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts index 02eb8b15374f3..1171f347878a7 100644 --- a/arch/arm/boot/dts/exynos5800-peach-pi.dts +++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts @@ -878,6 +878,11 @@ }; }; +&pmu_system_controller { + assigned-clocks = <&pmu_system_controller 0>; + assigned-clock-parents = <&clock CLK_FIN_PLL>; +}; + &rtc { status = "okay"; clocks = <&clock CLK_RTC>, <&max77802 MAX77802_CLK_32K_AP>; diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi index bbcfb5a19c770..0cb8b0b11c3ff 100644 --- a/arch/arm/boot/dts/imx23.dtsi +++ b/arch/arm/boot/dts/imx23.dtsi @@ -435,6 +435,7 @@ interrupts = <36 37 38 39 40 41 42 43 44>; status = "disabled"; clocks = <&clks 26>; + #io-channel-cells = <1>; }; spdif@80054000 { diff --git a/arch/arm/boot/dts/imx25-pdk.dts b/arch/arm/boot/dts/imx25-pdk.dts index dd45e6971bc35..9351296356dcc 100644 --- a/arch/arm/boot/dts/imx25-pdk.dts +++ b/arch/arm/boot/dts/imx25-pdk.dts @@ -10,6 +10,7 @@ */ /dts-v1/; +#include #include #include "imx25.dtsi" @@ -114,8 +115,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio2 1 0>; - wp-gpios = <&gpio2 0 0>; + cd-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 0 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx27.dtsi 
b/arch/arm/boot/dts/imx27.dtsi index bc215e4b75fd5..6a87233d0b194 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -477,7 +477,10 @@ compatible = "fsl,imx27-usb"; reg = <0x10024000 0x200>; interrupts = <56>; - clocks = <&clks IMX27_CLK_USB_IPG_GATE>; + clocks = <&clks IMX27_CLK_USB_IPG_GATE>, + <&clks IMX27_CLK_USB_AHB_GATE>, + <&clks IMX27_CLK_USB_DIV>; + clock-names = "ipg", "ahb", "per"; fsl,usbmisc = <&usbmisc 0>; status = "disabled"; }; @@ -486,7 +489,10 @@ compatible = "fsl,imx27-usb"; reg = <0x10024200 0x200>; interrupts = <54>; - clocks = <&clks IMX27_CLK_USB_IPG_GATE>; + clocks = <&clks IMX27_CLK_USB_IPG_GATE>, + <&clks IMX27_CLK_USB_AHB_GATE>, + <&clks IMX27_CLK_USB_DIV>; + clock-names = "ipg", "ahb", "per"; fsl,usbmisc = <&usbmisc 1>; dr_mode = "host"; status = "disabled"; @@ -496,7 +502,10 @@ compatible = "fsl,imx27-usb"; reg = <0x10024400 0x200>; interrupts = <55>; - clocks = <&clks IMX27_CLK_USB_IPG_GATE>; + clocks = <&clks IMX27_CLK_USB_IPG_GATE>, + <&clks IMX27_CLK_USB_AHB_GATE>, + <&clks IMX27_CLK_USB_DIV>; + clock-names = "ipg", "ahb", "per"; fsl,usbmisc = <&usbmisc 2>; dr_mode = "host"; status = "disabled"; @@ -506,7 +515,6 @@ #index-cells = <1>; compatible = "fsl,imx27-usbmisc"; reg = <0x10024600 0x200>; - clocks = <&clks IMX27_CLK_USB_AHB_GATE>; }; sahara2: sahara@10025000 { diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi index b6478e97d6a7e..e6540b5cfa4ca 100644 --- a/arch/arm/boot/dts/imx35.dtsi +++ b/arch/arm/boot/dts/imx35.dtsi @@ -286,8 +286,8 @@ can1: can@53fe4000 { compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; reg = <0x53fe4000 0x1000>; - clocks = <&clks 33>; - clock-names = "ipg"; + clocks = <&clks 33>, <&clks 33>; + clock-names = "ipg", "per"; interrupts = <43>; status = "disabled"; }; @@ -295,8 +295,8 @@ can2: can@53fe8000 { compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; reg = <0x53fe8000 0x1000>; - clocks = <&clks 34>; - clock-names = "ipg"; + clocks = <&clks 34>, <&clks 34>; + clock-names = "ipg", "per"; interrupts = <44>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx51-apf51dev.dts b/arch/arm/boot/dts/imx51-apf51dev.dts index 93d3ea12328c5..0f3fe29b816eb 100644 --- a/arch/arm/boot/dts/imx51-apf51dev.dts +++ b/arch/arm/boot/dts/imx51-apf51dev.dts @@ -98,7 +98,7 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio2 29 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio2 29 GPIO_ACTIVE_LOW>; bus-width = <4>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-ard.dts b/arch/arm/boot/dts/imx53-ard.dts index e9337ad52f59b..3bc18835fb4bb 100644 --- a/arch/arm/boot/dts/imx53-ard.dts +++ b/arch/arm/boot/dts/imx53-ard.dts @@ -103,8 +103,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio1 1 0>; - wp-gpios = <&gpio1 9 0>; + cd-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 9 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-m53evk.dts b/arch/arm/boot/dts/imx53-m53evk.dts index d0e0f57eb432e..53f40885c5306 100644 --- a/arch/arm/boot/dts/imx53-m53evk.dts +++ b/arch/arm/boot/dts/imx53-m53evk.dts @@ -124,8 +124,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio1 1 0>; - wp-gpios = <&gpio1 9 0>; + cd-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 9 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi index 181ae5ebf23f6..1f55187ed9ce3 100644 --- 
a/arch/arm/boot/dts/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi @@ -147,8 +147,8 @@ &esdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc3>; - cd-gpios = <&gpio3 11 0>; - wp-gpios = <&gpio3 12 0>; + cd-gpios = <&gpio3 11 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio3 12 GPIO_ACTIVE_HIGH>; bus-width = <8>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-smd.dts b/arch/arm/boot/dts/imx53-smd.dts index 1d325576bcc04..fc89ce1e5763a 100644 --- a/arch/arm/boot/dts/imx53-smd.dts +++ b/arch/arm/boot/dts/imx53-smd.dts @@ -41,8 +41,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio3 13 0>; - wp-gpios = <&gpio4 11 0>; + cd-gpios = <&gpio3 13 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio4 11 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-tqma53.dtsi b/arch/arm/boot/dts/imx53-tqma53.dtsi index 4f1f0e2868bf1..e03373a58760f 100644 --- a/arch/arm/boot/dts/imx53-tqma53.dtsi +++ b/arch/arm/boot/dts/imx53-tqma53.dtsi @@ -41,8 +41,8 @@ pinctrl-0 = <&pinctrl_esdhc2>, <&pinctrl_esdhc2_cdwp>; vmmc-supply = <&reg_3p3v>; - wp-gpios = <&gpio1 2 0>; - cd-gpios = <&gpio1 4 0>; + wp-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx53-tx53.dtsi b/arch/arm/boot/dts/imx53-tx53.dtsi index 704bd72cbfec8..d3e50b22064f2 100644 --- a/arch/arm/boot/dts/imx53-tx53.dtsi +++ b/arch/arm/boot/dts/imx53-tx53.dtsi @@ -183,7 +183,7 @@ }; &esdhc1 { - cd-gpios = <&gpio3 24 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>; fsl,wp-controller; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; @@ -191,7 +191,7 @@ }; &esdhc2 { - cd-gpios = <&gpio3 25 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio3 25 GPIO_ACTIVE_LOW>; fsl,wp-controller; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc2>; diff --git a/arch/arm/boot/dts/imx53-voipac-bsb.dts b/arch/arm/boot/dts/imx53-voipac-bsb.dts index c17d3ad6dba50..fc51b87ad2087 100644 --- a/arch/arm/boot/dts/imx53-voipac-bsb.dts +++ b/arch/arm/boot/dts/imx53-voipac-bsb.dts @@ -119,8 +119,8 @@ &esdhc2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc2>; - cd-gpios = <&gpio3 25 0>; - wp-gpios = <&gpio2 19 0>; + cd-gpios = <&gpio3 25 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 19 GPIO_ACTIVE_HIGH>; vmmc-supply = <&reg_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6dl-riotboard.dts b/arch/arm/boot/dts/imx6dl-riotboard.dts index 43cb3fd76be76..5111f5170d534 100644 --- a/arch/arm/boot/dts/imx6dl-riotboard.dts +++ b/arch/arm/boot/dts/imx6dl-riotboard.dts @@ -305,8 +305,8 @@ &usdhc2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc2>; - cd-gpios = <&gpio1 4 0>; - wp-gpios = <&gpio1 2 0>; + cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; vmmc-supply = <&reg_3p3v>; status = "okay"; }; @@ -314,8 +314,8 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - cd-gpios = <&gpio7 0 0>; - wp-gpios = <&gpio7 1 0>; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>; vmmc-supply = <&reg_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6q-arm2.dts b/arch/arm/boot/dts/imx6q-arm2.dts index 78df05e9d1ce6..d6515f7a56c42 100644 --- a/arch/arm/boot/dts/imx6q-arm2.dts +++ b/arch/arm/boot/dts/imx6q-arm2.dts @@ -11,6 +11,7 @@ */ /dts-v1/; +#include <dt-bindings/gpio/gpio.h> #include "imx6q.dtsi" / { @@ -196,8 +197,8 @@ }; &usdhc3 { - cd-gpios = <&gpio6 11 0>; - wp-gpios = <&gpio6 14 0>; + cd-gpios = <&gpio6 11 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio6 14
GPIO_ACTIVE_HIGH>; vmmc-supply = <&reg_3p3v>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3 diff --git a/arch/arm/boot/dts/imx6q-gk802.dts b/arch/arm/boot/dts/imx6q-gk802.dts index 703539cf36d30..00bd63e63d0cd 100644 --- a/arch/arm/boot/dts/imx6q-gk802.dts +++ b/arch/arm/boot/dts/imx6q-gk802.dts @@ -7,6 +7,7 @@ */ /dts-v1/; +#include <dt-bindings/gpio/gpio.h> #include "imx6q.dtsi" / { @@ -161,7 +162,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; bus-width = <4>; - cd-gpios = <&gpio6 11 0>; + cd-gpios = <&gpio6 11 GPIO_ACTIVE_LOW>; vmmc-supply = <&reg_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6q-tbs2910.dts b/arch/arm/boot/dts/imx6q-tbs2910.dts index a43abfa21e33b..5645d52850a7e 100644 --- a/arch/arm/boot/dts/imx6q-tbs2910.dts +++ b/arch/arm/boot/dts/imx6q-tbs2910.dts @@ -251,7 +251,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; - cd-gpios = <&gpio2 2 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; vmmc-supply = <&reg_3p3v>; status = "okay"; }; @@ -260,7 +260,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; bus-width = <4>; - cd-gpios = <&gpio2 0 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>; vmmc-supply = <&reg_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi b/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi index e6d9195a1da7b..f4d6ae564ead2 100644 --- a/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi +++ b/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi @@ -173,7 +173,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc1>; vmmc-supply = <&reg_3p3v>; - cd-gpios = <&gpio4 7 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio4 7 GPIO_ACTIVE_LOW>; status = "okay"; }; @@ -181,7 +181,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc2>; vmmc-supply = <&reg_3p3v>; - cd-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio4 8 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi b/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi index d033bb1820602..6a846e0ef5054 100644 --- a/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi +++ b/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi @@ -259,6 +259,6 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_cubox_i_usdhc2_aux &pinctrl_cubox_i_usdhc2>; vmmc-supply = <&reg_3p3v>; - cd-gpios = <&gpio1 4 0>; + cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi b/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi index 2c253d6d20bd1..45e7c39e80d58 100644 --- a/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi +++ b/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi @@ -1,3 +1,5 @@ +#include <dt-bindings/gpio/gpio.h> + / { regulators { compatible = "simple-bus"; @@ -181,7 +183,7 @@ &usdhc2 { /* module slot */ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc2>; - cd-gpios = <&gpio2 2 0>; + cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi index b5756c21ea1d5..4493f6e993301 100644 --- a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi @@ -318,7 +318,7 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - cd-gpios = <&gpio7 0 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; vmmc-supply = <&reg_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi index 86f03c1b147c6..a857d1294609a 100644 --- a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi @@
-324,7 +324,7 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - cd-gpios = <&gpio7 0 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; vmmc-supply = <&reg_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi index 4a8d97f477592..1afe3385e2d28 100644 --- a/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi @@ -417,7 +417,7 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - cd-gpios = <&gpio7 0 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; vmmc-supply = <&reg_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi b/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi index 151a3db2aea95..c6833d2b4ff5c 100644 --- a/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi +++ b/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi @@ -41,6 +41,7 @@ */ #include "imx6qdl-microsom.dtsi" #include "imx6qdl-microsom-ar8035.dtsi" +#include <dt-bindings/gpio/gpio.h> / { chosen { @@ -288,6 +289,6 @@ &pinctrl_hummingboard_usdhc2 >; vmmc-supply = <&reg_3p3v>; - cd-gpios = <&gpio1 4 0>; + cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi index 08218120e770a..64e0b6178bf43 100644 --- a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi @@ -449,7 +449,7 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - cd-gpios = <&gpio7 0 0>; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; vmmc-supply = <&reg_3p3v>; status = "okay"; }; @@ -457,7 +457,7 @@ &usdhc4 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc4>; - cd-gpios = <&gpio2 6 0>; + cd-gpios = <&gpio2 6 GPIO_ACTIVE_LOW>; vmmc-supply = <&reg_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi index 1ce6133b67f5c..9e6ecd99b472d 100644 --- a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi +++ b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi @@ -409,8 +409,8 @@ &usdhc2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc2>; - cd-gpios = <&gpio1 4 0>; - wp-gpios = <&gpio1 2 0>; + cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; status = "disabled"; }; @@ -418,7 +418,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3 &pinctrl_usdhc3_cdwp>; - cd-gpios = <&gpio1 27 0>; - wp-gpios = <&gpio1 29 0>; + cd-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 29 GPIO_ACTIVE_HIGH>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx6qdl-rex.dtsi b/arch/arm/boot/dts/imx6qdl-rex.dtsi index 488a640796ac0..a503562438888 100644 --- a/arch/arm/boot/dts/imx6qdl-rex.dtsi +++ b/arch/arm/boot/dts/imx6qdl-rex.dtsi @@ -35,7 +35,6 @@ compatible = "regulator-fixed"; reg = <1>; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usbh1>; regulator-name = "usbh1_vbus"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; @@ -47,7 +46,6 @@ compatible = "regulator-fixed"; reg = <2>; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usbotg>; regulator-name = "usb_otg_vbus"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; @@ -342,7 +340,7 @@ pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 3 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 3 GPIO_ACTIVE_HIGH>; status = "okay"; }; @@ -351,6 +349,6 @@ pinctrl-0 = <&pinctrl_usdhc3>; bus-width = <4>; cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; -
wp-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi index 3b24b12651b2b..e329ca5c33227 100644 --- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi @@ -467,8 +467,8 @@ pinctrl-0 = <&pinctrl_usdhc3>; pinctrl-1 = <&pinctrl_usdhc3_100mhz>; pinctrl-2 = <&pinctrl_usdhc3_200mhz>; - cd-gpios = <&gpio6 15 0>; - wp-gpios = <&gpio1 13 0>; + cd-gpios = <&gpio6 15 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 13 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi index 0b28a9d5241e5..1e27485e42931 100644 --- a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi @@ -444,8 +444,8 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - cd-gpios = <&gpio7 0 0>; - wp-gpios = <&gpio7 1 0>; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>; vmmc-supply = <&reg_3p3v>; status = "okay"; }; @@ -453,7 +453,7 @@ &usdhc4 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc4>; - cd-gpios = <&gpio2 6 0>; + cd-gpios = <&gpio2 6 GPIO_ACTIVE_LOW>; vmmc-supply = <&reg_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi index a626e6dd8022c..944eb81cb2b8c 100644 --- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi @@ -562,8 +562,8 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <8>; - cd-gpios = <&gpio2 2 0>; - wp-gpios = <&gpio2 3 0>; + cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 3 GPIO_ACTIVE_HIGH>; status = "okay"; }; @@ -571,8 +571,8 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; bus-width = <8>; - cd-gpios = <&gpio2 0 0>; - wp-gpios = <&gpio2 1 0>; + cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-tx6.dtsi b/arch/arm/boot/dts/imx6qdl-tx6.dtsi index f02b80b41d4fb..da08de324e9eb 100644 --- a/arch/arm/boot/dts/imx6qdl-tx6.dtsi +++ b/arch/arm/boot/dts/imx6qdl-tx6.dtsi @@ -680,7 +680,7 @@ pinctrl-0 = <&pinctrl_usdhc1>; bus-width = <4>; no-1-8-v; - cd-gpios = <&gpio7 2 0>; + cd-gpios = <&gpio7 2 GPIO_ACTIVE_LOW>; fsl,wp-controller; status = "okay"; }; @@ -690,7 +690,7 @@ pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; no-1-8-v; - cd-gpios = <&gpio7 3 0>; + cd-gpios = <&gpio7 3 GPIO_ACTIVE_LOW>; fsl,wp-controller; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi index 5fb091675582e..9e096d811beda 100644 --- a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi +++ b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi @@ -9,6 +9,8 @@ * */ +#include <dt-bindings/gpio/gpio.h> + / { regulators { compatible = "simple-bus"; @@ -250,13 +252,13 @@ &usdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc1>; - cd-gpios = <&gpio1 2 0>; + cd-gpios = <&gpio1 2 GPIO_ACTIVE_LOW>; status = "okay"; }; &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - cd-gpios = <&gpio3 9 0>; + cd-gpios = <&gpio3 9 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index f74a8ded515f2..38c786018a09d 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -153,10 +153,10 @@ interrupt-names = "msi"; #interrupt-cells = <1>;
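The long run of cd-gpios/wp-gpios changes above (continuing below) replaces a bare flags cell of 0, which decodes as GPIO_ACTIVE_HIGH, with the polarity these card-detect and write-protect lines actually have, since the MMC core now honours the DT flag through gpiolib. A consumer-side sketch of why the flag matters (assumes a gpio_desc already requested for the CD line; this is not the esdhc/usdhc driver code):

	#include <linux/gpio/consumer.h>

	static bool sketch_card_present(struct gpio_desc *cd)
	{
		/* gpiolib folds GPIO_ACTIVE_LOW into the logical value, so
		 * 1 == card present only if the DT polarity matches the wiring */
		return gpiod_get_value_cansleep(cd) != 0;
	}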
interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 2 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 3 &intc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 4 &intc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 1 &gpc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &gpc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &gpc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &gpc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6QDL_CLK_PCIE_AXI>, <&clks IMX6QDL_CLK_LVDS1_GATE>, <&clks IMX6QDL_CLK_PCIE_REF_125M>; diff --git a/arch/arm/boot/dts/imx6sl-evk.dts b/arch/arm/boot/dts/imx6sl-evk.dts index 945887d3fdb35..b84dff2e94ea1 100644 --- a/arch/arm/boot/dts/imx6sl-evk.dts +++ b/arch/arm/boot/dts/imx6sl-evk.dts @@ -617,8 +617,8 @@ pinctrl-1 = <&pinctrl_usdhc1_100mhz>; pinctrl-2 = <&pinctrl_usdhc1_200mhz>; bus-width = <8>; - cd-gpios = <&gpio4 7 0>; - wp-gpios = <&gpio4 6 0>; + cd-gpios = <&gpio4 7 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio4 6 GPIO_ACTIVE_HIGH>; status = "okay"; }; @@ -627,8 +627,8 @@ pinctrl-0 = <&pinctrl_usdhc2>; pinctrl-1 = <&pinctrl_usdhc2_100mhz>; pinctrl-2 = <&pinctrl_usdhc2_200mhz>; - cd-gpios = <&gpio5 0 0>; - wp-gpios = <&gpio4 29 0>; + cd-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio4 29 GPIO_ACTIVE_HIGH>; status = "okay"; }; @@ -637,6 +637,6 @@ pinctrl-0 = <&pinctrl_usdhc3>; pinctrl-1 = <&pinctrl_usdhc3_100mhz>; pinctrl-2 = <&pinctrl_usdhc3_200mhz>; - cd-gpios = <&gpio3 22 0>; + cd-gpios = <&gpio3 22 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6sx-sabreauto.dts b/arch/arm/boot/dts/imx6sx-sabreauto.dts index e3c0b63c22056..115f3fd789718 100644 --- a/arch/arm/boot/dts/imx6sx-sabreauto.dts +++ b/arch/arm/boot/dts/imx6sx-sabreauto.dts @@ -49,7 +49,7 @@ pinctrl-1 = <&pinctrl_usdhc3_100mhz>; pinctrl-2 = <&pinctrl_usdhc3_200mhz>; bus-width = <8>; - cd-gpios = <&gpio7 10 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio7 10 GPIO_ACTIVE_LOW>; wp-gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>; keep-power-in-suspend; enable-sdio-wakeup; @@ -61,7 +61,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc4>; bus-width = <8>; - cd-gpios = <&gpio7 11 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio7 11 GPIO_ACTIVE_LOW>; no-1-8-v; keep-power-in-suspend; enable-sdio-wakup; diff --git a/arch/arm/boot/dts/imx6sx-sdb.dtsi b/arch/arm/boot/dts/imx6sx-sdb.dtsi index cef04cef3a807..ac88c3467078e 100644 --- a/arch/arm/boot/dts/imx6sx-sdb.dtsi +++ b/arch/arm/boot/dts/imx6sx-sdb.dtsi @@ -293,7 +293,7 @@ pinctrl-1 = <&pinctrl_usdhc3_100mhz>; pinctrl-2 = <&pinctrl_usdhc3_200mhz>; bus-width = <8>; - cd-gpios = <&gpio2 10 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio2 10 GPIO_ACTIVE_LOW>; wp-gpios = <&gpio2 15 GPIO_ACTIVE_HIGH>; keep-power-in-suspend; enable-sdio-wakeup; @@ -304,7 +304,7 @@ &usdhc4 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc4>; - cd-gpios = <&gpio6 21 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio6 21 GPIO_ACTIVE_LOW>; wp-gpios = <&gpio6 20 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/k2e-clocks.dtsi b/arch/arm/boot/dts/k2e-clocks.dtsi index 4773d6af66a0a..d56d68fe7ffc6 100644 --- a/arch/arm/boot/dts/k2e-clocks.dtsi +++ b/arch/arm/boot/dts/k2e-clocks.dtsi @@ -13,9 +13,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: 
papllclk@2620358 { diff --git a/arch/arm/boot/dts/k2hk-clocks.dtsi b/arch/arm/boot/dts/k2hk-clocks.dtsi index d5adee3c00675..af9b7190533aa 100644 --- a/arch/arm/boot/dts/k2hk-clocks.dtsi +++ b/arch/arm/boot/dts/k2hk-clocks.dtsi @@ -22,9 +22,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/k2l-clocks.dtsi b/arch/arm/boot/dts/k2l-clocks.dtsi index eb1e3e29f0738..ef8464bb11ffd 100644 --- a/arch/arm/boot/dts/k2l-clocks.dtsi +++ b/arch/arm/boot/dts/k2l-clocks.dtsi @@ -22,9 +22,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/omap2430.dtsi b/arch/arm/boot/dts/omap2430.dtsi index 11a7963be0035..2390f387c2716 100644 --- a/arch/arm/boot/dts/omap2430.dtsi +++ b/arch/arm/boot/dts/omap2430.dtsi @@ -51,7 +51,8 @@ }; scm_conf: scm_conf@270 { - compatible = "syscon"; + compatible = "syscon", + "simple-bus"; reg = <0x270 0x240>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts index a5474113cd506..67659a0ed13e1 100644 --- a/arch/arm/boot/dts/omap3-beagle.dts +++ b/arch/arm/boot/dts/omap3-beagle.dts @@ -202,7 +202,7 @@ tfp410_pins: pinmux_tfp410_pins { pinctrl-single,pins = < - 0x194 (PIN_OUTPUT | MUX_MODE4) /* hdq_sio.gpio_170 */ + 0x196 (PIN_OUTPUT | MUX_MODE4) /* hdq_sio.gpio_170 */ >; }; diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index f884d6adb71e8..84be9da74c7e5 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -191,7 +191,8 @@ }; omap4_padconf_global: omap4_padconf_global@5a0 { - compatible = "syscon"; + compatible = "syscon", + "simple-bus"; reg = <0x5a0 0x170>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts index 74777a6e200a6..2e7c1364cb005 100644 --- a/arch/arm/boot/dts/omap5-uevm.dts +++ b/arch/arm/boot/dts/omap5-uevm.dts @@ -31,6 +31,24 @@ regulator-max-microvolt = <3000000>; }; + mmc3_pwrseq: sdhci0_pwrseq { + compatible = "mmc-pwrseq-simple"; + clocks = <&clk32kgaudio>; + clock-names = "ext_clock"; + }; + + vmmcsdio_fixed: fixedregulator-mmcsdio { + compatible = "regulator-fixed"; + regulator-name = "vmmcsdio_fixed"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + gpio = <&gpio5 12 GPIO_ACTIVE_HIGH>; /* gpio140 WLAN_EN */ + enable-active-high; + startup-delay-us = <70000>; + pinctrl-names = "default"; + pinctrl-0 = <&wlan_pins>; + }; + /* HS USB Host PHY on PORT 2 */ hsusb2_phy: hsusb2_phy { compatible = "usb-nop-xceiv"; @@ -174,8 +192,8 @@ i2c5_pins: pinmux_i2c5_pins { pinctrl-single,pins = < - 0x184 (PIN_INPUT | MUX_MODE0) /* i2c5_scl */ - 0x186 (PIN_INPUT | MUX_MODE0) /* i2c5_sda */ + 0x186 (PIN_INPUT | MUX_MODE0) /* i2c5_scl */ + 0x188 (PIN_INPUT | MUX_MODE0) /* i2c5_sda */ >; }; @@ -197,12 +215,20 @@ >; }; - mcspi4_pins: pinmux_mcspi4_pins { + mmc3_pins: pinmux_mmc3_pins { + pinctrl-single,pins = < + 
OMAP5_IOPAD(0x01a4, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_clk */ + OMAP5_IOPAD(0x01a6, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_cmd */ + OMAP5_IOPAD(0x01a8, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data0 */ + OMAP5_IOPAD(0x01aa, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data1 */ + OMAP5_IOPAD(0x01ac, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data2 */ + OMAP5_IOPAD(0x01ae, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data3 */ + >; + }; + + wlan_pins: pinmux_wlan_pins { pinctrl-single,pins = < - 0x164 (PIN_INPUT | MUX_MODE1) /* mcspi4_clk */ - 0x168 (PIN_INPUT | MUX_MODE1) /* mcspi4_simo */ - 0x16a (PIN_INPUT | MUX_MODE1) /* mcspi4_somi */ - 0x16c (PIN_INPUT | MUX_MODE1) /* mcspi4_cs0 */ + OMAP5_IOPAD(0x1bc, PIN_OUTPUT | MUX_MODE6) /* mcspi1_clk.gpio5_140 */ >; }; @@ -276,6 +302,12 @@ 0x1A (PIN_OUTPUT | MUX_MODE0) /* fref_clk1_out, USB hub clk */ >; }; + + wlcore_irq_pin: pinmux_wlcore_irq_pin { + pinctrl-single,pins = < + OMAP5_IOPAD(0x040, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE6) /* llia_wakereqin.gpio1_wk14 */ + >; + }; }; &mmc1 { @@ -290,8 +322,25 @@ }; &mmc3 { + vmmc-supply = <&vmmcsdio_fixed>; + mmc-pwrseq = <&mmc3_pwrseq>; bus-width = <4>; - ti,non-removable; + non-removable; + cap-power-off-card; + pinctrl-names = "default"; + pinctrl-0 = <&mmc3_pins &wlcore_irq_pin>; + interrupts-extended = <&gic GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH + &omap5_pmx_core 0x168>; + + #address-cells = <1>; + #size-cells = <0>; + wlcore: wlcore@2 { + compatible = "ti,wl1271"; + reg = <2>; + interrupt-parent = <&gpio1>; + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; /* gpio 14 */ + ref-clock-frequency = <26000000>; + }; }; &mmc4 { @@ -591,11 +640,6 @@ pinctrl-0 = <&mcspi3_pins>; }; -&mcspi4 { - pinctrl-names = "default"; - pinctrl-0 = <&mcspi4_pins>; -}; - &uart1 { pinctrl-names = "default"; pinctrl-0 = <&uart1_pins>; diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 7d24ae0306b56..874a26f9dc0ff 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -180,7 +180,8 @@ }; omap5_padconf_global: omap5_padconf_global@5a0 { - compatible = "syscon"; + compatible = "syscon", + "simple-bus"; reg = <0x5a0 0xec>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 165968d51d8fd..8eca5878a8774 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -584,7 +584,7 @@ compatible = "rockchip,rk3288-wdt", "snps,dw-wdt"; reg = <0xff800000 0x100>; clocks = <&cru PCLK_WDT>; - interrupts = ; + interrupts = ; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi index 57ab8587f7b97..37e6182f14700 100644 --- a/arch/arm/boot/dts/sama5d3.dtsi +++ b/arch/arm/boot/dts/sama5d3.dtsi @@ -1321,7 +1321,7 @@ usb0: gadget@00500000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,sama5d3-udc"; reg = <0x00500000 0x100000 0xf8030000 0x4000>; interrupts = <33 IRQ_TYPE_LEVEL_HIGH 2>; diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index 6b1bb58f9c0b6..cf11660f35a17 100644 --- a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -123,7 +123,7 @@ usb0: gadget@00400000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,sama5d3-udc"; reg = <0x00400000 0x100000 0xfc02c000 0x4000>; interrupts = <47 IRQ_TYPE_LEVEL_HIGH 2>; @@ -918,11 +918,11 @@ reg = <0xf8018000 0x4000>; interrupts = <33 IRQ_TYPE_LEVEL_HIGH 6>; dmas = <&dma1 
- (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(4)>, + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(4))>, <&dma1 - (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(5)>; + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(5))>; dma-names = "tx", "rx"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c1>; @@ -1125,10 +1125,10 @@ compatible = "atmel,at91sam9g46-aes"; reg = <0xfc044000 0x100>; interrupts = <12 IRQ_TYPE_LEVEL_HIGH 0>; - dmas = <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(41)>, - <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(40)>; + dmas = <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(41))>, + <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(40))>; dma-names = "tx", "rx"; clocks = <&aes_clk>; clock-names = "aes_clk"; @@ -1139,10 +1139,10 @@ compatible = "atmel,at91sam9g46-tdes"; reg = <0xfc04c000 0x100>; interrupts = <14 IRQ_TYPE_LEVEL_HIGH 0>; - dmas = <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(42)>, - <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(43)>; + dmas = <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(42))>, + <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(43))>; dma-names = "tx", "rx"; clocks = <&tdes_clk>; clock-names = "tdes_clk"; @@ -1153,8 +1153,8 @@ compatible = "atmel,at91sam9g46-sha"; reg = <0xfc050000 0x100>; interrupts = <15 IRQ_TYPE_LEVEL_HIGH 0>; - dmas = <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(44)>; + dmas = <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(44))>; dma-names = "tx"; clocks = <&sha_clk>; clock-names = "sha_clk"; @@ -1219,7 +1219,7 @@ dbgu: serial@fc069000 { compatible = "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; reg = <0xfc069000 0x200>; - interrupts = <2 IRQ_TYPE_LEVEL_HIGH 7>; + interrupts = <45 IRQ_TYPE_LEVEL_HIGH 7>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_dbgu>; clocks = <&dbgu_clk>; diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index f182f6538e902..89ed9b45d5337 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -122,22 +122,14 @@ }; mmcsd_default_mode: mmcsd_default { mmcsd_default_cfg1 { - /* MCCLK */ - pins = "GPIO8_B10"; - ste,output = <0>; - }; - mmcsd_default_cfg2 { - /* MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2 */ - pins = "GPIO10_C11", "GPIO15_A12", - "GPIO16_C13", "GPIO23_D15"; - ste,output = <1>; - }; - mmcsd_default_cfg3 { - /* MCCMD, MCDAT3-0, MCMSFBCLK */ - pins = "GPIO9_A10", "GPIO11_B11", - "GPIO12_A11", "GPIO13_C12", - "GPIO14_B12", "GPIO24_C15"; - ste,input = <1>; + /* + * MCCLK, MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2 + * MCCMD, MCDAT3-0, MCMSFBCLK + */ + pins = "GPIO8_B10", "GPIO9_A10", "GPIO10_C11", "GPIO11_B11", + "GPIO12_A11", "GPIO13_C12", "GPIO14_B12", "GPIO15_A12", + "GPIO16_C13", "GPIO23_D15", "GPIO24_C15"; + ste,output = <2>; }; }; }; @@ -802,10 +794,21 @@ clock-names = "mclk", "apb_pclk"; interrupt-parent = <&vica>; interrupts = <22>; - max-frequency = <48000000>; + max-frequency = <400000>; bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; + full-pwr-cycle; + /* + * The STw4811 circuit used with the Nomadik strictly 
+ * requires that all of these signal direction pins be + * routed and used for its 4-bit levelshifter. + */ + st,sig-dir-dat0; + st,sig-dir-dat2; + st,sig-dir-dat31; + st,sig-dir-cmd; + st,sig-pin-fbclk; pinctrl-names = "default"; pinctrl-0 = <&mmcsd_default_mux>, <&mmcsd_default_mode>; vmmc-supply = <&vmmc_regulator>; diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index 2fd8988f310c6..3794ca16499d6 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -573,7 +573,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-a10-ts"; + compatible = "allwinner,sun5i-a13-ts"; reg = <0x01c25000 0x100>; interrupts = <29>; #thermal-sensor-cells = <0>; diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index 883cb4873688f..5098185abde67 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -555,7 +555,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-a10-ts"; + compatible = "allwinner,sun5i-a13-ts"; reg = <0x01c25000 0x100>; interrupts = <29>; #thermal-sensor-cells = <0>; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index fdd181792b4be..fa36571b755ab 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -111,7 +111,7 @@ 720000 1200000 528000 1100000 312000 1000000 - 144000 900000 + 144000 1000000 >; #cooling-cells = <2>; cooling-min-level = <0>; @@ -1042,7 +1042,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-a10-ts"; + compatible = "allwinner,sun5i-a13-ts"; reg = <0x01c25000 0x100>; interrupts = ; #thermal-sensor-cells = <0>; diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c index 5662a872689b3..30613204da154 100644 --- a/arch/arm/common/edma.c +++ b/arch/arm/common/edma.c @@ -406,7 +406,8 @@ static irqreturn_t dma_irq_handler(int irq, void *data) BIT(slot)); if (edma_cc[ctlr]->intr_data[channel].callback) edma_cc[ctlr]->intr_data[channel].callback( - channel, EDMA_DMA_COMPLETE, + EDMA_CTLR_CHAN(ctlr, channel), + EDMA_DMA_COMPLETE, edma_cc[ctlr]->intr_data[channel].data); } } while (sh_ipr); @@ -460,7 +461,8 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data) if (edma_cc[ctlr]->intr_data[k]. callback) { edma_cc[ctlr]->intr_data[k]. 
- callback(k, + callback( + EDMA_CTLR_CHAN(ctlr, k), EDMA_DMA_CC_ERROR, edma_cc[ctlr]->intr_data [k].data); diff --git a/arch/arm/common/icst.c b/arch/arm/common/icst.c index 2dc6da70ae598..d7ed252708c57 100644 --- a/arch/arm/common/icst.c +++ b/arch/arm/common/icst.c @@ -16,7 +16,7 @@ */ #include #include - +#include #include /* @@ -29,7 +29,11 @@ EXPORT_SYMBOL(icst525_s2div); unsigned long icst_hz(const struct icst_params *p, struct icst_vco vco) { - return p->ref * 2 * (vco.v + 8) / ((vco.r + 2) * p->s2div[vco.s]); + u64 dividend = p->ref * 2 * (u64)(vco.v + 8); + u32 divisor = (vco.r + 2) * p->s2div[vco.s]; + + do_div(dividend, divisor); + return (unsigned long)dividend; } EXPORT_SYMBOL(icst_hz); @@ -58,6 +62,7 @@ icst_hz_to_vco(const struct icst_params *p, unsigned long freq) if (f > p->vco_min && f <= p->vco_max) break; + i++; } while (i < 8); if (i >= 8) diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index a6ad93c9bce35..fd9eefce0a7b8 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -259,15 +259,17 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) if (err) return err; - patch_text((void *)bpt->bpt_addr, - *(unsigned int *)arch_kgdb_ops.gdb_bpt_instr); + /* Machine is already stopped, so we can use __patch_text() directly */ + __patch_text((void *)bpt->bpt_addr, + *(unsigned int *)arch_kgdb_ops.gdb_bpt_instr); return err; } int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { - patch_text((void *)bpt->bpt_addr, *(unsigned int *)bpt->saved_instr); + /* Machine is already stopped, so we can use __patch_text() directly */ + __patch_text((void *)bpt->bpt_addr, *(unsigned int *)bpt->saved_instr); return 0; } diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 423663e23791e..586eef26203d1 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -343,12 +343,17 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, */ thumb = handler & 1; -#if __LINUX_ARM_ARCH__ >= 7 +#if __LINUX_ARM_ARCH__ >= 6 /* - * Clear the If-Then Thumb-2 execution state - * ARM spec requires this to be all 000s in ARM mode - * Snapdragon S4/Krait misbehaves on a Thumb=>ARM - * signal transition without this. + * Clear the If-Then Thumb-2 execution state. ARM spec + * requires this to be all 000s in ARM mode. Snapdragon + * S4/Krait misbehaves on a Thumb=>ARM signal transition + * without this. + * + * We must do this whenever we are running on a Thumb-2 + * capable CPU, which includes ARMv6T2. However, we elect + * to do this whenever we're on an ARMv6 or later CPU for + * simplicity. 
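The icst_hz() change earlier in this hunk widens the dividend to 64 bits and uses do_div() because the product p->ref * 2 * (vco.v + 8) can overflow 32 bits, and 32-bit ARM has no native 64-by-32 divide. do_div(), from asm/div64.h, divides its u64 argument in place and returns the remainder; a minimal sketch of the same pattern:

	#include <linux/types.h>
	#include <asm/div64.h>

	static unsigned long sketch_vco_rate(u32 ref, u32 v, u32 r, u32 s2div)
	{
		u64 dividend = (u64)ref * 2 * (v + 8);

		do_div(dividend, (r + 2) * s2div);	/* quotient left in dividend */
		return (unsigned long)dividend;
	}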
*/ cpsr &= ~PSR_IT_MASK; #endif diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index cca5b87581855..f11d825270761 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -576,7 +576,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); if ((unsigned)ipinr < NR_IPI) { - trace_ipi_entry(ipi_types[ipinr]); + trace_ipi_entry_rcuidle(ipi_types[ipinr]); __inc_irq_stat(cpu, ipi_irqs[ipinr]); } @@ -635,7 +635,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) } if ((unsigned)ipinr < NR_IPI) - trace_ipi_exit(ipi_types[ipinr]); + trace_ipi_exit_rcuidle(ipi_types[ipinr]); set_irq_regs(old_regs); } diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index f1f79d1043096..60c1a0f4d67ae 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM depends on MMU && OF select PREEMPT_NOTIFIERS select ANON_INODES + select ARM_GIC select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index d9631ecddd56e..d6223cbcb6618 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -450,7 +450,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Map the VGIC hardware resources before running a vcpu the first * time on this VM. */ - if (unlikely(!vgic_ready(kvm))) { + if (unlikely(irqchip_in_kernel(kvm) && !vgic_ready(kvm))) { ret = kvm_vgic_map_resources(kvm); if (ret) return ret; diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index d503fbb787d36..88993cc95e8e6 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -155,7 +155,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 val; val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; } static unsigned long num_core_regs(void) diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 79caf79b304a0..f7db3a5d80e3b 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -170,13 +170,9 @@ __kvm_vcpu_return: @ Don't trap coprocessor accesses for host kernel set_hstr vmexit set_hdcr vmexit - set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)) + set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)), after_vfp_restore #ifdef CONFIG_VFPv3 - @ Save floating point registers we if let guest use them. - tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11)) - bne after_vfp_restore - @ Switch VFP/NEON hardware state to the host's add r7, vcpu, #VCPU_VFP_GUEST store_vfp_state r7 @@ -188,6 +184,8 @@ after_vfp_restore: @ Restore FPEXC_EN which we clobbered on entry pop {r2} VFPFMXR FPEXC, r2 +#else +after_vfp_restore: #endif @ Reset Hyp-role @@ -483,7 +481,7 @@ switch_to_guest_vfp: push {r3-r7} @ NEON/VFP used. Turn on VFP access. 
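The interrupts.S changes above and the set_hcptr rework below implement lazy VFP/NEON switching: the guest's FP state is only swapped in once the guest actually touches VFP and traps, and swapped back on exit only if that happened. A C-level sketch of the control flow (the types and helpers here are hypothetical, not KVM's real API):

	#include <linux/types.h>

	struct sketch_vfp_state { u64 fpregs[32]; u32 fpexc; };

	struct sketch_vcpu {
		bool guest_owns_vfp;			/* set by the trap */
		struct sketch_vfp_state host_vfp, guest_vfp;
	};

	void disable_vfp_traps(void);			/* hypothetical helpers */
	void save_vfp_state(struct sketch_vfp_state *st);
	void restore_vfp_state(struct sketch_vfp_state *st);

	/* VFP trap while the guest runs: hand VFP to the guest */
	static void sketch_vfp_trap(struct sketch_vcpu *vcpu)
	{
		disable_vfp_traps();			/* set_hcptr vmtrap */
		save_vfp_state(&vcpu->host_vfp);
		restore_vfp_state(&vcpu->guest_vfp);
		vcpu->guest_owns_vfp = true;
	}

	/* Guest exit: undo the switch only if the trap ever fired */
	static void sketch_vfp_exit(struct sketch_vcpu *vcpu)
	{
		if (!vcpu->guest_owns_vfp)
			return;
		save_vfp_state(&vcpu->guest_vfp);
		restore_vfp_state(&vcpu->host_vfp);
		vcpu->guest_owns_vfp = false;
	}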
- set_hcptr vmexit, (HCPTR_TCP(10) | HCPTR_TCP(11)) + set_hcptr vmtrap, (HCPTR_TCP(10) | HCPTR_TCP(11)) @ Switch VFP/NEON hardware state to the guest's add r7, r0, #VCPU_VFP_HOST diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 35e4a3a0c476c..58048b333d31a 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -518,8 +518,7 @@ ARM_BE8(rev r6, r6 ) mrc p15, 0, r2, c14, c3, 1 @ CNTV_CTL str r2, [vcpu, #VCPU_TIMER_CNTV_CTL] - bic r2, #1 @ Clear ENABLE - mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL + isb mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL @@ -532,6 +531,9 @@ ARM_BE8(rev r6, r6 ) mcrr p15, 4, r2, r2, c14 @ CNTVOFF 1: + mov r2, #0 @ Clear ENABLE + mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL + @ Allow physical timer/counter access for the host mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN) @@ -591,8 +593,13 @@ ARM_BE8(rev r6, r6 ) .endm /* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return - * (hardware reset value is 0). Keep previous value in r2. */ -.macro set_hcptr operation, mask + * (hardware reset value is 0). Keep previous value in r2. + * An ISB is emited on vmexit/vmtrap, but executed on vmexit only if + * VFP wasn't already enabled (always executed on vmtrap). + * If a label is specified with vmexit, it is branched to if VFP wasn't + * enabled. + */ +.macro set_hcptr operation, mask, label = none mrc p15, 4, r2, c1, c1, 2 ldr r3, =\mask .if \operation == vmentry @@ -601,6 +608,17 @@ ARM_BE8(rev r6, r6 ) bic r3, r2, r3 @ Don't trap defined coproc-accesses .endif mcr p15, 4, r3, c1, c1, 2 + .if \operation != vmentry + .if \operation == vmexit + tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11)) + beq 1f + .endif + isb + .if \label != none + b \label + .endif +1: + .endif .endm /* Configures the HDCR (Hyp Debug Configuration Register) on entry/return diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 1d5accbd3dcf2..da09ddcfcc00e 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -98,6 +98,11 @@ static void kvm_flush_dcache_pud(pud_t pud) __kvm_flush_dcache_pud(pud); } +static bool kvm_is_device_pfn(unsigned long pfn) +{ + return !pfn_valid(pfn); +} + /** * stage2_dissolve_pmd() - clear and flush huge PMD entry * @kvm: pointer to kvm structure. 
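kvm_is_device_pfn(), added above, derives "device-ness" from the backing pfn rather than from the stage-2 PTE attributes the old checks keyed on: a pfn with no struct page behind it (pfn_valid() fails) is MMIO and must not receive d-cache maintenance, while RAM always may, even if it happens to be mapped with device attributes. Sketch of the predicate the flush paths now effectively use (illustrative only):

	#include <linux/mmzone.h>	/* pfn_valid() */

	static bool sketch_needs_dcache_flush(unsigned long pfn)
	{
		/* RAM may hold stale cache lines; device MMIO must be left
		 * alone - cache maintenance on it is wrong and can fault */
		return pfn_valid(pfn);
	}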
@@ -213,7 +218,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, kvm_tlb_flush_vmid_ipa(kvm, addr); /* No need to invalidate the cache for device mappings */ - if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) + if (!kvm_is_device_pfn(pte_pfn(old_pte))) kvm_flush_dcache_pte(old_pte); put_page(virt_to_page(pte)); @@ -305,8 +310,7 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, pte = pte_offset_kernel(pmd, addr); do { - if (!pte_none(*pte) && - (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) + if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte))) kvm_flush_dcache_pte(*pte); } while (pte++, addr += PAGE_SIZE, addr != end); } @@ -1037,11 +1041,6 @@ static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) return kvm_vcpu_dabt_iswrite(vcpu); } -static bool kvm_is_device_pfn(unsigned long pfn) -{ - return !pfn_valid(pfn); -} - /** * stage2_wp_ptes - write protect PMD range * @pmd: pointer to pmd entry @@ -1790,8 +1789,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (vma->vm_flags & VM_PFNMAP) { gpa_t gpa = mem->guest_phys_addr + (vm_start - mem->userspace_addr); - phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) + - vm_start - vma->vm_start; + phys_addr_t pa; + + pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; + pa += vm_start - vma->vm_start; /* IO region dirty page logging not allowed */ if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 02fa8eff6ae1d..531e922486b2f 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -230,10 +230,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) case PSCI_0_2_FN64_AFFINITY_INFO: val = kvm_psci_vcpu_affinity_info(vcpu); break; - case PSCI_0_2_FN_MIGRATE: - case PSCI_0_2_FN64_MIGRATE: - val = PSCI_RET_NOT_SUPPORTED; - break; case PSCI_0_2_FN_MIGRATE_INFO_TYPE: /* * Trusted OS is MP hence does not require migration @@ -242,10 +238,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) */ val = PSCI_0_2_TOS_MP; break; - case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: - case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: - val = PSCI_RET_NOT_SUPPORTED; - break; case PSCI_0_2_FN_SYSTEM_OFF: kvm_psci_system_off(vcpu); /* @@ -271,7 +263,8 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) ret = 0; break; default: - return -EINVAL; + val = PSCI_RET_NOT_SUPPORTED; + break; } *vcpu_reg(vcpu, 0) = val; @@ -291,12 +284,9 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) case KVM_PSCI_FN_CPU_ON: val = kvm_psci_vcpu_on(vcpu); break; - case KVM_PSCI_FN_CPU_SUSPEND: - case KVM_PSCI_FN_MIGRATE: + default: val = PSCI_RET_NOT_SUPPORTED; break; - default: - return -EINVAL; } *vcpu_reg(vcpu, 0) = val; diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S index bd22b2c8a0519..d3161c7ee1fd0 100644 --- a/arch/arm/mach-at91/pm_suspend.S +++ b/arch/arm/mach-at91/pm_suspend.S @@ -81,6 +81,8 @@ tmp2 .req r5 * @r2: base address of second SDRAM Controller or 0 if not present * @r3: pm information */ +/* at91_pm_suspend_in_sram must be 8-byte aligned per the requirements of fncpy() */ + .align 3 ENTRY(at91_pm_suspend_in_sram) /* Save registers on stack */ stmfd sp!, {r4 - r12, lr} diff --git a/arch/arm/mach-bcm/Makefile b/arch/arm/mach-bcm/Makefile index 4c38674c73ecb..54d274da7ccba 100644 --- a/arch/arm/mach-bcm/Makefile +++ b/arch/arm/mach-bcm/Makefile @@ -43,5 +43,5 @@ obj-$(CONFIG_ARCH_BCM_63XX) := bcm63xx.o ifeq ($(CONFIG_ARCH_BRCMSTB),y) CFLAGS_platsmp-brcmstb.o += -march=armv7-a obj-y += brcmstb.o -obj-$(CONFIG_SMP) += headsmp-brcmstb.o platsmp-brcmstb.o 
+obj-$(CONFIG_SMP) += platsmp-brcmstb.o endif diff --git a/arch/arm/mach-bcm/brcmstb.h b/arch/arm/mach-bcm/brcmstb.h deleted file mode 100644 index ec0c3d112b367..0000000000000 --- a/arch/arm/mach-bcm/brcmstb.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (C) 2013-2014 Broadcom Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __BRCMSTB_H__ -#define __BRCMSTB_H__ - -void brcmstb_secondary_startup(void); - -#endif /* __BRCMSTB_H__ */ diff --git a/arch/arm/mach-bcm/headsmp-brcmstb.S b/arch/arm/mach-bcm/headsmp-brcmstb.S deleted file mode 100644 index 199c1ea582480..0000000000000 --- a/arch/arm/mach-bcm/headsmp-brcmstb.S +++ /dev/null @@ -1,33 +0,0 @@ -/* - * SMP boot code for secondary CPUs - * Based on arch/arm/mach-tegra/headsmp.S - * - * Copyright (C) 2010 NVIDIA, Inc. - * Copyright (C) 2013-2014 Broadcom Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include - - .section ".text.head", "ax" - -ENTRY(brcmstb_secondary_startup) - /* - * Ensure CPU is in a sane state by disabling all IRQs and switching - * into SVC mode. 
- */ - setmode PSR_I_BIT | PSR_F_BIT | SVC_MODE, r0 - - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(brcmstb_secondary_startup) diff --git a/arch/arm/mach-bcm/platsmp-brcmstb.c b/arch/arm/mach-bcm/platsmp-brcmstb.c index e209e6fc7cafa..44d6bddf7a4e7 100644 --- a/arch/arm/mach-bcm/platsmp-brcmstb.c +++ b/arch/arm/mach-bcm/platsmp-brcmstb.c @@ -30,8 +30,6 @@ #include #include -#include "brcmstb.h" - enum { ZONE_MAN_CLKEN_MASK = BIT(0), ZONE_MAN_RESET_CNTL_MASK = BIT(1), @@ -153,7 +151,7 @@ static void brcmstb_cpu_boot(u32 cpu) * Set the reset vector to point to the secondary_startup * routine */ - cpu_set_boot_addr(cpu, virt_to_phys(brcmstb_secondary_startup)); + cpu_set_boot_addr(cpu, virt_to_phys(secondary_startup)); /* Unhalt the cpu */ cpu_rst_cfg_set(cpu, 0); diff --git a/arch/arm/mach-berlin/headsmp.S b/arch/arm/mach-berlin/headsmp.S index 4a4c56a58ad35..dc82a3486b05e 100644 --- a/arch/arm/mach-berlin/headsmp.S +++ b/arch/arm/mach-berlin/headsmp.S @@ -12,12 +12,6 @@ #include #include -ENTRY(berlin_secondary_startup) - ARM_BE8(setend be) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(berlin_secondary_startup) - /* * If the following instruction is set in the reset exception vector, CPUs * will fetch the value of the software reset address vector when being diff --git a/arch/arm/mach-berlin/platsmp.c b/arch/arm/mach-berlin/platsmp.c index 702e7982015ab..34a3753e73564 100644 --- a/arch/arm/mach-berlin/platsmp.c +++ b/arch/arm/mach-berlin/platsmp.c @@ -22,7 +22,6 @@ #define RESET_VECT 0x00 #define SW_RESET_ADDR 0x94 -extern void berlin_secondary_startup(void); extern u32 boot_inst; static void __iomem *cpu_ctrl; @@ -85,7 +84,7 @@ static void __init berlin_smp_prepare_cpus(unsigned int max_cpus) * Write the secondary startup address into the SW reset address * vector. This is used by boot_inst. 
*/ - writel(virt_to_phys(berlin_secondary_startup), vectors_base + SW_RESET_ADDR); + writel(virt_to_phys(secondary_startup), vectors_base + SW_RESET_ADDR); iounmap(vectors_base); unmap_scu: diff --git a/arch/arm/mach-dove/include/mach/irqs.h b/arch/arm/mach-dove/include/mach/irqs.h index 03d401d20453e..3f29e6bca0586 100644 --- a/arch/arm/mach-dove/include/mach/irqs.h +++ b/arch/arm/mach-dove/include/mach/irqs.h @@ -14,73 +14,73 @@ /* * Dove Low Interrupt Controller */ -#define IRQ_DOVE_BRIDGE 0 -#define IRQ_DOVE_H2C 1 -#define IRQ_DOVE_C2H 2 -#define IRQ_DOVE_NAND 3 -#define IRQ_DOVE_PDMA 4 -#define IRQ_DOVE_SPI1 5 -#define IRQ_DOVE_SPI0 6 -#define IRQ_DOVE_UART_0 7 -#define IRQ_DOVE_UART_1 8 -#define IRQ_DOVE_UART_2 9 -#define IRQ_DOVE_UART_3 10 -#define IRQ_DOVE_I2C 11 -#define IRQ_DOVE_GPIO_0_7 12 -#define IRQ_DOVE_GPIO_8_15 13 -#define IRQ_DOVE_GPIO_16_23 14 -#define IRQ_DOVE_PCIE0_ERR 15 -#define IRQ_DOVE_PCIE0 16 -#define IRQ_DOVE_PCIE1_ERR 17 -#define IRQ_DOVE_PCIE1 18 -#define IRQ_DOVE_I2S0 19 -#define IRQ_DOVE_I2S0_ERR 20 -#define IRQ_DOVE_I2S1 21 -#define IRQ_DOVE_I2S1_ERR 22 -#define IRQ_DOVE_USB_ERR 23 -#define IRQ_DOVE_USB0 24 -#define IRQ_DOVE_USB1 25 -#define IRQ_DOVE_GE00_RX 26 -#define IRQ_DOVE_GE00_TX 27 -#define IRQ_DOVE_GE00_MISC 28 -#define IRQ_DOVE_GE00_SUM 29 -#define IRQ_DOVE_GE00_ERR 30 -#define IRQ_DOVE_CRYPTO 31 +#define IRQ_DOVE_BRIDGE (1 + 0) +#define IRQ_DOVE_H2C (1 + 1) +#define IRQ_DOVE_C2H (1 + 2) +#define IRQ_DOVE_NAND (1 + 3) +#define IRQ_DOVE_PDMA (1 + 4) +#define IRQ_DOVE_SPI1 (1 + 5) +#define IRQ_DOVE_SPI0 (1 + 6) +#define IRQ_DOVE_UART_0 (1 + 7) +#define IRQ_DOVE_UART_1 (1 + 8) +#define IRQ_DOVE_UART_2 (1 + 9) +#define IRQ_DOVE_UART_3 (1 + 10) +#define IRQ_DOVE_I2C (1 + 11) +#define IRQ_DOVE_GPIO_0_7 (1 + 12) +#define IRQ_DOVE_GPIO_8_15 (1 + 13) +#define IRQ_DOVE_GPIO_16_23 (1 + 14) +#define IRQ_DOVE_PCIE0_ERR (1 + 15) +#define IRQ_DOVE_PCIE0 (1 + 16) +#define IRQ_DOVE_PCIE1_ERR (1 + 17) +#define IRQ_DOVE_PCIE1 (1 + 18) +#define IRQ_DOVE_I2S0 (1 + 19) +#define IRQ_DOVE_I2S0_ERR (1 + 20) +#define IRQ_DOVE_I2S1 (1 + 21) +#define IRQ_DOVE_I2S1_ERR (1 + 22) +#define IRQ_DOVE_USB_ERR (1 + 23) +#define IRQ_DOVE_USB0 (1 + 24) +#define IRQ_DOVE_USB1 (1 + 25) +#define IRQ_DOVE_GE00_RX (1 + 26) +#define IRQ_DOVE_GE00_TX (1 + 27) +#define IRQ_DOVE_GE00_MISC (1 + 28) +#define IRQ_DOVE_GE00_SUM (1 + 29) +#define IRQ_DOVE_GE00_ERR (1 + 30) +#define IRQ_DOVE_CRYPTO (1 + 31) /* * Dove High Interrupt Controller */ -#define IRQ_DOVE_AC97 32 -#define IRQ_DOVE_PMU 33 -#define IRQ_DOVE_CAM 34 -#define IRQ_DOVE_SDIO0 35 -#define IRQ_DOVE_SDIO1 36 -#define IRQ_DOVE_SDIO0_WAKEUP 37 -#define IRQ_DOVE_SDIO1_WAKEUP 38 -#define IRQ_DOVE_XOR_00 39 -#define IRQ_DOVE_XOR_01 40 -#define IRQ_DOVE_XOR0_ERR 41 -#define IRQ_DOVE_XOR_10 42 -#define IRQ_DOVE_XOR_11 43 -#define IRQ_DOVE_XOR1_ERR 44 -#define IRQ_DOVE_LCD_DCON 45 -#define IRQ_DOVE_LCD1 46 -#define IRQ_DOVE_LCD0 47 -#define IRQ_DOVE_GPU 48 -#define IRQ_DOVE_PERFORM_MNTR 49 -#define IRQ_DOVE_VPRO_DMA1 51 -#define IRQ_DOVE_SSP_TIMER 54 -#define IRQ_DOVE_SSP 55 -#define IRQ_DOVE_MC_L2_ERR 56 -#define IRQ_DOVE_CRYPTO_ERR 59 -#define IRQ_DOVE_GPIO_24_31 60 -#define IRQ_DOVE_HIGH_GPIO 61 -#define IRQ_DOVE_SATA 62 +#define IRQ_DOVE_AC97 (1 + 32) +#define IRQ_DOVE_PMU (1 + 33) +#define IRQ_DOVE_CAM (1 + 34) +#define IRQ_DOVE_SDIO0 (1 + 35) +#define IRQ_DOVE_SDIO1 (1 + 36) +#define IRQ_DOVE_SDIO0_WAKEUP (1 + 37) +#define IRQ_DOVE_SDIO1_WAKEUP (1 + 38) +#define IRQ_DOVE_XOR_00 (1 + 39) +#define IRQ_DOVE_XOR_01 (1 + 40) +#define 
IRQ_DOVE_XOR0_ERR (1 + 41) +#define IRQ_DOVE_XOR_10 (1 + 42) +#define IRQ_DOVE_XOR_11 (1 + 43) +#define IRQ_DOVE_XOR1_ERR (1 + 44) +#define IRQ_DOVE_LCD_DCON (1 + 45) +#define IRQ_DOVE_LCD1 (1 + 46) +#define IRQ_DOVE_LCD0 (1 + 47) +#define IRQ_DOVE_GPU (1 + 48) +#define IRQ_DOVE_PERFORM_MNTR (1 + 49) +#define IRQ_DOVE_VPRO_DMA1 (1 + 51) +#define IRQ_DOVE_SSP_TIMER (1 + 54) +#define IRQ_DOVE_SSP (1 + 55) +#define IRQ_DOVE_MC_L2_ERR (1 + 56) +#define IRQ_DOVE_CRYPTO_ERR (1 + 59) +#define IRQ_DOVE_GPIO_24_31 (1 + 60) +#define IRQ_DOVE_HIGH_GPIO (1 + 61) +#define IRQ_DOVE_SATA (1 + 62) /* * DOVE General Purpose Pins */ -#define IRQ_DOVE_GPIO_START 64 +#define IRQ_DOVE_GPIO_START 65 #define NR_GPIO_IRQS 64 /* diff --git a/arch/arm/mach-dove/irq.c b/arch/arm/mach-dove/irq.c index 4a5a7aedcb763..df0223f76fa92 100644 --- a/arch/arm/mach-dove/irq.c +++ b/arch/arm/mach-dove/irq.c @@ -126,14 +126,14 @@ __exception_irq_entry dove_legacy_handle_irq(struct pt_regs *regs) stat = readl_relaxed(dove_irq_base + IRQ_CAUSE_LOW_OFF); stat &= readl_relaxed(dove_irq_base + IRQ_MASK_LOW_OFF); if (stat) { - unsigned int hwirq = __fls(stat); + unsigned int hwirq = 1 + __fls(stat); handle_IRQ(hwirq, regs); return; } stat = readl_relaxed(dove_irq_base + IRQ_CAUSE_HIGH_OFF); stat &= readl_relaxed(dove_irq_base + IRQ_MASK_HIGH_OFF); if (stat) { - unsigned int hwirq = 32 + __fls(stat); + unsigned int hwirq = 33 + __fls(stat); handle_IRQ(hwirq, regs); return; } @@ -144,8 +144,8 @@ void __init dove_init_irq(void) { int i; - orion_irq_init(0, IRQ_VIRT_BASE + IRQ_MASK_LOW_OFF); - orion_irq_init(32, IRQ_VIRT_BASE + IRQ_MASK_HIGH_OFF); + orion_irq_init(1, IRQ_VIRT_BASE + IRQ_MASK_LOW_OFF); + orion_irq_init(33, IRQ_VIRT_BASE + IRQ_MASK_HIGH_OFF); #ifdef CONFIG_MULTI_IRQ_HANDLER set_handle_irq(dove_legacy_handle_irq); diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c index 9bdf54795f05d..56978199c4798 100644 --- a/arch/arm/mach-exynos/mcpm-exynos.c +++ b/arch/arm/mach-exynos/mcpm-exynos.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "regs-pmu.h" #include "common.h" @@ -70,7 +71,31 @@ static int exynos_cpu_powerup(unsigned int cpu, unsigned int cluster) cluster >= EXYNOS5420_NR_CLUSTERS) return -EINVAL; - exynos_cpu_power_up(cpunr); + if (!exynos_cpu_power_state(cpunr)) { + exynos_cpu_power_up(cpunr); + + /* + * This assumes the cluster number of the big cores (Cortex-A15) + * is 0 and that of the Little cores (Cortex-A7) is 1. + * When the system is booted from the Little core, they + * should be reset during CPU power up. + */ + if (cluster && + cluster == MPIDR_AFFINITY_LEVEL(cpu_logical_map(0), 1)) { + /* + * Before we reset the Little cores, we should wait + * until the SPARE2 register is set to 1, because the + * iROM init code sets the register after + * initialization.
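The Dove changes above (and the matching Orion5x hunks later in this series) shift every legacy hwirq up by one so that hwirq 0 is never handed to handle_IRQ(), IRQ 0 being treated as invalid. A sketch of the resulting demux arithmetic, with GCC's __builtin_clz() standing in for the kernel's __fls() and the helper name made up:

    #include <stdint.h>

    /* Map the highest pending cause bit to a hwirq, with 0 reserved as
     * "nothing pending"; base is 1 for the low bank, 33 for the high one. */
    static unsigned int pending_hwirq(uint32_t cause, uint32_t mask,
                                      unsigned int base)
    {
        uint32_t stat = cause & mask;

        if (!stat)
            return 0;                             /* no interrupt pending */
        return base + (31 - __builtin_clz(stat)); /* 31 - clz == __fls */
    }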
+ */ + while (!pmu_raw_readl(S5P_PMU_SPARE2)) + udelay(10); + + pmu_raw_writel(EXYNOS5420_KFC_CORE_RESET(cpu), + EXYNOS_SWRESET); + } + } + return 0; } diff --git a/arch/arm/mach-exynos/regs-pmu.h b/arch/arm/mach-exynos/regs-pmu.h index b7614333d2968..fba9068ed260d 100644 --- a/arch/arm/mach-exynos/regs-pmu.h +++ b/arch/arm/mach-exynos/regs-pmu.h @@ -513,6 +513,12 @@ static inline unsigned int exynos_pmu_cpunr(unsigned int mpidr) #define SPREAD_ENABLE 0xF #define SPREAD_USE_STANDWFI 0xF +#define EXYNOS5420_KFC_CORE_RESET0 BIT(8) +#define EXYNOS5420_KFC_ETM_RESET0 BIT(20) + +#define EXYNOS5420_KFC_CORE_RESET(_nr) \ + ((EXYNOS5420_KFC_CORE_RESET0 | EXYNOS5420_KFC_ETM_RESET0) << (_nr)) + #define EXYNOS5420_BB_CON1 0x0784 #define EXYNOS5420_BB_SEL_EN BIT(31) #define EXYNOS5420_BB_PMOS_EN BIT(7) diff --git a/arch/arm/mach-hisi/Makefile b/arch/arm/mach-hisi/Makefile index 6b7b3033de0bc..659db1933ed36 100644 --- a/arch/arm/mach-hisi/Makefile +++ b/arch/arm/mach-hisi/Makefile @@ -6,4 +6,4 @@ CFLAGS_platmcpm.o := -march=armv7-a obj-y += hisilicon.o obj-$(CONFIG_MCPM) += platmcpm.o -obj-$(CONFIG_SMP) += platsmp.o hotplug.o headsmp.o +obj-$(CONFIG_SMP) += platsmp.o hotplug.o diff --git a/arch/arm/mach-hisi/core.h b/arch/arm/mach-hisi/core.h index 92a682d8e9394..c7648ef1825c7 100644 --- a/arch/arm/mach-hisi/core.h +++ b/arch/arm/mach-hisi/core.h @@ -12,7 +12,6 @@ extern void hi3xxx_cpu_die(unsigned int cpu); extern int hi3xxx_cpu_kill(unsigned int cpu); extern void hi3xxx_set_cpu(int cpu, bool enable); -extern void hisi_secondary_startup(void); extern struct smp_operations hix5hd2_smp_ops; extern void hix5hd2_set_cpu(int cpu, bool enable); extern void hix5hd2_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-hisi/headsmp.S b/arch/arm/mach-hisi/headsmp.S deleted file mode 100644 index 81e35b159e75e..0000000000000 --- a/arch/arm/mach-hisi/headsmp.S +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) 2014 Hisilicon Limited. - * Copyright (c) 2014 Linaro Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#include -#include - - __CPUINIT - -ENTRY(hisi_secondary_startup) - bl v7_invalidate_l1 - b secondary_startup diff --git a/arch/arm/mach-hisi/platsmp.c b/arch/arm/mach-hisi/platsmp.c index 8880c8e8b296f..51744127db666 100644 --- a/arch/arm/mach-hisi/platsmp.c +++ b/arch/arm/mach-hisi/platsmp.c @@ -118,7 +118,7 @@ static int hix5hd2_boot_secondary(unsigned int cpu, struct task_struct *idle) { phys_addr_t jumpaddr; - jumpaddr = virt_to_phys(hisi_secondary_startup); + jumpaddr = virt_to_phys(secondary_startup); hix5hd2_set_scu_boot_addr(HIX5HD2_BOOT_ADDRESS, jumpaddr); hix5hd2_set_cpu(cpu, true); arch_send_wakeup_ipi_mask(cpumask_of(cpu)); @@ -156,7 +156,7 @@ static int hip01_boot_secondary(unsigned int cpu, struct task_struct *idle) struct device_node *node; - jumpaddr = virt_to_phys(hisi_secondary_startup); + jumpaddr = virt_to_phys(secondary_startup); hip01_set_boot_addr(HIP01_BOOT_ADDRESS, jumpaddr); node = of_find_compatible_node(NULL, NULL, "hisilicon,hip01-sysctrl"); diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 469a150bf98f9..a2e8ef3c0bd9f 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -443,7 +443,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) clk[IMX6QDL_CLK_GPMI_IO] = imx_clk_gate2("gpmi_io", "enfc", base + 0x78, 28); clk[IMX6QDL_CLK_GPMI_APB] = imx_clk_gate2("gpmi_apb", "usdhc3", base + 0x78, 30); clk[IMX6QDL_CLK_ROM] = imx_clk_gate2("rom", "ahb", base + 0x7c, 0); - clk[IMX6QDL_CLK_SATA] = imx_clk_gate2("sata", "ipg", base + 0x7c, 4); + clk[IMX6QDL_CLK_SATA] = imx_clk_gate2("sata", "ahb", base + 0x7c, 4); clk[IMX6QDL_CLK_SDMA] = imx_clk_gate2("sdma", "ahb", base + 0x7c, 6); clk[IMX6QDL_CLK_SPBA] = imx_clk_gate2("spba", "ipg", base + 0x7c, 12); clk[IMX6QDL_CLK_SPDIF] = imx_clk_gate2("spdif", "spdif_podf", base + 0x7c, 14); diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index 6d0893a3828eb..78b6fd0b86e69 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -291,8 +291,6 @@ void __init imx_gpc_check_dt(void) } } -#ifdef CONFIG_PM_GENERIC_DOMAINS - static void _imx6q_pm_pu_power_off(struct generic_pm_domain *genpd) { int iso, iso2sw; @@ -399,7 +397,6 @@ static struct genpd_onecell_data imx_gpc_onecell_data = { static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) { struct clk *clk; - bool is_off; int i; imx6q_pu_domain.reg = pu_reg; @@ -416,18 +413,13 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) } imx6q_pu_domain.num_clks = i; - is_off = IS_ENABLED(CONFIG_PM); - if (is_off) { - _imx6q_pm_pu_power_off(&imx6q_pu_domain.base); - } else { - /* - * Enable power if compiled without CONFIG_PM in case the - * bootloader disabled it. - */ - imx6q_pm_pu_power_on(&imx6q_pu_domain.base); - } + /* Enable power always in case bootloader disabled it. 
*/ + imx6q_pm_pu_power_on(&imx6q_pu_domain.base); + + if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) + return 0; - pm_genpd_init(&imx6q_pu_domain.base, NULL, is_off); + pm_genpd_init(&imx6q_pu_domain.base, NULL, false); return of_genpd_add_provider_onecell(dev->of_node, &imx_gpc_onecell_data); @@ -437,13 +429,6 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) return -EINVAL; } -#else -static inline int imx_gpc_genpd_init(struct device *dev, struct regulator *reg) -{ - return 0; -} -#endif /* CONFIG_PM_GENERIC_DOMAINS */ - static int imx_gpc_probe(struct platform_device *pdev) { struct regulator *pu_reg; diff --git a/arch/arm/mach-imx/headsmp.S b/arch/arm/mach-imx/headsmp.S index de5047c8a6c87..b5e976816b63c 100644 --- a/arch/arm/mach-imx/headsmp.S +++ b/arch/arm/mach-imx/headsmp.S @@ -25,7 +25,6 @@ diag_reg_offset: .endm ENTRY(v7_secondary_startup) - bl v7_invalidate_l1 set_diag_reg b secondary_startup ENDPROC(v7_secondary_startup) diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S index 08d5ed46b996b..48e4c4b3cd1c9 100644 --- a/arch/arm/mach-mvebu/headsmp-a9.S +++ b/arch/arm/mach-mvebu/headsmp-a9.S @@ -21,7 +21,6 @@ ENTRY(mvebu_cortex_a9_secondary_startup) ARM_BE8(setend be) - bl v7_invalidate_l1 bl armada_38x_scu_power_up b secondary_startup ENDPROC(mvebu_cortex_a9_secondary_startup) diff --git a/arch/arm/mach-mvebu/pm-board.c b/arch/arm/mach-mvebu/pm-board.c index 6dfd4ab97b2aa..301ab38d38ba8 100644 --- a/arch/arm/mach-mvebu/pm-board.c +++ b/arch/arm/mach-mvebu/pm-board.c @@ -43,6 +43,9 @@ static void mvebu_armada_xp_gp_pm_enter(void __iomem *sdram_reg, u32 srcmd) for (i = 0; i < ARMADA_XP_GP_PIC_NR_GPIOS; i++) ackcmd |= BIT(pic_raw_gpios[i]); + srcmd = cpu_to_le32(srcmd); + ackcmd = cpu_to_le32(ackcmd); + /* * Wait a while, the PIC needs quite a bit of time between the * two GPIO commands. diff --git a/arch/arm/mach-omap2/clockdomains7xx_data.c b/arch/arm/mach-omap2/clockdomains7xx_data.c index 57d5df0c1fbd0..7581e036bda62 100644 --- a/arch/arm/mach-omap2/clockdomains7xx_data.c +++ b/arch/arm/mach-omap2/clockdomains7xx_data.c @@ -331,7 +331,7 @@ static struct clockdomain l4per2_7xx_clkdm = { .dep_bit = DRA7XX_L4PER2_STATDEP_SHIFT, .wkdep_srcs = l4per2_wkup_sleep_deps, .sleepdep_srcs = l4per2_wkup_sleep_deps, - .flags = CLKDM_CAN_HWSUP_SWSUP, + .flags = CLKDM_CAN_SWSUP, }; static struct clockdomain mpu0_7xx_clkdm = { diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index 3b56722dfd8a9..6833df45d7b1d 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -392,6 +392,7 @@ static struct irq_chip wakeupgen_chip = { .irq_mask = wakeupgen_mask, .irq_unmask = wakeupgen_unmask, .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_type = irq_chip_set_type_parent, .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 752969ff9de04..5286e7773ed40 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2373,6 +2373,9 @@ static int of_dev_hwmod_lookup(struct device_node *np, * registers. This address is needed early so the OCP registers that * are part of the device's address space can be ioremapped properly. * + * If SYSC access is not needed, the registers will not be remapped + * and non-availability of MPU access is not treated as an error. 
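The gpc.c rework above drops the #ifdef CONFIG_PM_GENERIC_DOMAINS block in favour of a run-time IS_ENABLED() check, so the power-domain code is always compiled and the power-on path always runs. A condensed, self-contained sketch of the pattern; the helper names and the boolean standing in for IS_ENABLED() are placeholders, not the driver's API:

    #include <stdbool.h>

    /* stand-ins for the real helpers and for IS_ENABLED(CONFIG_...) */
    static bool pm_generic_domains_enabled;
    static void power_on_pu_domain(void) { /* ... */ }
    static int register_pu_domain(void) { return 0; }

    static int gpc_init_sketch(void)
    {
        power_on_pu_domain();        /* unconditional: the bootloader may
                                      * have left the domain powered off */
        if (!pm_generic_domains_enabled)
            return 0;                /* still compiled and type-checked */
        return register_pu_domain();
    }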
+ * * Returns 0 on success, -EINVAL if an invalid hwmod is passed, and * -ENXIO on absent or invalid register target address space. */ @@ -2387,6 +2390,11 @@ static int __init _init_mpu_rt_base(struct omap_hwmod *oh, void *data, _save_mpu_port_index(oh); + /* if we don't need sysc access we don't need to ioremap */ + if (!oh->class->sysc) + return 0; + + /* we can't continue without MPU PORT if we need sysc access */ if (oh->_int_flags & _HWMOD_NO_MPU_PORT) return -ENXIO; @@ -2396,8 +2404,10 @@ static int __init _init_mpu_rt_base(struct omap_hwmod *oh, void *data, oh->name); /* Extract the IO space from device tree blob */ - if (!np) + if (!np) { + pr_err("omap_hwmod: %s: no dt node\n", oh->name); return -ENXIO; + } va_start = of_iomap(np, index + oh->mpu_rt_idx); } else { @@ -2456,13 +2466,11 @@ static int __init _init(struct omap_hwmod *oh, void *data) oh->name, np->name); } - if (oh->class->sysc) { - r = _init_mpu_rt_base(oh, NULL, index, np); - if (r < 0) { - WARN(1, "omap_hwmod: %s: doesn't have mpu register target base\n", - oh->name); - return 0; - } + r = _init_mpu_rt_base(oh, NULL, index, np); + if (r < 0) { + WARN(1, "omap_hwmod: %s: doesn't have mpu register target base\n", + oh->name); + return 0; } r = _init_clocks(oh, NULL); diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S index eafd120b53f1b..8e2a7acb823b6 100644 --- a/arch/arm/mach-omap2/sleep34xx.S +++ b/arch/arm/mach-omap2/sleep34xx.S @@ -86,13 +86,18 @@ ENTRY(enable_omap3630_toggle_l2_on_restore) stmfd sp!, {lr} @ save registers on stack /* Setup so that we will disable and enable l2 */ mov r1, #0x1 - adrl r2, l2dis_3630 @ may be too distant for plain adr - str r1, [r2] + adrl r3, l2dis_3630_offset @ may be too distant for plain adr + ldr r2, [r3] @ value for offset + str r1, [r2, r3] @ write to l2dis_3630 ldmfd sp!, {pc} @ restore regs and return ENDPROC(enable_omap3630_toggle_l2_on_restore) - .text -/* Function to call rom code to save secure ram context */ +/* + * Function to call rom code to save secure ram context. This gets + * relocated to SRAM, so it can be all in .data section. Otherwise + * we need to initialize api_params separately. + */ + .data .align 3 ENTRY(save_secure_ram_context) stmfd sp!, {r4 - r11, lr} @ save registers on stack @@ -126,6 +131,8 @@ ENDPROC(save_secure_ram_context) ENTRY(save_secure_ram_context_sz) .word . - save_secure_ram_context + .text + /* * ====================== * == Idle entry point == @@ -289,12 +296,6 @@ wait_sdrc_ready: bic r5, r5, #0x40 str r5, [r4] -/* - * PC-relative stores lead to undefined behaviour in Thumb-2: use a r7 as a - * base instead. - * Be careful not to clobber r7 when maintaing this code. - */ - is_dll_in_lock_mode: /* Is dll in lock mode? */ ldr r4, sdrc_dlla_ctrl @@ -302,11 +303,7 @@ is_dll_in_lock_mode: tst r5, #0x4 bne exit_nonoff_modes @ Return if locked /* wait till dll locks */ - adr r7, kick_counter wait_dll_lock_timed: - ldr r4, wait_dll_lock_counter - add r4, r4, #1 - str r4, [r7, #wait_dll_lock_counter - kick_counter] ldr r4, sdrc_dlla_status /* Wait 20uS for lock */ mov r6, #8 @@ -330,9 +327,6 @@ kick_dll: orr r6, r6, #(1<<3) @ enable dll str r6, [r4] dsb - ldr r4, kick_counter - add r4, r4, #1 - str r4, [r7] @ kick_counter b wait_dll_lock_timed exit_nonoff_modes: @@ -360,15 +354,6 @@ sdrc_dlla_status: .word SDRC_DLLA_STATUS_V sdrc_dlla_ctrl: .word SDRC_DLLA_CTRL_V - /* - * When exporting to userspace while the counters are in SRAM, - * these 2 words need to be at the end to facilitate retrival! 
- */ -kick_counter: - .word 0 -wait_dll_lock_counter: - .word 0 - ENTRY(omap3_do_wfi_sz) .word . - omap3_do_wfi @@ -437,7 +422,9 @@ ENTRY(omap3_restore) cmp r2, #0x0 @ Check if target power state was OFF or RET bne logic_l1_restore - ldr r0, l2dis_3630 + adr r1, l2dis_3630_offset @ address for offset + ldr r0, [r1] @ value for offset + ldr r0, [r1, r0] @ value at l2dis_3630 cmp r0, #0x1 @ should we disable L2 on 3630? bne skipl2dis mrc p15, 0, r0, c1, c0, 1 @@ -506,7 +493,9 @@ l2_inv_gp: mov r12, #0x2 smc #0 @ Call SMI monitor (smieq) logic_l1_restore: - ldr r1, l2dis_3630 + adr r0, l2dis_3630_offset @ address for offset + ldr r1, [r0] @ value for offset + ldr r1, [r0, r1] @ value at l2dis_3630 cmp r1, #0x1 @ Test if L2 re-enable needed on 3630 bne skipl2reen mrc p15, 0, r1, c1, c0, 1 @@ -535,6 +524,10 @@ control_stat: .word CONTROL_STAT control_mem_rta: .word CONTROL_MEM_RTA_CTRL +l2dis_3630_offset: + .long l2dis_3630 - . + + .data l2dis_3630: .word 0 diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S index ad1bb9431e941..5373a32817798 100644 --- a/arch/arm/mach-omap2/sleep44xx.S +++ b/arch/arm/mach-omap2/sleep44xx.S @@ -29,12 +29,6 @@ dsb .endm -ppa_zero_params: - .word 0x0 - -ppa_por_params: - .word 1, 0 - #ifdef CONFIG_ARCH_OMAP4 /* @@ -266,7 +260,9 @@ ENTRY(omap4_cpu_resume) beq skip_ns_smp_enable ppa_actrl_retry: mov r0, #OMAP4_PPA_CPU_ACTRL_SMP_INDEX - adr r3, ppa_zero_params @ Pointer to parameters + adr r1, ppa_zero_params_offset + ldr r3, [r1] + add r3, r3, r1 @ Pointer to ppa_zero_params mov r1, #0x0 @ Process ID mov r2, #0x4 @ Flag mov r6, #0xff @@ -303,7 +299,9 @@ skip_ns_smp_enable: ldr r0, =OMAP4_PPA_L2_POR_INDEX ldr r1, =OMAP44XX_SAR_RAM_BASE ldr r4, [r1, #L2X0_PREFETCH_CTRL_OFFSET] - adr r3, ppa_por_params + adr r1, ppa_por_params_offset + ldr r3, [r1] + add r3, r3, r1 @ Pointer to ppa_por_params str r4, [r3, #0x04] mov r1, #0x0 @ Process ID mov r2, #0x4 @ Flag @@ -328,6 +326,8 @@ skip_l2en: #endif b cpu_resume @ Jump to generic resume +ppa_por_params_offset: + .long ppa_por_params - . ENDPROC(omap4_cpu_resume) #endif /* CONFIG_ARCH_OMAP4 */ @@ -382,4 +382,13 @@ ENTRY(omap_do_wfi) nop ldmfd sp!, {pc} +ppa_zero_params_offset: + .long ppa_zero_params - .
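The sleep34xx.S and sleep44xx.S changes above replace direct loads of .data symbols with a position-independent idiom: a literal holding "symbol - ." is emitted next to the code, and at run time the code adds the literal's own address back to recover the symbol's address, which keeps working after the routine is copied to SRAM. The same computation rendered in C, as a sketch of the idea rather than kernel code:

    #include <stdint.h>

    /* A relocatable "pointer": the slot stores the displacement from the
     * slot itself to the target, not the target's absolute address. */
    static uintptr_t resolve_self_relative(const int32_t *slot)
    {
        return (uintptr_t)slot + *slot;  /* slot address + (target - slot) */
    }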
ENDPROC(omap_do_wfi) + + .data +ppa_zero_params: + .word 0 + +ppa_por_params: + .word 1, 0 diff --git a/arch/arm/mach-orion5x/include/mach/irqs.h b/arch/arm/mach-orion5x/include/mach/irqs.h index a6fa9d8f12d88..2431d99234273 100644 --- a/arch/arm/mach-orion5x/include/mach/irqs.h +++ b/arch/arm/mach-orion5x/include/mach/irqs.h @@ -16,42 +16,42 @@ /* * Orion Main Interrupt Controller */ -#define IRQ_ORION5X_BRIDGE 0 -#define IRQ_ORION5X_DOORBELL_H2C 1 -#define IRQ_ORION5X_DOORBELL_C2H 2 -#define IRQ_ORION5X_UART0 3 -#define IRQ_ORION5X_UART1 4 -#define IRQ_ORION5X_I2C 5 -#define IRQ_ORION5X_GPIO_0_7 6 -#define IRQ_ORION5X_GPIO_8_15 7 -#define IRQ_ORION5X_GPIO_16_23 8 -#define IRQ_ORION5X_GPIO_24_31 9 -#define IRQ_ORION5X_PCIE0_ERR 10 -#define IRQ_ORION5X_PCIE0_INT 11 -#define IRQ_ORION5X_USB1_CTRL 12 -#define IRQ_ORION5X_DEV_BUS_ERR 14 -#define IRQ_ORION5X_PCI_ERR 15 -#define IRQ_ORION5X_USB_BR_ERR 16 -#define IRQ_ORION5X_USB0_CTRL 17 -#define IRQ_ORION5X_ETH_RX 18 -#define IRQ_ORION5X_ETH_TX 19 -#define IRQ_ORION5X_ETH_MISC 20 -#define IRQ_ORION5X_ETH_SUM 21 -#define IRQ_ORION5X_ETH_ERR 22 -#define IRQ_ORION5X_IDMA_ERR 23 -#define IRQ_ORION5X_IDMA_0 24 -#define IRQ_ORION5X_IDMA_1 25 -#define IRQ_ORION5X_IDMA_2 26 -#define IRQ_ORION5X_IDMA_3 27 -#define IRQ_ORION5X_CESA 28 -#define IRQ_ORION5X_SATA 29 -#define IRQ_ORION5X_XOR0 30 -#define IRQ_ORION5X_XOR1 31 +#define IRQ_ORION5X_BRIDGE (1 + 0) +#define IRQ_ORION5X_DOORBELL_H2C (1 + 1) +#define IRQ_ORION5X_DOORBELL_C2H (1 + 2) +#define IRQ_ORION5X_UART0 (1 + 3) +#define IRQ_ORION5X_UART1 (1 + 4) +#define IRQ_ORION5X_I2C (1 + 5) +#define IRQ_ORION5X_GPIO_0_7 (1 + 6) +#define IRQ_ORION5X_GPIO_8_15 (1 + 7) +#define IRQ_ORION5X_GPIO_16_23 (1 + 8) +#define IRQ_ORION5X_GPIO_24_31 (1 + 9) +#define IRQ_ORION5X_PCIE0_ERR (1 + 10) +#define IRQ_ORION5X_PCIE0_INT (1 + 11) +#define IRQ_ORION5X_USB1_CTRL (1 + 12) +#define IRQ_ORION5X_DEV_BUS_ERR (1 + 14) +#define IRQ_ORION5X_PCI_ERR (1 + 15) +#define IRQ_ORION5X_USB_BR_ERR (1 + 16) +#define IRQ_ORION5X_USB0_CTRL (1 + 17) +#define IRQ_ORION5X_ETH_RX (1 + 18) +#define IRQ_ORION5X_ETH_TX (1 + 19) +#define IRQ_ORION5X_ETH_MISC (1 + 20) +#define IRQ_ORION5X_ETH_SUM (1 + 21) +#define IRQ_ORION5X_ETH_ERR (1 + 22) +#define IRQ_ORION5X_IDMA_ERR (1 + 23) +#define IRQ_ORION5X_IDMA_0 (1 + 24) +#define IRQ_ORION5X_IDMA_1 (1 + 25) +#define IRQ_ORION5X_IDMA_2 (1 + 26) +#define IRQ_ORION5X_IDMA_3 (1 + 27) +#define IRQ_ORION5X_CESA (1 + 28) +#define IRQ_ORION5X_SATA (1 + 29) +#define IRQ_ORION5X_XOR0 (1 + 30) +#define IRQ_ORION5X_XOR1 (1 + 31) /* * Orion General Purpose Pins */ -#define IRQ_ORION5X_GPIO_START 32 +#define IRQ_ORION5X_GPIO_START 33 #define NR_GPIO_IRQS 32 #define NR_IRQS (IRQ_ORION5X_GPIO_START + NR_GPIO_IRQS) diff --git a/arch/arm/mach-orion5x/irq.c b/arch/arm/mach-orion5x/irq.c index cd4bac4d7e43f..086ecb87d8858 100644 --- a/arch/arm/mach-orion5x/irq.c +++ b/arch/arm/mach-orion5x/irq.c @@ -42,7 +42,7 @@ __exception_irq_entry orion5x_legacy_handle_irq(struct pt_regs *regs) stat = readl_relaxed(MAIN_IRQ_CAUSE); stat &= readl_relaxed(MAIN_IRQ_MASK); if (stat) { - unsigned int hwirq = __fls(stat); + unsigned int hwirq = 1 + __fls(stat); handle_IRQ(hwirq, regs); return; } @@ -51,7 +51,7 @@ __exception_irq_entry orion5x_legacy_handle_irq(struct pt_regs *regs) void __init orion5x_init_irq(void) { - orion_irq_init(0, MAIN_IRQ_MASK); + orion_irq_init(1, MAIN_IRQ_MASK); #ifdef CONFIG_MULTI_IRQ_HANDLER set_handle_irq(orion5x_legacy_handle_irq); diff --git a/arch/arm/mach-prima2/headsmp.S 
b/arch/arm/mach-prima2/headsmp.S index d86fe33c5f538..209d9fc5c16cf 100644 --- a/arch/arm/mach-prima2/headsmp.S +++ b/arch/arm/mach-prima2/headsmp.S @@ -15,7 +15,6 @@ * ready for them to initialise. */ ENTRY(sirfsoc_secondary_startup) - bl v7_invalidate_l1 mrc p15, 0, r0, c0, c0, 5 and r0, r0, #15 adr r4, 1f diff --git a/arch/arm/mach-pxa/capc7117.c b/arch/arm/mach-pxa/capc7117.c index c092730749b9d..bf366b39fa611 100644 --- a/arch/arm/mach-pxa/capc7117.c +++ b/arch/arm/mach-pxa/capc7117.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -144,6 +145,8 @@ static void __init capc7117_init(void) capc7117_uarts_init(); capc7117_ide_init(); + + regulator_has_full_constraints(); } MACHINE_START(CAPC7117, diff --git a/arch/arm/mach-pxa/cm-x2xx.c b/arch/arm/mach-pxa/cm-x2xx.c index bb99f59a36d88..a17a91eb8e9a3 100644 --- a/arch/arm/mach-pxa/cm-x2xx.c +++ b/arch/arm/mach-pxa/cm-x2xx.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -466,6 +467,8 @@ static void __init cmx2xx_init(void) cmx2xx_init_ac97(); cmx2xx_init_touchscreen(); cmx2xx_init_leds(); + + regulator_has_full_constraints(); } static void __init cmx2xx_init_irq(void) diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c index 4d3588d26c2a1..5851f4c254c16 100644 --- a/arch/arm/mach-pxa/cm-x300.c +++ b/arch/arm/mach-pxa/cm-x300.c @@ -835,6 +835,8 @@ static void __init cm_x300_init(void) cm_x300_init_ac97(); cm_x300_init_wi2wi(); cm_x300_init_bl(); + + regulator_has_full_constraints(); } static void __init cm_x300_fixup(struct tag *tags, char **cmdline) diff --git a/arch/arm/mach-pxa/colibri-pxa270.c b/arch/arm/mach-pxa/colibri-pxa270.c index 5f9d9303b346d..3503826333c74 100644 --- a/arch/arm/mach-pxa/colibri-pxa270.c +++ b/arch/arm/mach-pxa/colibri-pxa270.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -294,6 +295,8 @@ static void __init colibri_pxa270_init(void) printk(KERN_ERR "Illegal colibri_pxa270_baseboard type %d\n", colibri_pxa270_baseboard); } + + regulator_has_full_constraints(); } /* The "Income s.r.o. 
SH-Dmaster PXA270 SBC" board can be booted either diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index 51531ecffca85..9d7072b040458 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -1306,6 +1306,8 @@ static void __init em_x270_init(void) em_x270_init_i2c(); em_x270_init_camera(); em_x270_userspace_consumers_init(); + + regulator_has_full_constraints(); } MACHINE_START(EM_X270, "Compulab EM-X270") diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c index c98511c5abd10..9b0eb0252af6f 100644 --- a/arch/arm/mach-pxa/icontrol.c +++ b/arch/arm/mach-pxa/icontrol.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "generic.h" @@ -185,6 +186,8 @@ static void __init icontrol_init(void) mxm_8x10_mmc_init(); icontrol_can_init(); + + regulator_has_full_constraints(); } MACHINE_START(ICONTROL, "iControl/SafeTcam boards using Embedian MXM-8x10 CoM") diff --git a/arch/arm/mach-pxa/include/mach/pxa27x.h b/arch/arm/mach-pxa/include/mach/pxa27x.h index 599b925a657c4..1a4291936c582 100644 --- a/arch/arm/mach-pxa/include/mach/pxa27x.h +++ b/arch/arm/mach-pxa/include/mach/pxa27x.h @@ -19,7 +19,7 @@ #define ARB_CORE_PARK (1<<24) /* Be parked with core when idle */ #define ARB_LOCK_FLAG (1<<23) /* Only Locking masters gain access to the bus */ -extern int __init pxa27x_set_pwrmode(unsigned int mode); +extern int pxa27x_set_pwrmode(unsigned int mode); extern void pxa27x_cpu_pm_enter(suspend_state_t state); #endif /* __MACH_PXA27x_H */ diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c index af423a48c2e3b..782e6b98dd9a7 100644 --- a/arch/arm/mach-pxa/pxa27x.c +++ b/arch/arm/mach-pxa/pxa27x.c @@ -251,7 +251,7 @@ static struct clk_lookup pxa27x_clkregs[] = { */ static unsigned int pwrmode = PWRMODE_SLEEP; -int __init pxa27x_set_pwrmode(unsigned int mode) +int pxa27x_set_pwrmode(unsigned int mode) { switch (mode) { case PWRMODE_SLEEP: diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c index 872dcb20e7578..066e3a250ee03 100644 --- a/arch/arm/mach-pxa/trizeps4.c +++ b/arch/arm/mach-pxa/trizeps4.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -534,6 +535,8 @@ static void __init trizeps4_init(void) BCR_writew(trizeps_conxs_bcr); board_backlight_power(1); + + regulator_has_full_constraints(); } static void __init trizeps4_map_io(void) diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c index aa89488f961ec..54122a983ae37 100644 --- a/arch/arm/mach-pxa/vpac270.c +++ b/arch/arm/mach-pxa/vpac270.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -711,6 +712,8 @@ static void __init vpac270_init(void) vpac270_ts_init(); vpac270_rtc_init(); vpac270_ide_init(); + + regulator_has_full_constraints(); } MACHINE_START(VPAC270, "Voipac PXA270") diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index ac2ae5c71ab45..6158566fa0f74 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -868,6 +868,8 @@ static void __init zeus_init(void) i2c_register_board_info(0, ARRAY_AND_SIZE(zeus_i2c_devices)); pxa2xx_set_spi_info(3, &pxa2xx_spi_ssp3_master_info); spi_register_board_info(zeus_spi_board_info, ARRAY_SIZE(zeus_spi_board_info)); + + regulator_has_full_constraints(); } static struct map_desc zeus_io_desc[] __initdata = { diff --git a/arch/arm/mach-rockchip/core.h b/arch/arm/mach-rockchip/core.h index 39bca96b555a6..492c048813da6 100644 --- a/arch/arm/mach-rockchip/core.h +++ b/arch/arm/mach-rockchip/core.h @@ 
-17,4 +17,3 @@ extern char rockchip_secondary_trampoline; extern char rockchip_secondary_trampoline_end; extern unsigned long rockchip_boot_fn; -extern void rockchip_secondary_startup(void); diff --git a/arch/arm/mach-rockchip/headsmp.S b/arch/arm/mach-rockchip/headsmp.S index 46c22dedf632a..d69708b072829 100644 --- a/arch/arm/mach-rockchip/headsmp.S +++ b/arch/arm/mach-rockchip/headsmp.S @@ -15,14 +15,6 @@ #include #include -ENTRY(rockchip_secondary_startup) - mrc p15, 0, r0, c0, c0, 0 @ read main ID register - ldr r1, =0x00000c09 @ Cortex-A9 primary part number - teq r0, r1 - beq v7_invalidate_l1 - b secondary_startup -ENDPROC(rockchip_secondary_startup) - ENTRY(rockchip_secondary_trampoline) ldr pc, 1f ENDPROC(rockchip_secondary_trampoline) diff --git a/arch/arm/mach-rockchip/platsmp.c b/arch/arm/mach-rockchip/platsmp.c index 5b4ca3c3c8797..611a5f96d3ca0 100644 --- a/arch/arm/mach-rockchip/platsmp.c +++ b/arch/arm/mach-rockchip/platsmp.c @@ -72,29 +72,22 @@ static struct reset_control *rockchip_get_core_reset(int cpu) static int pmu_set_power_domain(int pd, bool on) { u32 val = (on) ? 0 : BIT(pd); + struct reset_control *rstc = rockchip_get_core_reset(pd); int ret; + if (IS_ERR(rstc) && read_cpuid_part() != ARM_CPU_PART_CORTEX_A9) { + pr_err("%s: could not get reset control for core %d\n", + __func__, pd); + return PTR_ERR(rstc); + } + /* * We need to soft reset the cpu when we turn off the cpu power domain, * or else the active processors might be stalled when the individual * processor is powered down. */ - if (read_cpuid_part() != ARM_CPU_PART_CORTEX_A9) { - struct reset_control *rstc = rockchip_get_core_reset(pd); - - if (IS_ERR(rstc)) { - pr_err("%s: could not get reset control for core %d\n", - __func__, pd); - return PTR_ERR(rstc); - } - - if (on) - reset_control_deassert(rstc); - else - reset_control_assert(rstc); - - reset_control_put(rstc); - } + if (!IS_ERR(rstc) && !on) + reset_control_assert(rstc); ret = regmap_update_bits(pmu, PMU_PWRDN_CON, BIT(pd), val); if (ret < 0) { @@ -112,6 +105,12 @@ static int pmu_set_power_domain(int pd, bool on) } } + if (!IS_ERR(rstc)) { + if (on) + reset_control_deassert(rstc); + reset_control_put(rstc); + } + return 0; } @@ -147,10 +146,13 @@ static int rockchip_boot_secondary(unsigned int cpu, * the mailbox: * sram_base_addr + 4: 0xdeadbeaf * sram_base_addr + 8: start address for pc + * CPU0 needs to wait until the CPUs other than cpu0 have entered + * the WFE state. The wait time is affected by many factors + * (e.g. CPU frequency, bootrom frequency, SRAM frequency, ...)
* */ - udelay(10); - writel(virt_to_phys(rockchip_secondary_startup), - sram_base_addr + 8); + mdelay(1); /* give the CPUs other than cpu0 time to start up */ + + writel(virt_to_phys(secondary_startup), sram_base_addr + 8); writel(0xDEADBEAF, sram_base_addr + 4); dsb_sev(); } @@ -189,7 +191,7 @@ static int __init rockchip_smp_prepare_sram(struct device_node *node) } /* set the boot function for the sram code */ - rockchip_boot_fn = virt_to_phys(rockchip_secondary_startup); + rockchip_boot_fn = virt_to_phys(secondary_startup); /* copy the trampoline to sram, that runs during startup of the core */ memcpy(sram_base_addr, &rockchip_secondary_trampoline, trampoline_sz); diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h index afc60bad6fd6b..476092b86c6e4 100644 --- a/arch/arm/mach-shmobile/common.h +++ b/arch/arm/mach-shmobile/common.h @@ -14,7 +14,6 @@ extern void shmobile_smp_sleep(void); extern void shmobile_smp_hook(unsigned int cpu, unsigned long fn, unsigned long arg); extern int shmobile_smp_cpu_disable(unsigned int cpu); -extern void shmobile_invalidate_start(void); extern void shmobile_boot_scu(void); extern void shmobile_smp_scu_prepare_cpus(unsigned int max_cpus); extern void shmobile_smp_scu_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-shmobile/headsmp-scu.S b/arch/arm/mach-shmobile/headsmp-scu.S index 69df8bfac1672..fa5248c52399c 100644 --- a/arch/arm/mach-shmobile/headsmp-scu.S +++ b/arch/arm/mach-shmobile/headsmp-scu.S @@ -22,7 +22,7 @@ * Boot code for secondary CPUs. * * First we turn on L1 cache coherency for our CPU. Then we jump to - * shmobile_invalidate_start that invalidates the cache and hands over control + * secondary_startup that invalidates the cache and hands over control * to the common ARM startup code. */ ENTRY(shmobile_boot_scu) @@ -36,7 +36,7 @@ ENTRY(shmobile_boot_scu) bic r2, r2, r3 @ Clear bits of our CPU (Run Mode) str r2, [r0, #8] @ write back - b shmobile_invalidate_start + b secondary_startup ENDPROC(shmobile_boot_scu) .text diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S index 50c491567e11c..330c1fc63197d 100644 --- a/arch/arm/mach-shmobile/headsmp.S +++ b/arch/arm/mach-shmobile/headsmp.S @@ -16,13 +16,6 @@ #include #include -#ifdef CONFIG_SMP -ENTRY(shmobile_invalidate_start) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(shmobile_invalidate_start) -#endif - /* * Reset vector for secondary CPUs. * This will be mapped at address 0 by SBAR register.
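The Rockchip boot protocol described in the comment above amounts to a two-word mailbox in SRAM. A sketch of the kicking side's order of operations; the function is illustrative, while the offsets and magic value follow the comment:

    #include <stdint.h>

    /* sram[1] is sram_base + 4 (magic), sram[2] is sram_base + 8 (entry pc) */
    static void kick_secondary(volatile uint32_t *sram, uint32_t entry_phys)
    {
        sram[2] = entry_phys;   /* publish the start address first... */
        sram[1] = 0xDEADBEAF;   /* ...then the magic signalling it is valid */
        /* the kernel follows this with dsb_sev() to wake the waiting CPUs */
    }

Writing the entry point before the magic means a secondary CPU that sees the magic can never read a stale start address.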
diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c b/arch/arm/mach-shmobile/platsmp-apmu.c index f483b560b066a..b0790fc322824 100644 --- a/arch/arm/mach-shmobile/platsmp-apmu.c +++ b/arch/arm/mach-shmobile/platsmp-apmu.c @@ -133,7 +133,7 @@ void __init shmobile_smp_apmu_prepare_cpus(unsigned int max_cpus, int shmobile_smp_apmu_boot_secondary(unsigned int cpu, struct task_struct *idle) { /* For this particular CPU register boot vector */ - shmobile_smp_hook(cpu, virt_to_phys(shmobile_invalidate_start), 0); + shmobile_smp_hook(cpu, virt_to_phys(secondary_startup), 0); return apmu_wrap(cpu, apmu_power_on); } diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h index a0f3b1cd497cc..767c09e954a0f 100644 --- a/arch/arm/mach-socfpga/core.h +++ b/arch/arm/mach-socfpga/core.h @@ -31,7 +31,6 @@ #define RSTMGR_MPUMODRST_CPU1 0x2 /* CPU1 Reset */ -extern void socfpga_secondary_startup(void); extern void __iomem *socfpga_scu_base_addr; extern void socfpga_init_clocks(void); diff --git a/arch/arm/mach-socfpga/headsmp.S b/arch/arm/mach-socfpga/headsmp.S index f65ea0af4af37..5bb0164271076 100644 --- a/arch/arm/mach-socfpga/headsmp.S +++ b/arch/arm/mach-socfpga/headsmp.S @@ -30,8 +30,3 @@ ENTRY(secondary_trampoline) 1: .long . .long socfpga_cpu1start_addr ENTRY(secondary_trampoline_end) - -ENTRY(socfpga_secondary_startup) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(socfpga_secondary_startup) diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c index c64d89b7c0ca8..79c5336c569ff 100644 --- a/arch/arm/mach-socfpga/platsmp.c +++ b/arch/arm/mach-socfpga/platsmp.c @@ -40,7 +40,7 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); - writel(virt_to_phys(socfpga_secondary_startup), + writel(virt_to_phys(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff)); flush_cache_all(); diff --git a/arch/arm/mach-tegra/Makefile b/arch/arm/mach-tegra/Makefile index e48a74458c258..fffad2426ee4b 100644 --- a/arch/arm/mach-tegra/Makefile +++ b/arch/arm/mach-tegra/Makefile @@ -19,7 +19,7 @@ obj-$(CONFIG_ARCH_TEGRA_3x_SOC) += pm-tegra30.o ifeq ($(CONFIG_CPU_IDLE),y) obj-$(CONFIG_ARCH_TEGRA_3x_SOC) += cpuidle-tegra30.o endif -obj-$(CONFIG_SMP) += platsmp.o headsmp.o +obj-$(CONFIG_SMP) += platsmp.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o obj-$(CONFIG_ARCH_TEGRA_114_SOC) += sleep-tegra30.o diff --git a/arch/arm/mach-tegra/board-paz00.c b/arch/arm/mach-tegra/board-paz00.c index fbe74c6806f3b..49d1110cff534 100644 --- a/arch/arm/mach-tegra/board-paz00.c +++ b/arch/arm/mach-tegra/board-paz00.c @@ -39,8 +39,8 @@ static struct platform_device wifi_rfkill_device = { static struct gpiod_lookup_table wifi_gpio_lookup = { .dev_id = "rfkill_gpio", .table = { - GPIO_LOOKUP_IDX("tegra-gpio", 25, NULL, 0, 0), - GPIO_LOOKUP_IDX("tegra-gpio", 85, NULL, 1, 0), + GPIO_LOOKUP("tegra-gpio", 25, "reset", 0), + GPIO_LOOKUP("tegra-gpio", 85, "shutdown", 0), { }, }, }; diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c index 88de2dce2e872..7469347b17493 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra20.c +++ b/arch/arm/mach-tegra/cpuidle-tegra20.c @@ -34,6 +34,7 @@ #include "iomap.h" #include "irq.h" #include "pm.h" +#include "reset.h" #include "sleep.h" #ifdef CONFIG_PM_SLEEP @@ -70,15 +71,13 @@ static struct cpuidle_driver tegra_idle_driver = { #ifdef CONFIG_PM_SLEEP #ifdef CONFIG_SMP -static void __iomem *pmc = IO_ADDRESS(TEGRA_PMC_BASE); - 
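The Tegra20 hunks that follow move the CPU1 "resettable" flag out of the PMC scratch register and into a byte inside the IRAM reset-handler blob, addressed as a base plus an offset exported by the assembly. A sketch of the resulting accessors in C; the names mirror the diff, but the enum values are illustrative and this is not the kernel code:

    #include <stdint.h>
    #include <stddef.h>

    enum { CPU_NOT_RESETTABLE, CPU_RESETTABLE_SOON, CPU_RESETTABLE };

    /* base: virtual address of the IRAM reset handler; offset: the
     * __tegra20_cpu1_resettable_status_offset the asm blob exports */
    static volatile uint8_t *cpu1_flag(uintptr_t base, size_t offset)
    {
        return (volatile uint8_t *)(base + offset);
    }

    static void set_resettable(uintptr_t base, size_t offset, uint8_t state)
    {
        *cpu1_flag(base, offset) = state;  /* strb in the assembly versions */
    }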
static int tegra20_reset_sleeping_cpu_1(void) { int ret = 0; tegra_pen_lock(); - if (readl(pmc + PMC_SCRATCH41) == CPU_RESETTABLE) + if (readb(tegra20_cpu1_resettable_status) == CPU_RESETTABLE) tegra20_cpu_shutdown(1); else ret = -EINVAL; diff --git a/arch/arm/mach-tegra/headsmp.S b/arch/arm/mach-tegra/headsmp.S deleted file mode 100644 index 2072e7322c398..0000000000000 --- a/arch/arm/mach-tegra/headsmp.S +++ /dev/null @@ -1,12 +0,0 @@ -#include -#include - -#include "sleep.h" - - .section ".text.head", "ax" - -ENTRY(tegra_secondary_startup) - check_cpu_part_num 0xc09, r8, r9 - bleq v7_invalidate_l1 - b secondary_startup -ENDPROC(tegra_secondary_startup) diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S index 71be4af5e975b..e3070fdab80b8 100644 --- a/arch/arm/mach-tegra/reset-handler.S +++ b/arch/arm/mach-tegra/reset-handler.S @@ -169,10 +169,10 @@ after_errata: cmp r6, #TEGRA20 bne 1f /* If not CPU0, don't let CPU0 reset CPU1 now that CPU1 is coming up. */ - mov32 r5, TEGRA_PMC_BASE - mov r0, #0 + mov32 r5, TEGRA_IRAM_BASE + TEGRA_IRAM_RESET_HANDLER_OFFSET + mov r0, #CPU_NOT_RESETTABLE cmp r10, #0 - strne r0, [r5, #PMC_SCRATCH41] + strneb r0, [r5, #__tegra20_cpu1_resettable_status_offset] 1: #endif @@ -281,6 +281,10 @@ __tegra_cpu_reset_handler_data: .rept TEGRA_RESET_DATA_SIZE .long 0 .endr + .globl __tegra20_cpu1_resettable_status_offset + .equ __tegra20_cpu1_resettable_status_offset, \ + . - __tegra_cpu_reset_handler_start + .byte 0 .align L1_CACHE_SHIFT ENTRY(__tegra_cpu_reset_handler_end) diff --git a/arch/arm/mach-tegra/reset.c b/arch/arm/mach-tegra/reset.c index 894c5c472184f..6fd9db54887ee 100644 --- a/arch/arm/mach-tegra/reset.c +++ b/arch/arm/mach-tegra/reset.c @@ -94,7 +94,7 @@ void __init tegra_cpu_reset_handler_init(void) __tegra_cpu_reset_handler_data[TEGRA_RESET_MASK_PRESENT] = *((u32 *)cpu_possible_mask); __tegra_cpu_reset_handler_data[TEGRA_RESET_STARTUP_SECONDARY] = - virt_to_phys((void *)tegra_secondary_startup); + virt_to_phys((void *)secondary_startup); #endif #ifdef CONFIG_PM_SLEEP diff --git a/arch/arm/mach-tegra/reset.h b/arch/arm/mach-tegra/reset.h index 76a93434c6ee0..9c479c7925b85 100644 --- a/arch/arm/mach-tegra/reset.h +++ b/arch/arm/mach-tegra/reset.h @@ -35,8 +35,8 @@ extern unsigned long __tegra_cpu_reset_handler_data[TEGRA_RESET_DATA_SIZE]; void __tegra_cpu_reset_handler_start(void); void __tegra_cpu_reset_handler(void); +void __tegra20_cpu1_resettable_status_offset(void); void __tegra_cpu_reset_handler_end(void); -void tegra_secondary_startup(void); #ifdef CONFIG_PM_SLEEP #define tegra_cpu_lp1_mask \ @@ -47,6 +47,9 @@ void tegra_secondary_startup(void); (IO_ADDRESS(TEGRA_IRAM_BASE + TEGRA_IRAM_RESET_HANDLER_OFFSET + \ ((u32)&__tegra_cpu_reset_handler_data[TEGRA_RESET_MASK_LP2] - \ (u32)__tegra_cpu_reset_handler_start))) +#define tegra20_cpu1_resettable_status \ + (IO_ADDRESS(TEGRA_IRAM_BASE + TEGRA_IRAM_RESET_HANDLER_OFFSET + \ + (u32)__tegra20_cpu1_resettable_status_offset)) #endif #define tegra_cpu_reset_handler_offset \ diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S index be4bc5f853f5c..e6b684e14322c 100644 --- a/arch/arm/mach-tegra/sleep-tegra20.S +++ b/arch/arm/mach-tegra/sleep-tegra20.S @@ -97,9 +97,10 @@ ENDPROC(tegra20_hotplug_shutdown) ENTRY(tegra20_cpu_shutdown) cmp r0, #0 reteq lr @ must not be called for CPU 0 - mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 + mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT + ldr r2, =__tegra20_cpu1_resettable_status_offset mov r12, #CPU_RESETTABLE - 
str r12, [r1] + strb r12, [r1, r2] cpu_to_halt_reg r1, r0 ldr r3, =TEGRA_FLOW_CTRL_VIRT @@ -182,38 +183,41 @@ ENDPROC(tegra_pen_unlock) /* * tegra20_cpu_clear_resettable(void) * - * Called to clear the "resettable soon" flag in PMC_SCRATCH41 when + * Called to clear the "resettable soon" flag in the IRAM variable when * it is expected that the secondary CPU will be idle soon. */ ENTRY(tegra20_cpu_clear_resettable) - mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 + mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT + ldr r2, =__tegra20_cpu1_resettable_status_offset mov r12, #CPU_NOT_RESETTABLE - str r12, [r1] + strb r12, [r1, r2] ret lr ENDPROC(tegra20_cpu_clear_resettable) /* * tegra20_cpu_set_resettable_soon(void) * - * Called to set the "resettable soon" flag in PMC_SCRATCH41 when + * Called to set the "resettable soon" flag in the IRAM variable when * it is expected that the secondary CPU will be idle soon. */ ENTRY(tegra20_cpu_set_resettable_soon) - mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 + mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT + ldr r2, =__tegra20_cpu1_resettable_status_offset mov r12, #CPU_RESETTABLE_SOON - str r12, [r1] + strb r12, [r1, r2] ret lr ENDPROC(tegra20_cpu_set_resettable_soon) /* * tegra20_cpu_is_resettable_soon(void) * - * Returns true if the "resettable soon" flag in PMC_SCRATCH41 has been + * Returns true if the "resettable soon" flag in the IRAM variable has been * set because it is expected that the secondary CPU will be idle soon. */ ENTRY(tegra20_cpu_is_resettable_soon) - mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 - ldr r12, [r1] + mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT + ldr r2, =__tegra20_cpu1_resettable_status_offset + ldrb r12, [r1, r2] cmp r12, #CPU_RESETTABLE_SOON moveq r0, #1 movne r0, #0 @@ -256,9 +260,10 @@ ENTRY(tegra20_sleep_cpu_secondary_finish) mov r0, #TEGRA_FLUSH_CACHE_LOUIS bl tegra_disable_clean_inv_dcache - mov32 r0, TEGRA_PMC_VIRT + PMC_SCRATCH41 + mov32 r0, TEGRA_IRAM_RESET_BASE_VIRT + ldr r4, =__tegra20_cpu1_resettable_status_offset mov r3, #CPU_RESETTABLE - str r3, [r0] + strb r3, [r0, r4] bl tegra_cpu_do_idle @@ -274,10 +279,10 @@ ENTRY(tegra20_sleep_cpu_secondary_finish) bl tegra_pen_lock - mov32 r3, TEGRA_PMC_VIRT - add r0, r3, #PMC_SCRATCH41 + mov32 r0, TEGRA_IRAM_RESET_BASE_VIRT + ldr r4, =__tegra20_cpu1_resettable_status_offset mov r3, #CPU_NOT_RESETTABLE - str r3, [r0] + strb r3, [r0, r4] bl tegra_pen_unlock diff --git a/arch/arm/mach-tegra/sleep.h b/arch/arm/mach-tegra/sleep.h index 92d46ec1361ab..0d59360d891da 100644 --- a/arch/arm/mach-tegra/sleep.h +++ b/arch/arm/mach-tegra/sleep.h @@ -18,6 +18,7 @@ #define __MACH_TEGRA_SLEEP_H #include "iomap.h" +#include "irammap.h" #define TEGRA_ARM_PERIF_VIRT (TEGRA_ARM_PERIF_BASE - IO_CPU_PHYS \ + IO_CPU_VIRT) @@ -29,6 +30,9 @@ + IO_APB_VIRT) #define TEGRA_PMC_VIRT (TEGRA_PMC_BASE - IO_APB_PHYS + IO_APB_VIRT) +#define TEGRA_IRAM_RESET_BASE_VIRT (IO_IRAM_VIRT + \ + TEGRA_IRAM_RESET_HANDLER_OFFSET) + /* PMC_SCRATCH37-39 and 41 are used for tegra_pen_lock and idle */ #define PMC_SCRATCH37 0x130 #define PMC_SCRATCH38 0x134 diff --git a/arch/arm/mach-zynq/common.h b/arch/arm/mach-zynq/common.h index 382c60e9aa160..7038cae95ddcd 100644 --- a/arch/arm/mach-zynq/common.h +++ b/arch/arm/mach-zynq/common.h @@ -17,8 +17,6 @@ #ifndef __MACH_ZYNQ_COMMON_H__ #define __MACH_ZYNQ_COMMON_H__ -void zynq_secondary_startup(void); - extern int zynq_slcr_init(void); extern int zynq_early_slcr_init(void); extern void zynq_slcr_system_reset(void); diff --git a/arch/arm/mach-zynq/headsmp.S b/arch/arm/mach-zynq/headsmp.S index dd8c071941e7f..045c72720a4d5
100644 --- a/arch/arm/mach-zynq/headsmp.S +++ b/arch/arm/mach-zynq/headsmp.S @@ -22,8 +22,3 @@ zynq_secondary_trampoline_jump: .globl zynq_secondary_trampoline_end zynq_secondary_trampoline_end: ENDPROC(zynq_secondary_trampoline) - -ENTRY(zynq_secondary_startup) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(zynq_secondary_startup) diff --git a/arch/arm/mach-zynq/platsmp.c b/arch/arm/mach-zynq/platsmp.c index 52d768ff78571..f66816c491869 100644 --- a/arch/arm/mach-zynq/platsmp.c +++ b/arch/arm/mach-zynq/platsmp.c @@ -87,10 +87,9 @@ int zynq_cpun_start(u32 address, int cpu) } EXPORT_SYMBOL(zynq_cpun_start); -static int zynq_boot_secondary(unsigned int cpu, - struct task_struct *idle) +static int zynq_boot_secondary(unsigned int cpu, struct task_struct *idle) { - return zynq_cpun_start(virt_to_phys(zynq_secondary_startup), cpu); + return zynq_cpun_start(virt_to_phys(secondary_startup), cpu); } /* diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 7e7583ddd6076..64d7486262e51 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1395,12 +1395,19 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, unsigned long uaddr = vma->vm_start; unsigned long usize = vma->vm_end - vma->vm_start; struct page **pages = __iommu_get_pages(cpu_addr, attrs); + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); if (!pages) return -ENXIO; + if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off) + return -ENXIO; + + pages += off; + do { int ret = vm_insert_page(vma, uaddr, *pages++); if (ret) { @@ -1953,7 +1960,7 @@ static int extend_iommu_mapping(struct dma_iommu_mapping *mapping) { int next_bitmap; - if (mapping->nr_bitmaps > mapping->extensions) + if (mapping->nr_bitmaps >= mapping->extensions) return -EINVAL; next_bitmap = mapping->nr_bitmaps; diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 3d1054f11a8ae..7911f14c2157a 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -268,7 +268,10 @@ __v7_ca15mp_setup: __v7_b15mp_setup: __v7_ca17mp_setup: mov r10, #0 -1: +1: adr r12, __v7_setup_stack @ the local stack + stmia r12, {r0-r5, lr} @ v7_invalidate_l1 touches r0-r6 + bl v7_invalidate_l1 + ldmia r12, {r0-r5, lr} #ifdef CONFIG_SMP ALT_SMP(mrc p15, 0, r0, c1, c0, 1) ALT_UP(mov r0, #(1 << 6)) @ fake it for UP @@ -277,7 +280,7 @@ __v7_ca17mp_setup: orreq r0, r0, r10 @ Enable CPU-specific SMP bits mcreq p15, 0, r0, c1, c0, 1 #endif - b __v7_setup + b __v7_setup_cont __v7_pj4b_setup: #ifdef CONFIG_CPU_PJ4B @@ -335,10 +338,11 @@ __v7_pj4b_setup: __v7_setup: adr r12, __v7_setup_stack @ the local stack - stmia r12, {r0-r5, r7, r9, r11, lr} - bl v7_flush_dcache_louis - ldmia r12, {r0-r5, r7, r9, r11, lr} + stmia r12, {r0-r5, lr} @ v7_invalidate_l1 touches r0-r6 + bl v7_invalidate_l1 + ldmia r12, {r0-r5, lr} +__v7_setup_cont: mrc p15, 0, r0, c0, c0, 0 @ read main ID register and r10, r0, #0xff000000 @ ARM? 
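The arm_iommu_mmap_attrs() fix above validates vm_pgoff against the size of the underlying page array before offsetting into it, so a partial mapping can never walk past the allocated buffer. The check restated as a standalone predicate, with PAGE_SHIFT hard-coded to 12 for the sketch:

    #include <stdbool.h>

    /* true iff [off, off + vma_pages) fits inside nr_pages; written as two
     * clauses, mirroring the patch, so the subtraction cannot underflow */
    static bool mmap_range_ok(unsigned long off, unsigned long vma_size,
                              unsigned long nr_pages)
    {
        unsigned long vma_pages = vma_size >> 12;   /* PAGE_SHIFT */

        return off < nr_pages && vma_pages <= nr_pages - off;
    }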
teq r10, #0x41000000 @@ -460,7 +464,7 @@ ENDPROC(__v7_setup) .align 2 __v7_setup_stack: - .space 4 * 11 @ 11 registers + .space 4 * 7 @ 7 registers __INITDATA diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index e0e23582c8b4e..5fe949b084acf 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -162,19 +162,6 @@ static inline int mem_words_used(struct jit_ctx *ctx) return fls(ctx->seen & SEEN_MEM); } -static inline bool is_load_to_a(u16 inst) -{ - switch (inst) { - case BPF_LD | BPF_W | BPF_LEN: - case BPF_LD | BPF_W | BPF_ABS: - case BPF_LD | BPF_H | BPF_ABS: - case BPF_LD | BPF_B | BPF_ABS: - return true; - default: - return false; - } -} - static void jit_fill_hole(void *area, unsigned int size) { u32 *ptr; @@ -186,7 +173,6 @@ static void jit_fill_hole(void *area, unsigned int size) static void build_prologue(struct jit_ctx *ctx) { u16 reg_set = saved_regs(ctx); - u16 first_inst = ctx->skf->insns[0].code; u16 off; #ifdef CONFIG_FRAME_POINTER @@ -216,7 +202,7 @@ static void build_prologue(struct jit_ctx *ctx) emit(ARM_MOV_I(r_X, 0), ctx); /* do not leak kernel data to userspace */ - if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst))) + if (bpf_needs_clear_a(&ctx->skf->insns[0])) emit(ARM_MOV_I(r_A, 0), ctx); /* stack space for the BPF_MEM words */ diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index f5b00f41c4f6d..b8b6e22f99875 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -499,7 +499,7 @@ void __init orion_ge00_switch_init(struct dsa_platform_data *d, int irq) d->netdev = &orion_ge00.dev; for (i = 0; i < d->nr_chips; i++) - d->chip[i].host_dev = &orion_ge00_shared.dev; + d->chip[i].host_dev = &orion_ge_mvmdio.dev; orion_switch_device.dev.platform_data = d; platform_device_register(&orion_switch_device); diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 8aa7910510299..1160434eece05 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -6,9 +6,15 @@ obj-vdso := vgettimeofday.o datapage.o targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) -ccflags-y := -shared -fPIC -fno-common -fno-builtin -fno-stack-protector -ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 -DDISABLE_BRANCH_PROFILING -ccflags-y += -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +ccflags-y := -fPIC -fno-common -fno-builtin -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING + +VDSO_LDFLAGS := -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 +VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 +VDSO_LDFLAGS += -nostdlib -shared +VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--build-id) +VDSO_LDFLAGS += $(call cc-ldoption, -fuse-ld=bfd) obj-$(CONFIG_VDSO) += vdso.o extra-$(CONFIG_VDSO) += vdso.lds @@ -40,10 +46,8 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE # Actual build commands quiet_cmd_vdsold = VDSO $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) \ - $(call cc-ldoption, -Wl$(comma)--build-id) \ - -Wl,-Bsymbolic -Wl,-z,max-page-size=4096 \ - -Wl,-z,common-page-size=4096 -o $@ + cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \ + -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ quiet_cmd_vdsomunge = MUNGE $@ cmd_vdsomunge = $(objtree)/$(obj)/vdsomunge $< $@ diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c index 9005b07296c8b..f6455273b2f87 100644
--- a/arch/arm/vdso/vdsomunge.c +++ b/arch/arm/vdso/vdsomunge.c @@ -45,13 +45,10 @@ * it does. */ -#define _GNU_SOURCE - -#include #include #include -#include #include +#include #include #include #include @@ -61,6 +58,16 @@ #include #include +#define swab16(x) \ + ((((x) & 0x00ff) << 8) | \ + (((x) & 0xff00) >> 8)) + +#define swab32(x) \ + ((((x) & 0x000000ff) << 24) | \ + (((x) & 0x0000ff00) << 8) | \ + (((x) & 0x00ff0000) >> 8) | \ + (((x) & 0xff000000) >> 24)) + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define HOST_ORDER ELFDATA2LSB #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ @@ -82,27 +89,41 @@ #define EF_ARM_ABI_FLOAT_HARD 0x400 #endif +static int failed; +static const char *argv0; static const char *outfile; +static void fail(const char *fmt, ...) +{ + va_list ap; + + failed = 1; + fprintf(stderr, "%s: ", argv0); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + static void cleanup(void) { - if (error_message_count > 0 && outfile != NULL) + if (failed && outfile != NULL) unlink(outfile); } static Elf32_Word read_elf_word(Elf32_Word word, bool swap) { - return swap ? bswap_32(word) : word; + return swap ? swab32(word) : word; } static Elf32_Half read_elf_half(Elf32_Half half, bool swap) { - return swap ? bswap_16(half) : half; + return swap ? swab16(half) : half; } static void write_elf_word(Elf32_Word val, Elf32_Word *dst, bool swap) { - *dst = swap ? bswap_32(val) : val; + *dst = swap ? swab32(val) : val; } int main(int argc, char **argv) @@ -119,68 +140,66 @@ int main(int argc, char **argv) int infd; atexit(cleanup); + argv0 = argv[0]; if (argc != 3) - error(EXIT_FAILURE, 0, "Usage: %s [infile] [outfile]", argv[0]); + fail("Usage: %s [infile] [outfile]\n", argv[0]); infile = argv[1]; outfile = argv[2]; infd = open(infile, O_RDONLY); if (infd < 0) - error(EXIT_FAILURE, errno, "Cannot open %s", infile); + fail("Cannot open %s: %s\n", infile, strerror(errno)); if (fstat(infd, &stat) != 0) - error(EXIT_FAILURE, errno, "Failed stat for %s", infile); + fail("Failed stat for %s: %s\n", infile, strerror(errno)); inbuf = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, infd, 0); if (inbuf == MAP_FAILED) - error(EXIT_FAILURE, errno, "Failed to map %s", infile); + fail("Failed to map %s: %s\n", infile, strerror(errno)); close(infd); inhdr = inbuf; if (memcmp(&inhdr->e_ident, ELFMAG, SELFMAG) != 0) - error(EXIT_FAILURE, 0, "Not an ELF file"); + fail("Not an ELF file\n"); if (inhdr->e_ident[EI_CLASS] != ELFCLASS32) - error(EXIT_FAILURE, 0, "Unsupported ELF class"); + fail("Unsupported ELF class\n"); swap = inhdr->e_ident[EI_DATA] != HOST_ORDER; if (read_elf_half(inhdr->e_type, swap) != ET_DYN) - error(EXIT_FAILURE, 0, "Not a shared object"); + fail("Not a shared object\n"); - if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) { - error(EXIT_FAILURE, 0, "Unsupported architecture %#x", - inhdr->e_machine); - } + if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) + fail("Unsupported architecture %#x\n", inhdr->e_machine); e_flags = read_elf_word(inhdr->e_flags, swap); if (EF_ARM_EABI_VERSION(e_flags) != EF_ARM_EABI_VER5) { - error(EXIT_FAILURE, 0, "Unsupported EABI version %#x", - EF_ARM_EABI_VERSION(e_flags)); + fail("Unsupported EABI version %#x\n", + EF_ARM_EABI_VERSION(e_flags)); } if (e_flags & EF_ARM_ABI_FLOAT_HARD) - error(EXIT_FAILURE, 0, - "Unexpected hard-float flag set in e_flags"); + fail("Unexpected hard-float flag set in e_flags\n"); clear_soft_float = !!(e_flags & EF_ARM_ABI_FLOAT_SOFT); outfd = open(outfile, O_RDWR | O_CREAT | 
O_TRUNC, S_IRUSR | S_IWUSR); if (outfd < 0) - error(EXIT_FAILURE, errno, "Cannot open %s", outfile); + fail("Cannot open %s: %s\n", outfile, strerror(errno)); if (ftruncate(outfd, stat.st_size) != 0) - error(EXIT_FAILURE, errno, "Cannot truncate %s", outfile); + fail("Cannot truncate %s: %s\n", outfile, strerror(errno)); outbuf = mmap(NULL, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, outfd, 0); if (outbuf == MAP_FAILED) - error(EXIT_FAILURE, errno, "Failed to map %s", outfile); + fail("Failed to map %s: %s\n", outfile, strerror(errno)); close(outfd); @@ -195,7 +214,7 @@ int main(int argc, char **argv) } if (msync(outbuf, stat.st_size, MS_SYNC) != 0) - error(EXIT_FAILURE, errno, "Failed to sync %s", outfile); + fail("Failed to sync %s: %s\n", outfile, strerror(errno)); return EXIT_SUCCESS; } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7796af4b1d6f6..6f0a3b41b0090 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -101,6 +101,10 @@ config NO_IOPORT_MAP config STACKTRACE_SUPPORT def_bool y +config ILLEGAL_POINTER_VALUE + hex + default 0xdead000000000000 + config LOCKDEP_SUPPORT def_bool y @@ -409,6 +413,22 @@ config ARM64_ERRATUM_845719 If unsure, say Y. +config ARM64_ERRATUM_843419 + bool "Cortex-A53: 843419: A load or store might access an incorrect address" + depends on MODULES + default y + help + This option builds kernel modules using the large memory model in + order to avoid the use of the ADRP instruction, which can cause + a subsequent memory access to use an incorrect address on Cortex-A53 + parts up to r0p4. + + Note that the kernel itself must be linked with a version of ld + which fixes potentially affected ADRP instructions through the + use of veneers. + + If unsure, say Y. + endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 4d2a925998f92..3258174e61526 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -30,6 +30,10 @@ endif CHECKFLAGS += -D__aarch64__ +ifeq ($(CONFIG_ARM64_ERRATUM_843419), y) +KBUILD_CFLAGS_MODULE += -mcmodel=large +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h index 141b2fcabaa67..0f74f05d662a7 100644 --- a/arch/arm64/include/asm/cpuidle.h +++ b/arch/arm64/include/asm/cpuidle.h @@ -5,20 +5,16 @@ #ifdef CONFIG_CPU_IDLE extern int arm_cpuidle_init(unsigned int cpu); -extern int cpu_suspend(unsigned long arg); +extern int arm_cpuidle_suspend(int index); #else static inline int arm_cpuidle_init(unsigned int cpu) { return -EOPNOTSUPP; } -static inline int cpu_suspend(unsigned long arg) +static inline int arm_cpuidle_suspend(int index) { return -EOPNOTSUPP; } #endif -static inline int arm_cpuidle_suspend(int index) -{ - return cpu_suspend(index); -} #endif diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 17e92f05b1fe5..3ca894ecf699b 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -99,11 +99,13 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) *vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT; } +/* + * vcpu_reg should always be passed a register number coming from a + * read of ESR_EL2. Otherwise, it may give the wrong result on AArch32 + * with banked registers. 
+ */ static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num) { - if (vcpu_mode_is_32bit(vcpu)) - return vcpu_reg32(vcpu, reg_num); - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num]; } diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f800d45ea2265..44a59c20e7735 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -113,6 +113,14 @@ extern phys_addr_t memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ memstart_addr; }) +/* + * The maximum physical address that the linear direct mapping + * of system RAM can cover. (PAGE_OFFSET can be interpreted as + * a 2's complement signed quantity and negated to derive the + * maximum size of the linear mapping.) + */ +#define MAX_MEMBLOCK_ADDR ({ memstart_addr - PAGE_OFFSET - 1; }) + /* * PFNs are used to describe any physical page; this means * PFN 0 == physical address 0. diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 56283f8a675c5..cf73194227681 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -80,7 +80,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) #define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) @@ -460,7 +460,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE; pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index d6dd9fdbc3bee..d4264bb0a409b 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -83,14 +83,14 @@ #define compat_sp regs[13] #define compat_lr regs[14] #define compat_sp_hyp regs[15] -#define compat_sp_irq regs[16] -#define compat_lr_irq regs[17] -#define compat_sp_svc regs[18] -#define compat_lr_svc regs[19] -#define compat_sp_abt regs[20] -#define compat_lr_abt regs[21] -#define compat_sp_und regs[22] -#define compat_lr_und regs[23] +#define compat_lr_irq regs[16] +#define compat_sp_irq regs[17] +#define compat_lr_svc regs[18] +#define compat_sp_svc regs[19] +#define compat_lr_abt regs[20] +#define compat_sp_abt regs[21] +#define compat_lr_und regs[22] +#define compat_sp_und regs[23] #define compat_r8_fiq regs[24] #define compat_r9_fiq regs[25] #define compat_r10_fiq regs[26] diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 003802f589633..59a5b0f1e81c3 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -21,6 +21,6 @@ struct sleep_save_sp { phys_addr_t save_ptr_stash_phys; }; -extern int __cpu_suspend(unsigned long arg, int 
(*fn)(unsigned long)); +extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)); extern void cpu_resume(void); #endif diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 7922c2e710cad..7ac3920b1356a 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -279,22 +279,24 @@ static void register_insn_emulation_sysctl(struct ctl_table *table) */ #define __user_swpX_asm(data, addr, res, temp, B) \ __asm__ __volatile__( \ - " mov %w2, %w1\n" \ - "0: ldxr"B" %w1, [%3]\n" \ - "1: stxr"B" %w0, %w2, [%3]\n" \ + "0: ldxr"B" %w2, [%3]\n" \ + "1: stxr"B" %w0, %w1, [%3]\n" \ " cbz %w0, 2f\n" \ " mov %w0, %w4\n" \ + " b 3f\n" \ "2:\n" \ + " mov %w1, %w2\n" \ + "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ - "3: mov %w0, %w5\n" \ - " b 2b\n" \ + "4: mov %w0, %w5\n" \ + " b 3b\n" \ " .popsection" \ " .pushsection __ex_table,\"a\"\n" \ " .align 3\n" \ - " .quad 0b, 3b\n" \ - " .quad 1b, 3b\n" \ - " .popsection" \ + " .quad 0b, 4b\n" \ + " .quad 1b, 4b\n" \ + " .popsection\n" \ : "=&r" (res), "+r" (data), "=&r" (temp) \ : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \ : "memory") diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index a78143a5c99ff..2bbd0fee084fe 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -37,7 +37,7 @@ int arm_cpuidle_init(unsigned int cpu) * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU * operations back-end error code otherwise. */ -int cpu_suspend(unsigned long arg) +int arm_cpuidle_suspend(int index) { int cpu = smp_processor_id(); @@ -47,5 +47,5 @@ int cpu_suspend(unsigned long arg) */ if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) return -EOPNOTSUPP; - return cpu_ops[cpu]->cpu_suspend(arg); + return cpu_ops[cpu]->cpu_suspend(index); } diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index ab21e0d582788..5170fd5c8e971 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -122,12 +122,12 @@ static int __init uefi_init(void) /* Show what we know for posterity */ c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor), - sizeof(vendor)); + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) vendor[i] = c16[i]; vendor[i] = '\0'; - early_memunmap(c16, sizeof(vendor)); + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); } pr_info("EFI v%u.%.02u by %s\n", @@ -257,7 +257,8 @@ static bool __init efi_virtmap_init(void) */ if (!is_normal_ram(md)) prot = __pgprot(PROT_DEVICE_nGnRE); - else if (md->type == EFI_RUNTIME_SERVICES_CODE) + else if (md->type == EFI_RUNTIME_SERVICES_CODE || + !PAGE_ALIGNED(md->phys_addr)) prot = PAGE_KERNEL_EXEC; else prot = PAGE_KERNEL; diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 08cafc518b9a5..0f03a8fe23144 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -178,6 +178,24 @@ ENTRY(ftrace_stub) ENDPROC(ftrace_stub) #ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* save return value regs*/ + .macro save_return_regs + sub sp, sp, #64 + stp x0, x1, [sp] + stp x2, x3, [sp, #16] + stp x4, x5, [sp, #32] + stp x6, x7, [sp, #48] + .endm + + /* restore return value regs*/ + .macro restore_return_regs + ldp x0, x1, [sp] + ldp x2, x3, [sp, #16] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #48] + add sp, sp, #64 + .endm + /* * void ftrace_graph_caller(void) * @@ -204,11 +222,11 @@ ENDPROC(ftrace_graph_caller) * only when 
CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled. */ ENTRY(return_to_handler) - str x0, [sp, #-16]! + save_return_regs mov x0, x29 // parent's fp bl ftrace_return_to_handler // addr = ftrace_return_to_handler(fp); mov x30, x0 // restore the original return address - ldr x0, [sp], #16 + restore_return_regs ret END(return_to_handler) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 959fe87335609..bddd04d031db7 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -517,6 +517,7 @@ el0_sp_pc: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit mov x0, x26 mov x1, x25 mov x2, sp diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 3dca15634e69c..c31e59fe2cb88 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -157,6 +157,7 @@ void fpsimd_thread_switch(struct task_struct *next) void fpsimd_flush_thread(void) { memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); + fpsimd_flush_task_state(current); set_thread_flag(TIF_FOREIGN_FPSTATE); } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 19f915e8f6e0f..cc7435c9676ec 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -565,6 +565,16 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr hstr_el2, xzr // Disable CP15 traps to EL2 #endif + /* EL2 debug */ + mrs x0, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer + sbfx x0, x0, #8, #4 + cmp x0, #1 + b.lt 4f // Skip if no PMU present + mrs x0, pmcr_el0 // Disable debug access traps + ubfx x0, x0, #11, #5 // to EL2 and allow access to + msr mdcr_el2, x0 // all PMU counters from EL1 +4: + /* Stage-2 translation */ msr vttbr_el2, xzr diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 67bf4107f6efe..876eb8df50bf3 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -332,12 +332,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, AARCH64_INSN_IMM_ADR); break; +#ifndef CONFIG_ARM64_ERRATUM_843419 case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, AARCH64_INSN_IMM_ADR); break; +#endif case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index cce18c85d2e8e..b67b01cb51096 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1242,9 +1242,6 @@ static void armv8pmu_reset(void *info) /* Initialize & Reset PMNC: C and P bits. */ armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C); - - /* Disable access from userspace.
*/ - asm volatile("msr pmuserenr_el0, %0" :: "r" (0)); } static int armv8_pmuv3_map_event(struct perf_event *event) @@ -1318,7 +1315,7 @@ static int armpmu_device_probe(struct platform_device *pdev) /* Don't bother with PPIs; they're already affine */ irq = platform_get_irq(pdev, 0); if (irq >= 0 && irq_is_percpu(irq)) - return 0; + goto out; irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); if (!irqs) @@ -1355,6 +1352,7 @@ static int armpmu_device_probe(struct platform_device *pdev) else kfree(irqs); +out: cpu_pmu->plat_device = pdev; return 0; } diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index ea18cb53921e8..24d4733b7e3c6 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -546,7 +546,7 @@ static int __maybe_unused cpu_psci_cpu_suspend(unsigned long index) if (state[index - 1].type == PSCI_POWER_STATE_TYPE_STANDBY) ret = psci_ops.cpu_suspend(state[index - 1], 0); else - ret = __cpu_suspend(index, psci_suspend_finisher); + ret = cpu_suspend(index, psci_suspend_finisher); return ret; } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index d882b833dbdb5..608ac6aa497b6 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -58,6 +58,12 @@ */ void ptrace_disable(struct task_struct *child) { + /* + * This would be better off in core code, but PTRACE_DETACH has + * grown its fair share of arch-specific warts and changing it + * is likely to cause regressions on obscure architectures. + */ + user_disable_single_step(child); } #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 74753132c3ac8..bbdb53b87e136 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -523,6 +523,10 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "processor\t: %d\n", i); #endif + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", + loops_per_jiffy / (500000UL/HZ), + loops_per_jiffy / (5000UL/HZ) % 100); + /* * Dump out the common processor features in a single line. * Userspace should read the hwcaps with getauxval(AT_HWCAP) diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index d26fcd4cd6e62..c58aee062590c 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -168,7 +168,8 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) * Other callers might not initialize the si_lsb field, * so check explicitly for the right codes here. */ - if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) + if (from->si_signo == SIGBUS && + (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)) err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); #endif break; @@ -201,8 +202,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE)) @@ -213,14 +212,32 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) /* * VFP save/restore code. + * + * We have to be careful with endianness, since the fpsimd context-switch + * code operates on 128-bit (Q) register values whereas the compat ABI + * uses an array of 64-bit (D) registers. Consequently, we need to swap + * the two halves of each Q register when running on a big-endian CPU.
*/ +union __fpsimd_vreg { + __uint128_t raw; + struct { +#ifdef __AARCH64EB__ + u64 hi; + u64 lo; +#else + u64 lo; + u64 hi; +#endif + }; +}; + static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) { struct fpsimd_state *fpsimd = &current->thread.fpsimd_state; compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr, fpexc; - int err = 0; + int i, err = 0; /* * Save the hardware registers to the fpsimd_state structure. @@ -236,10 +253,15 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) /* * Now copy the FP registers. Since the registers are packed, * we can copy the prefix we want (V0-V15) as it is. - * FIXME: Won't work if big endian. */ - err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs, - sizeof(frame->ufp.fpregs)); + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg = { + .raw = fpsimd->vregs[i >> 1], + }; + + __put_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __put_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + } /* Create an AArch32 fpscr from the fpsr and the fpcr. */ fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) | @@ -264,7 +286,7 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr; - int err = 0; + int i, err = 0; __get_user_error(magic, &frame->magic, err); __get_user_error(size, &frame->size, err); @@ -274,12 +296,14 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) return -EINVAL; - /* - * Copy the FP registers into the start of the fpsimd_state. - * FIXME: Won't work if big endian. - */ - err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs, - sizeof(frame->ufp.fpregs)); + /* Copy the FP registers into the start of the fpsimd_state. */ + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg; + + __get_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __get_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + fpsimd.vregs[i >> 1] = vreg.raw; + } /* Extract the fpsr and the fpcr from the fpscr */ __get_user_error(fpscr, &frame->ufp.fpscr, err); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 2cb008177252f..d3a202b85ba64 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -569,7 +569,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); if ((unsigned)ipinr < NR_IPI) { - trace_ipi_entry(ipi_types[ipinr]); + trace_ipi_entry_rcuidle(ipi_types[ipinr]); __inc_irq_stat(cpu, ipi_irqs[ipinr]); } @@ -612,7 +612,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) } if ((unsigned)ipinr < NR_IPI) - trace_ipi_exit(ipi_types[ipinr]); + trace_ipi_exit_rcuidle(ipi_types[ipinr]); set_irq_regs(old_regs); } diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 407991bf79f51..ccb6078ed9f20 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -48,11 +48,7 @@ int notrace unwind_frame(struct stackframe *frame) frame->sp = fp + 0x10; frame->fp = *(unsigned long *)(fp); - /* - * -4 here because we care about the PC at time of bl, - * not where the return will go.
- */ - frame->pc = *(unsigned long *)(fp + 8) - 4; + frame->pc = *(unsigned long *)(fp + 8); return 0; } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index d7daf45ae7a25..357418137db7c 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -1,3 +1,4 @@ +#include <linux/ftrace.h> #include <linux/percpu.h> #include <linux/slab.h> #include <asm/cacheflush.h> @@ -51,13 +52,13 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) } /* - * __cpu_suspend + * cpu_suspend * * arg: argument to pass to the finisher function * fn: finisher function pointer * */ -int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) +int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { struct mm_struct *mm = current->active_mm; int ret; @@ -70,6 +71,13 @@ int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) */ local_dbg_save(flags); + /* + * Function graph tracer state gets inconsistent when the kernel + * calls functions that never return (aka suspend finishers) hence + * disable graph tracing during their execution. + */ + pause_graph_tracing(); + /* * mm context saved on the stack, it will be restored when * the cpu comes out of reset through the identity mapped @@ -80,17 +88,21 @@ int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) if (ret == 0) { /* * We are resuming from reset with TTBR0_EL1 set to the - * idmap to enable the MMU; restore the active_mm mappings in - * TTBR0_EL1 unless the active_mm == &init_mm, in which case - * the thread entered __cpu_suspend with TTBR0_EL1 set to - * reserved TTBR0 page tables and should be restored as such. + * idmap to enable the MMU; set the TTBR0 to the reserved + * page tables to prevent speculative TLB allocations, flush + * the local tlb and set the default tcr_el1.t0sz so that + * the TTBR0 address space set-up is properly restored. + * If the current active_mm != &init_mm we entered cpu_suspend + * with mappings in TTBR0 that must be restored, so we switch + * them back to complete the address space configuration + * restoration before returning. */ - if (mm == &init_mm) - cpu_set_reserved_ttbr0(); - else - cpu_switch_mm(mm->pgd, mm); - + cpu_set_reserved_ttbr0(); flush_tlb_all(); + cpu_set_default_tcr_t0sz(); + + if (mm != &init_mm) + cpu_switch_mm(mm->pgd, mm); /* * Restore per-cpu offset before any kernel @@ -107,6 +119,8 @@ int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) hw_breakpoint_restore(NULL); } + unpause_graph_tracing(); + /* * Restore pstate flags. OS lock and mdscr have been already * restored, so from this point onwards, debugging is fully diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index ff3bddea482dd..f6fe17d88da55 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -15,6 +15,10 @@ ccflags-y := -shared -fno-common -fno-builtin ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +# Workaround for bare-metal (ELF) toolchains that neglect to pass -shared +# down to collect2, resulting in silent corruption of the vDSO image.
+ccflags-y += -Wl,-shared + obj-y += vdso.o extra-y += vdso.lds vdso-offsets.h CPPFLAGS_vdso.lds += -P -C -U$(ARCH) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index a2c29865c3fe5..aff07bcad8827 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -54,9 +54,12 @@ PECOFF_FILE_ALIGNMENT = 0x200; #define PECOFF_EDATA_PADDING #endif -#ifdef CONFIG_DEBUG_ALIGN_RODATA +#if defined(CONFIG_DEBUG_ALIGN_RODATA) #define ALIGN_DEBUG_RO . = ALIGN(1<id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; } /** diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 5befd010e2325..64f9e60b31da1 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -844,8 +844,6 @@ mrs x3, cntv_ctl_el0 and x3, x3, #3 str w3, [x0, #VCPU_TIMER_CNTV_CTL] - bic x3, x3, #1 // Clear Enable - msr cntv_ctl_el0, x3 isb @@ -853,6 +851,9 @@ str x3, [x0, #VCPU_TIMER_CNTV_CVAL] 1: + // Disable the virtual timer + msr cntv_ctl_el0, xzr + // Allow physical timer/counter access for the host mrs x2, cnthctl_el2 orr x2, x2, #3 @@ -947,13 +948,15 @@ ENTRY(__kvm_vcpu_run) // Guest context add x2, x0, #VCPU_CONTEXT + // We must restore the 32-bit state before the sysregs, thanks + // to Cortex-A57 erratum #852523. + restore_guest_32bit_state bl __restore_sysregs bl __restore_fpsimd skip_debug_state x3, 1f bl __restore_debug 1: - restore_guest_32bit_state restore_guest_regs // That's it, no more messing around. diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index f02530e726f69..648112e90ed54 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -48,7 +48,7 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) /* Note: These now point to the banked copies */ *vcpu_spsr(vcpu) = new_spsr_value; - *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; + *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; /* Branch to exception vector */ if (sctlr & (1 << 13)) @@ -168,8 +168,8 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) inject_abt32(vcpu, false, addr); - - inject_abt64(vcpu, false, addr); + else + inject_abt64(vcpu, false, addr); } /** @@ -184,8 +184,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) inject_abt32(vcpu, true, addr); - - inject_abt64(vcpu, true, addr); + else + inject_abt64(vcpu, true, addr); } /** @@ -198,6 +198,6 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) inject_undef32(vcpu); - - inject_undef64(vcpu); + else + inject_undef64(vcpu); } diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index baa758d370210..76c1e6cd36fc4 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -92,6 +92,14 @@ static void reset_context(void *info) unsigned int cpu = smp_processor_id(); struct mm_struct *mm = current->active_mm; + /* + * current->active_mm could be init_mm for the idle thread immediately + * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to + * the reserved value, so no need to reset any context. 
+ */ + if (mm == &init_mm) + return; + smp_rmb(); asid = cpu_last_asid + cpu; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 96da13167d4a5..fa5efaa5c3ac5 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -279,6 +279,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, * starvation. */ mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; + mm_flags |= FAULT_FLAG_TRIED; goto retry; } } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 2de9d2e59d968..0eeb4f0930a08 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -40,13 +40,13 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) int pmd_huge(pmd_t pmd) { - return !(pmd_val(pmd) & PMD_TABLE_BIT); + return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); } int pud_huge(pud_t pud) { #ifndef __PAGETABLE_PMD_FOLDED - return !(pud_val(pud) & PUD_TABLE_BIT); + return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT); #else return 0; #endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 597831bdddf3d..ad87ce826cce4 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -262,7 +262,7 @@ static void __init free_unused_memmap(void) * memmap entries are valid from the bank end aligned to * MAX_ORDER_NR_PAGES. */ - prev_end = ALIGN(start + __phys_to_pfn(reg->size), + prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size), MAX_ORDER_NR_PAGES); } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 5b8b664422d36..cb34eb8bbb9db 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -450,6 +450,9 @@ void __init paging_init(void) empty_zero_page = virt_to_page(zero_page); + /* Ensure the zero page is visible to the page table walker */ + dsb(ishst); + /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. 
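The hugetlbpage change a few hunks up is worth spelling out: an empty (pmd_none) entry is all zeroes, so its table bit is also clear, and the old predicate reported it as a huge mapping. A minimal user-space sketch of the before/after logic follows; the bit position is illustrative only, the real arm64 definitions live in asm/pgtable-hwdef.h.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PMD_TABLE_BIT (1ULL << 1)	/* illustrative; bit 1 on arm64 */

/* Old predicate: also true for an empty (zero) entry, which is wrong. */
static bool pmd_huge_old(uint64_t pmd)
{
	return !(pmd & PMD_TABLE_BIT);
}

/* Fixed predicate from the hunk above: the entry must be non-zero too. */
static bool pmd_huge_fixed(uint64_t pmd)
{
	return pmd && !(pmd & PMD_TABLE_BIT);
}

int main(void)
{
	uint64_t empty = 0, block = 0x200000ULL;	/* made-up block entry */

	printf("empty: old=%d fixed=%d\n",
	       pmd_huge_old(empty), pmd_huge_fixed(empty));	/* old=1 (bogus), fixed=0 */
	printf("block: old=%d fixed=%d\n",
	       pmd_huge_old(block), pmd_huge_fixed(block));	/* both 1 */
	return 0;
}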
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index e47ed1c5dce1b..545710f854f81 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -57,6 +57,9 @@ static int change_memory_common(unsigned long addr, int numpages, if (end < MODULES_VADDR || end >= MODULES_END) return -EINVAL; + if (!numpages) + return 0; + data.set_mask = set_mask; data.clear_mask = clear_mask; diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S index 4c4d93c4bf65b..d69dffffaa899 100644 --- a/arch/arm64/mm/proc-macros.S +++ b/arch/arm64/mm/proc-macros.S @@ -62,3 +62,15 @@ bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH #endif .endm + +/* + * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present + */ + .macro reset_pmuserenr_el0, tmpreg + mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer + sbfx \tmpreg, \tmpreg, #8, #4 + cmp \tmpreg, #1 // Skip if no PMU present + b.lt 9000f + msr pmuserenr_el0, xzr // Disable PMU access from EL0 +9000: + .endm diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index cdd754e19b9b2..d253908a988d5 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -165,6 +165,7 @@ ENTRY(cpu_do_resume) */ ubfx x11, x11, #1, #1 msr oslar_el1, x11 + reset_pmuserenr_el0 x0 // Disable PMU access from EL0 mov x0, x12 dsb nsh // Make sure local tlb invalidation completed isb @@ -202,7 +203,9 @@ ENTRY(__cpu_setup) mov x0, #3 << 20 msr cpacr_el1, x0 // Enable FP/ASIMD - msr mdscr_el1, xzr // Reset mdscr_el1 + mov x0, #1 << 12 // Reset mdscr_el1 and disable + msr mdscr_el1, x0 // access to the DCC from EL0 + reset_pmuserenr_el0 x0 // Disable PMU access from EL0 /* * Memory region attributes for LPAE: * diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index de0a81a539a01..aee5637ea436f 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -1,7 +1,7 @@ /* * BPF JIT compiler for ARM64 * - * Copyright (C) 2014 Zi Shen Lim + * Copyright (C) 2014-2015 Zi Shen Lim * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -35,6 +35,7 @@ aarch64_insn_gen_comp_branch_imm(0, offset, Rt, A64_VARIANT(sf), \ AARCH64_INSN_BRANCH_COMP_##type) #define A64_CBZ(sf, Rt, imm19) A64_COMP_BRANCH(sf, Rt, (imm19) << 2, ZERO) +#define A64_CBNZ(sf, Rt, imm19) A64_COMP_BRANCH(sf, Rt, (imm19) << 2, NONZERO) /* Conditional branch (immediate) */ #define A64_COND_BRANCH(cond, offset) \ @@ -110,6 +111,10 @@ /* Rd = Rn >> shift; signed */ #define A64_ASR(sf, Rd, Rn, shift) A64_SBFM(sf, Rd, Rn, shift, (sf) ? 
63 : 31) +/* Zero extend */ +#define A64_UXTH(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 15) +#define A64_UXTW(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 31) + /* Move wide (immediate) */ #define A64_MOVEW(sf, Rd, imm16, shift, type) \ aarch64_insn_gen_movewide(Rd, imm16, shift, \ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index dc6a4842683aa..6217f80702d2a 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1,7 +1,7 @@ /* * BPF JIT compiler for ARM64 * - * Copyright (C) 2014 Zi Shen Lim + * Copyright (C) 2014-2015 Zi Shen Lim * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -113,9 +113,9 @@ static inline void emit_a64_mov_i(const int is64, const int reg, static inline int bpf2a64_offset(int bpf_to, int bpf_from, const struct jit_ctx *ctx) { - int to = ctx->offset[bpf_to + 1]; + int to = ctx->offset[bpf_to]; /* -1 to account for the Branch instruction */ - int from = ctx->offset[bpf_from + 1] - 1; + int from = ctx->offset[bpf_from] - 1; return to - from; } @@ -225,6 +225,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) u8 jmp_cond; s32 jmp_offset; +#define check_imm(bits, imm) do { \ + if ((((imm) > 0) && ((imm) >> (bits))) || \ + (((imm) < 0) && (~(imm) >> (bits)))) { \ + pr_info("[%2d] imm=%d(0x%x) out of range\n", \ + i, imm, imm); \ + return -EINVAL; \ + } \ +} while (0) +#define check_imm19(imm) check_imm(19, imm) +#define check_imm26(imm) check_imm(26, imm) + switch (code) { /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: @@ -258,15 +269,33 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: - emit(A64_UDIV(is64, dst, dst, src), ctx); - break; case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: - ctx->tmp_used = 1; - emit(A64_UDIV(is64, tmp, dst, src), ctx); - emit(A64_MUL(is64, tmp, tmp, src), ctx); - emit(A64_SUB(is64, dst, dst, tmp), ctx); + { + const u8 r0 = bpf2a64[BPF_REG_0]; + + /* if (src == 0) return 0 */ + jmp_offset = 3; /* skip ahead to else path */ + check_imm19(jmp_offset); + emit(A64_CBNZ(is64, src, jmp_offset), ctx); + emit(A64_MOVZ(1, r0, 0, 0), ctx); + jmp_offset = epilogue_offset(ctx); + check_imm26(jmp_offset); + emit(A64_B(jmp_offset), ctx); + /* else */ + switch (BPF_OP(code)) { + case BPF_DIV: + emit(A64_UDIV(is64, dst, dst, src), ctx); + break; + case BPF_MOD: + ctx->tmp_used = 1; + emit(A64_UDIV(is64, tmp, dst, src), ctx); + emit(A64_MUL(is64, tmp, tmp, src), ctx); + emit(A64_SUB(is64, dst, dst, tmp), ctx); + break; + } break; + } case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU64 | BPF_LSH | BPF_X: emit(A64_LSLV(is64, dst, dst, src), ctx); @@ -289,23 +318,41 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU | BPF_END | BPF_FROM_BE: #ifdef CONFIG_CPU_BIG_ENDIAN if (BPF_SRC(code) == BPF_FROM_BE) - break; + goto emit_bswap_uxt; #else /* !CONFIG_CPU_BIG_ENDIAN */ if (BPF_SRC(code) == BPF_FROM_LE) - break; + goto emit_bswap_uxt; #endif switch (imm) { case 16: emit(A64_REV16(is64, dst, dst), ctx); + /* zero-extend 16 bits into 64 bits */ + emit(A64_UXTH(is64, dst, dst), ctx); break; case 32: emit(A64_REV32(is64, dst, dst), ctx); + /* upper 32 bits already cleared */ break; case 64: emit(A64_REV64(dst, dst), ctx); break; } break; +emit_bswap_uxt: + switch (imm) { + case 16: + /* zero-extend 16 bits into 64 bits */ + emit(A64_UXTH(is64, dst, dst), ctx); + 
break; + case 32: + /* zero-extend 32 bits into 64 bits */ + emit(A64_UXTW(is64, dst, dst), ctx); + break; + case 64: + /* nop */ + break; + } + break; /* dst = imm */ case BPF_ALU | BPF_MOV | BPF_K: case BPF_ALU64 | BPF_MOV | BPF_K: @@ -375,17 +422,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit(A64_ASR(is64, dst, dst, imm), ctx); break; -#define check_imm(bits, imm) do { \ - if ((((imm) > 0) && ((imm) >> (bits))) || \ - (((imm) < 0) && (~(imm) >> (bits)))) { \ - pr_info("[%2d] imm=%d(0x%x) out of range\n", \ - i, imm, imm); \ - return -EINVAL; \ - } \ -} while (0) -#define check_imm19(imm) check_imm(19, imm) -#define check_imm26(imm) check_imm(26, imm) - /* JUMP off */ case BPF_JMP | BPF_JA: jmp_offset = bpf2a64_offset(i + off, i, ctx); @@ -640,10 +676,11 @@ static int build_body(struct jit_ctx *ctx) const struct bpf_insn *insn = &prog->insnsi[i]; int ret; + ret = build_insn(insn, ctx); + if (ctx->image == NULL) ctx->offset[i] = ctx->idx; - ret = build_insn(insn, ctx); if (ret > 0) { i++; continue; diff --git a/arch/avr32/mach-at32ap/clock.c b/arch/avr32/mach-at32ap/clock.c index 23b1a97fae7ad..52c179bec0cc6 100644 --- a/arch/avr32/mach-at32ap/clock.c +++ b/arch/avr32/mach-at32ap/clock.c @@ -80,6 +80,9 @@ int clk_enable(struct clk *clk) { unsigned long flags; + if (!clk) + return 0; + spin_lock_irqsave(&clk_lock, flags); __clk_enable(clk); spin_unlock_irqrestore(&clk_lock, flags); @@ -106,6 +109,9 @@ void clk_disable(struct clk *clk) { unsigned long flags; + if (IS_ERR_OR_NULL(clk)) + return; + spin_lock_irqsave(&clk_lock, flags); __clk_disable(clk); spin_unlock_irqrestore(&clk_lock, flags); @@ -117,6 +123,9 @@ unsigned long clk_get_rate(struct clk *clk) unsigned long flags; unsigned long rate; + if (!clk) + return 0; + spin_lock_irqsave(&clk_lock, flags); rate = clk->get_rate(clk); spin_unlock_irqrestore(&clk_lock, flags); @@ -129,6 +138,9 @@ long clk_round_rate(struct clk *clk, unsigned long rate) { unsigned long flags, actual_rate; + if (!clk) + return 0; + if (!clk->set_rate) return -ENOSYS; @@ -145,6 +157,9 @@ int clk_set_rate(struct clk *clk, unsigned long rate) unsigned long flags; long ret; + if (!clk) + return 0; + if (!clk->set_rate) return -ENOSYS; @@ -161,6 +176,9 @@ int clk_set_parent(struct clk *clk, struct clk *parent) unsigned long flags; int ret; + if (!clk) + return 0; + if (!clk->set_parent) return -ENOSYS; @@ -174,7 +192,7 @@ EXPORT_SYMBOL(clk_set_parent); struct clk *clk_get_parent(struct clk *clk) { - return clk->parent; + return !clk ? NULL : clk->parent; } EXPORT_SYMBOL(clk_get_parent); diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c index 28a09529f2069..3a76927458681 100644 --- a/arch/m32r/boot/compressed/misc.c +++ b/arch/m32r/boot/compressed/misc.c @@ -86,6 +86,7 @@ decompress_kernel(int mmu_on, unsigned char *zimage_data, free_mem_end_ptr = free_mem_ptr + BOOT_HEAP_SIZE; puts("\nDecompressing Linux... 
"); - decompress(input_data, input_len, NULL, NULL, output_data, NULL, error); + __decompress(input_data, input_len, NULL, NULL, output_data, 0, + NULL, error); puts("done.\nBooting the kernel.\n"); } diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c index 0392112a5d702..a5ecef7188baa 100644 --- a/arch/m32r/kernel/setup.c +++ b/arch/m32r/kernel/setup.c @@ -81,7 +81,10 @@ static struct resource code_resource = { }; unsigned long memory_start; +EXPORT_SYMBOL(memory_start); + unsigned long memory_end; +EXPORT_SYMBOL(memory_end); void __init setup_arch(char **); int get_cpuinfo(char *); diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 33013dfcd3e1d..5c68c85d5dbe5 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -125,6 +125,13 @@ endif # M68KCLASSIC if COLDFIRE +choice + prompt "ColdFire SoC type" + default M520x + help + Select the type of ColdFire System-on-Chip (SoC) that you want + to build for. + config M5206 bool "MCF5206" depends on !MMU @@ -174,9 +181,6 @@ config M525x help Freescale (Motorola) Coldfire 5251/5253 processor support. -config M527x - bool - config M5271 bool "MCF5271" depends on !MMU @@ -223,9 +227,6 @@ config M5307 help Motorola ColdFire 5307 processor support. -config M53xx - bool - config M532x bool "MCF532x" depends on !MMU @@ -251,9 +252,6 @@ config M5407 help Motorola ColdFire 5407 processor support. -config M54xx - bool - config M547x bool "MCF547x" select M54xx @@ -280,6 +278,17 @@ config M5441x help Freescale Coldfire 54410/54415/54416/54417/54418 processor support. +endchoice + +config M527x + bool + +config M53xx + bool + +config M54xx + bool + endif # COLDFIRE @@ -416,22 +425,10 @@ config HAVE_MBAR config HAVE_IPSBAR bool -config CLOCK_SET - bool "Enable setting the CPU clock frequency" - depends on COLDFIRE - default n - help - On some CPU's you do not need to know what the core CPU clock - frequency is. On these you can disable clock setting. On some - traditional 68K parts, and on all ColdFire parts you need to set - the appropriate CPU clock frequency. On these devices many of the - onboard peripherals derive their timing from the master CPU clock - frequency. - config CLOCK_FREQ int "Set the core clock frequency" default "66666666" - depends on CLOCK_SET + depends on COLDFIRE help Define the CPU clock frequency in use. This is the core clock frequency, it may or may not be the same as the external clock diff --git a/arch/m68k/include/asm/coldfire.h b/arch/m68k/include/asm/coldfire.h index c94557b914482..50aa4dac9ca28 100644 --- a/arch/m68k/include/asm/coldfire.h +++ b/arch/m68k/include/asm/coldfire.h @@ -19,7 +19,7 @@ * in any case new boards come along from time to time that have yet * another different clocking frequency. */ -#ifdef CONFIG_CLOCK_SET +#ifdef CONFIG_CLOCK_FREQ #define MCF_CLK CONFIG_CLOCK_FREQ #else #error "Don't know what your ColdFire CPU clock frequency is??" diff --git a/arch/m68k/include/asm/linkage.h b/arch/m68k/include/asm/linkage.h index 5a822bb790f72..066e74f666ae9 100644 --- a/arch/m68k/include/asm/linkage.h +++ b/arch/m68k/include/asm/linkage.h @@ -4,4 +4,34 @@ #define __ALIGN .align 4 #define __ALIGN_STR ".align 4" +/* + * Make sure the compiler doesn't do anything stupid with the + * arguments on the stack - they are owned by the *caller*, not + * the callee. This just fools gcc into not spilling into them, + * and keeps it from doing tailcall recursion and/or using the + * stack slots for temporaries, since they are live and "used" + * all the way to the end of the function. 
+ */ +#define asmlinkage_protect(n, ret, args...) \ + __asmlinkage_protect##n(ret, ##args) +#define __asmlinkage_protect_n(ret, args...) \ + __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args) +#define __asmlinkage_protect0(ret) \ + __asmlinkage_protect_n(ret) +#define __asmlinkage_protect1(ret, arg1) \ + __asmlinkage_protect_n(ret, "m" (arg1)) +#define __asmlinkage_protect2(ret, arg1, arg2) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2)) +#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3)) +#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4)) +#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5)) +#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5), "m" (arg6)) + #endif diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f5016656494f6..a3b1ffe50aa07 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1417,6 +1417,7 @@ config CPU_MIPS64_R6 select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_MSA select GENERIC_CSUM + select MIPS_O32_FP64_SUPPORT if MIPS32_O32 help Choose this option to build a kernel for release 6 or later of the MIPS64 architecture. New MIPS processors, starting with the Warrior diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 7fc8397d16f21..fd2a36a79f97c 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -186,6 +186,7 @@ int get_c0_perfcount_int(void) { return ATH79_MISC_IRQ(5); } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index 54831069a2062..080cd53bac369 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -111,8 +111,8 @@ void decompress_kernel(unsigned long boot_heap_start) puts("\n"); /* Decompress the kernel with according algorithm */ - decompress((char *)zimage_start, zimage_size, 0, 0, - (void *)VMLINUX_LOAD_ADDRESS_ULL, 0, error); + __decompress((char *)zimage_start, zimage_size, 0, 0, + (void *)VMLINUX_LOAD_ADDRESS_ULL, 0, 0, error); /* FIXME: should we flush cache here? 
*/ puts("Now, booting the kernel...\n"); diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 084780b355aa5..1b06251898352 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -74,7 +74,7 @@ static inline int __enable_fpu(enum fpu_mode mode) goto fr_common; case FPU_64BIT: -#if !(defined(CONFIG_CPU_MIPS32_R2) || defined(CONFIG_CPU_MIPS32_R6) \ +#if !(defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) \ || defined(CONFIG_64BIT)) /* we only have a 32-bit FPU */ return SIGFPE; diff --git a/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h b/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h deleted file mode 100644 index 11d3b572b1b3d..0000000000000 --- a/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __ASM_MACH_BCM63XX_DMA_COHERENCE_H -#define __ASM_MACH_BCM63XX_DMA_COHERENCE_H - -#include - -#define plat_post_dma_flush bmips_post_dma_flush - -#include - -#endif /* __ASM_MACH_BCM63XX_DMA_COHERENCE_H */ diff --git a/arch/mips/include/asm/mach-generic/spaces.h b/arch/mips/include/asm/mach-generic/spaces.h index 9488fa5f88660..afc96ecb90042 100644 --- a/arch/mips/include/asm/mach-generic/spaces.h +++ b/arch/mips/include/asm/mach-generic/spaces.h @@ -94,7 +94,11 @@ #endif #ifndef FIXADDR_TOP +#ifdef CONFIG_KVM_GUEST +#define FIXADDR_TOP ((unsigned long)(long)(int)0x7ffe0000) +#else #define FIXADDR_TOP ((unsigned long)(long)(int)0xfffe0000) #endif +#endif #endif /* __ASM_MACH_GENERIC_SPACES_H */ diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 819af9d057a8b..7fe24aef7fdc6 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -182,8 +182,39 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) * Make sure the buddy is global too (if it's !none, * it better already be global) */ +#ifdef CONFIG_SMP + /* + * For SMP, multiple CPUs can race, so we need to do + * this atomically. 
+ */ +#ifdef CONFIG_64BIT +#define LL_INSN "lld" +#define SC_INSN "scd" +#else /* CONFIG_32BIT */ +#define LL_INSN "ll" +#define SC_INSN "sc" +#endif + unsigned long page_global = _PAGE_GLOBAL; + unsigned long tmp; + + __asm__ __volatile__ ( + " .set push\n" + " .set noreorder\n" + "1: " LL_INSN " %[tmp], %[buddy]\n" + " bnez %[tmp], 2f\n" + " or %[tmp], %[tmp], %[global]\n" + " " SC_INSN " %[tmp], %[buddy]\n" + " beqz %[tmp], 1b\n" + " nop\n" + "2:\n" + " .set pop" + : [buddy] "+m" (buddy->pte), + [tmp] "=&r" (tmp) + : [global] "r" (page_global)); +#else /* !CONFIG_SMP */ if (pte_none(*buddy)) pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL; +#endif /* CONFIG_SMP */ } #endif } @@ -322,7 +353,7 @@ static inline pte_t pte_mkdirty(pte_t pte) static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) if (!(pte_val(pte) & _PAGE_NO_READ)) pte_val(pte) |= _PAGE_SILENT_READ; else @@ -527,7 +558,7 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd) { pmd_val(pmd) |= _PAGE_ACCESSED; -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) if (!(pmd_val(pmd) & _PAGE_NO_READ)) pmd_val(pmd) |= _PAGE_SILENT_READ; else diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index 2b25d1ba1ea03..16f1ea9ab1912 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -23,6 +23,7 @@ extern int smp_num_siblings; extern cpumask_t cpu_sibling_map[]; extern cpumask_t cpu_core_map[]; +extern cpumask_t cpu_foreign_map; #define raw_smp_processor_id() (current_thread_info()->cpu) diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h index 28d6d9364bd1f..a71da576883c8 100644 --- a/arch/mips/include/asm/stackframe.h +++ b/arch/mips/include/asm/stackframe.h @@ -152,6 +152,31 @@ .set noreorder bltz k0, 8f move k1, sp +#ifdef CONFIG_EVA + /* + * Flush interAptiv's Return Prediction Stack (RPS) by writing + * EntryHi. Toggling Config7.RPS is slower and less portable. + * + * The RPS isn't automatically flushed when exceptions are + * taken, which can result in kernel mode speculative accesses + * to user addresses if the RPS mispredicts. That's harmless + * when user and kernel share the same address space, but with + * EVA the same user segments may be unmapped to kernel mode, + * even containing sensitive MMIO regions or invalid memory. + * + * This can happen when the kernel sets the return address to + * ret_from_* and jr's to the exception handler, which looks + * more like a tail call than a function call. If nested calls + * don't evict the last user address in the RPS, it will + * mispredict the return and fetch from a user controlled + * address into the icache. + * + * More recent EVA-capable cores with MAAR to restrict + * speculative accesses aren't affected. + */ + MFC0 k0, CP0_ENTRYHI + MTC0 k0, CP0_ENTRYHI +#endif .set reorder /* Called from user mode, new stack. 
*/ get_saved_sp diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 6499d93ae68d7..47bc45a67e9ba 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -101,10 +101,8 @@ static inline void syscall_get_arguments(struct task_struct *task, /* O32 ABI syscall() - Either 64-bit with O32 or 32-bit */ if ((config_enabled(CONFIG_32BIT) || test_tsk_thread_flag(task, TIF_32BIT_REGS)) && - (regs->regs[2] == __NR_syscall)) { + (regs->regs[2] == __NR_syscall)) i++; - n++; - } while (n--) ret |= mips_get_syscall_arg(args++, task, regs, i++); diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index 3e4491aa6d6b2..789d7bf4fef32 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -154,7 +154,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { unsigned int real_len; - cpumask_t mask; + cpumask_t allowed, mask; int retval; struct task_struct *p; @@ -173,7 +173,8 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, if (retval) goto out_unlock; - cpumask_and(&mask, &p->thread.user_cpus_allowed, cpu_possible_mask); + cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed); + cpumask_and(&mask, &allowed, cpu_active_mask); out_unlock: read_unlock(&tasklist_lock); diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S index 74bab9ddd0e19..c6bbf21650515 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S @@ -24,7 +24,7 @@ LEAF(relocate_new_kernel) process_entry: PTR_L s2, (s0) - PTR_ADD s0, s0, SZREG + PTR_ADDIU s0, s0, SZREG /* * In case of a kdump/crash kernel, the indirection page is not @@ -61,9 +61,9 @@ copy_word: /* copy page word by word */ REG_L s5, (s2) REG_S s5, (s4) - PTR_ADD s4, s4, SZREG - PTR_ADD s2, s2, SZREG - LONG_SUB s6, s6, 1 + PTR_ADDIU s4, s4, SZREG + PTR_ADDIU s2, s2, SZREG + LONG_ADDIU s6, s6, -1 beq s6, zero, process_entry b copy_word b process_entry diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index ad4d44635c760..a6f6b762c47a4 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -80,7 +80,7 @@ syscall_trace_entry: SAVE_STATIC move s0, t2 move a0, sp - daddiu a1, v0, __NR_64_Linux + move a1, v0 jal syscall_trace_enter bltz v0, 2f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 446cc654da56c..4b2010654c463 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -72,7 +72,7 @@ n32_syscall_trace_entry: SAVE_STATIC move s0, t2 move a0, sp - daddiu a1, v0, __NR_N32_Linux + move a1, v0 jal syscall_trace_enter bltz v0, 2f # seccomp failed? 
Skip syscall diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index 19a7705f2a015..5d7f2634996fd 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -409,8 +409,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, 3*sizeof(int)) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE32)) diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index faa46ebd9ddae..d0744cc77ea7f 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -63,6 +63,13 @@ EXPORT_SYMBOL(cpu_sibling_map); cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); +/* + * A logical cpu mask containing only one VPE per core to + * reduce the number of IPIs on large MT systems. + */ +cpumask_t cpu_foreign_map __read_mostly; +EXPORT_SYMBOL(cpu_foreign_map); + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -103,6 +110,29 @@ static inline void set_cpu_core_map(int cpu) } } +/* + * Calculate a new cpu_foreign_map mask whenever a + * new cpu appears or disappears. + */ +static inline void calculate_cpu_foreign_map(void) +{ + int i, k, core_present; + cpumask_t temp_foreign_map; + + /* Re-calculate the mask */ + for_each_online_cpu(i) { + core_present = 0; + for_each_cpu(k, &temp_foreign_map) + if (cpu_data[i].package == cpu_data[k].package && + cpu_data[i].core == cpu_data[k].core) + core_present = 1; + if (!core_present) + cpumask_set_cpu(i, &temp_foreign_map); + } + + cpumask_copy(&cpu_foreign_map, &temp_foreign_map); +} + struct plat_smp_ops *mp_ops; EXPORT_SYMBOL(mp_ops); @@ -146,6 +176,8 @@ asmlinkage void start_secondary(void) set_cpu_sibling_map(cpu); set_cpu_core_map(cpu); + calculate_cpu_foreign_map(); + cpumask_set_cpu(cpu, &cpu_callin_map); synchronise_count_slave(cpu); @@ -173,9 +205,18 @@ void __irq_entry smp_call_function_interrupt(void) static void stop_this_cpu(void *dummy) { /* - * Remove this CPU: + * Remove this CPU. Be a bit slow here and + * set the bits for every online CPU so we don't miss + * any IPI whilst taking this VPE down. */ + + cpumask_copy(&cpu_foreign_map, cpu_online_mask); + + /* Make it visible to every other CPU */ + smp_mb(); + set_cpu_online(smp_processor_id(), false); + calculate_cpu_foreign_map(); local_irq_disable(); while (1); } @@ -197,6 +238,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) mp_ops->prepare_cpus(max_cpus); set_cpu_sibling_map(0); set_cpu_core_map(0); + calculate_cpu_foreign_map(); #ifndef CONFIG_HOTPLUG_CPU init_cpu_present(cpu_possible_mask); #endif diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index d2d1c1933bc9f..54923d6b7e165 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -192,6 +192,7 @@ static void show_stacktrace(struct task_struct *task, void show_stack(struct task_struct *task, unsigned long *sp) { struct pt_regs regs; + mm_segment_t old_fs = get_fs(); if (sp) { regs.regs[29] = (unsigned long)sp; regs.regs[31] = 0; @@ -210,7 +211,13 @@ void show_stack(struct task_struct *task, unsigned long *sp) prepare_frametrace(&regs); } } + /* + * show_stack() deals exclusively with kernel mode, so be sure to access + * the stack in the kernel (not user) address space.
+ */ + set_fs(KERNEL_DS); show_stacktrace(task, &regs); + set_fs(old_fs); } static void show_code(unsigned int __user *pc) @@ -686,15 +693,15 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode) asmlinkage void do_ov(struct pt_regs *regs) { enum ctx_state prev_state; - siginfo_t info; + siginfo_t info = { + .si_signo = SIGFPE, + .si_code = FPE_INTOVF, + .si_addr = (void __user *)regs->cp0_epc, + }; prev_state = exception_enter(); die_if_kernel("Integer overflow", regs); - info.si_code = FPE_INTOVF; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_addr = (void __user *) regs->cp0_epc; force_sig_info(SIGFPE, &info, current); exception_exit(prev_state); } @@ -870,7 +877,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) void do_trap_or_bp(struct pt_regs *regs, unsigned int code, const char *str) { - siginfo_t info; + siginfo_t info = { 0 }; char b[40]; #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP @@ -898,7 +905,6 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, else info.si_code = FPE_INTOVF; info.si_signo = SIGFPE; - info.si_errno = 0; info.si_addr = (void __user *) regs->cp0_epc; force_sig_info(SIGFPE, &info, current); break; @@ -1518,6 +1524,7 @@ asmlinkage void do_mcheck(struct pt_regs *regs) const int field = 2 * sizeof(unsigned long); int multi_match = regs->cp0_status & ST0_TS; enum ctx_state prev_state; + mm_segment_t old_fs = get_fs(); prev_state = exception_enter(); show_regs(regs); @@ -1539,8 +1546,13 @@ asmlinkage void do_mcheck(struct pt_regs *regs) dump_tlb_all(); } + if (!user_mode(regs)) + set_fs(KERNEL_DS); + show_code((unsigned int __user *) regs->cp0_epc); + set_fs(old_fs); + /* * Some chips may have other causes of machine check (e.g. SB1 * graduation timer) diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index af84bef0c90de..eb3efd137fd17 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -438,7 +438,7 @@ do { \ : "memory"); \ } while(0) -#define StoreDW(addr, value, res) \ +#define _StoreDW(addr, value, res) \ do { \ __asm__ __volatile__ ( \ ".set\tpush\n\t" \ diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index d5fa3eaf39a10..41b1b090f56f6 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1581,7 +1581,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, base = (inst >> 21) & 0x1f; op_inst = (inst >> 16) & 0x1f; - offset = inst & 0xffff; + offset = (int16_t)inst; cache = (inst >> 16) & 0x3; op = (inst >> 18) & 0x7; diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S index c567240386a0f..d1ee95a7f7dd8 100644 --- a/arch/mips/kvm/locore.S +++ b/arch/mips/kvm/locore.S @@ -165,9 +165,11 @@ FEXPORT(__kvm_mips_vcpu_run) FEXPORT(__kvm_mips_load_asid) /* Set the ASID for the Guest Kernel */ - INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ - /* addresses shift to 0x80000000 */ - bltz t0, 1f /* If kernel */ + PTR_L t0, VCPU_COP0(k1) + LONG_L t0, COP0_STATUS(t0) + andi t0, KSU_USER | ST0_ERL | ST0_EXL + xori t0, KSU_USER + bnez t0, 1f /* If kernel */ INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: @@ -482,9 +484,11 @@ __kvm_mips_return_to_guest: mtc0 t0, CP0_EPC /* Set the ASID for the Guest Kernel */ - INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ - /* addresses shift to 0x80000000 */ - bltz t0, 1f /* If kernel */ + PTR_L t0, VCPU_COP0(k1) + LONG_L t0, COP0_STATUS(t0) + andi t0, KSU_USER | ST0_ERL | ST0_EXL + xori t0, KSU_USER +
bnez t0, 1f /* If kernel */ INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index bb68e8d520e83..ace4ed7d41c6d 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -277,7 +277,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) if (!gebase) { err = -ENOMEM; - goto out_free_cpu; + goto out_uninit_cpu; } kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n", ALIGN(size, PAGE_SIZE), gebase); @@ -341,6 +341,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) out_free_gebase: kfree(gebase); +out_uninit_cpu: + kvm_vcpu_uninit(vcpu); + out_free_cpu: kfree(vcpu); @@ -697,7 +700,7 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { void __user *uaddr = (void __user *)(long)reg->addr; - return copy_to_user(uaddr, vs, 16); + return copy_to_user(uaddr, vs, 16) ? -EFAULT : 0; } else { return -EINVAL; } @@ -727,7 +730,7 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { void __user *uaddr = (void __user *)(long)reg->addr; - return copy_from_user(vs, uaddr, 16); + return copy_from_user(vs, uaddr, 16) ? -EFAULT : 0; } else { return -EINVAL; } @@ -982,7 +985,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) /* If nothing is dirty, don't bother messing with page tables. */ if (is_dirty) { - memslot = &kvm->memslots->memslots[log->slot]; + memslot = id_to_memslot(kvm->memslots, log->slot); ga = memslot->base_gfn << PAGE_SHIFT; ga_end = ga + (memslot->npages << PAGE_SHIFT); diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c index 3fc2e6d70c779..a0706fd4ce0a0 100644 --- a/arch/mips/lantiq/clk.c +++ b/arch/mips/lantiq/clk.c @@ -99,6 +99,23 @@ int clk_set_rate(struct clk *clk, unsigned long rate) } EXPORT_SYMBOL(clk_set_rate); +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + if (unlikely(!clk_good(clk))) + return 0; + if (clk->rates && *clk->rates) { + unsigned long *r = clk->rates; + + while (*r && (*r != rate)) + r++; + if (!*r) { + return clk->rate; + } + } + return rate; +} +EXPORT_SYMBOL(clk_round_rate); + int clk_enable(struct clk *clk) { if (unlikely(!clk_good(clk))) diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index 6ab10573490de..d01ade63492fd 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -466,6 +466,7 @@ int get_c0_perfcount_int(void) { return ltq_perfcount_irq; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/loongson/common/env.c b/arch/mips/loongson/common/env.c index 22f04ca2ff3e5..2efb18aafa4f1 100644 --- a/arch/mips/loongson/common/env.c +++ b/arch/mips/loongson/common/env.c @@ -64,6 +64,9 @@ void __init prom_init_env(void) } if (memsize == 0) memsize = 256; + + loongson_sysconf.nr_uarts = 1; + pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize); #else struct boot_params *boot_p; diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 22b9b2cb9219f..2b95e34fa9e89 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -451,7 +451,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, /* Fall through */ case jr_op: /* For R6, JR already emulated in jalr_op */ - if (NO_R6EMU && insn.r_format.opcode == jr_op) + if (NO_R6EMU && insn.r_format.func == 
jr_op) break; *contpc = regs->regs[insn.r_format.rs]; return 1; @@ -1137,7 +1137,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; case mfhc_op: - if (!cpu_has_mips_r2) + if (!cpu_has_mips_r2_r6) goto sigill; /* copregister rd -> gpr[rt] */ @@ -1148,7 +1148,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; case mthc_op: - if (!cpu_has_mips_r2) + if (!cpu_has_mips_r2_r6) goto sigill; /* copregister rd <- gpr[rt] */ @@ -1181,6 +1181,24 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, } break; + case bc1eqz_op: + case bc1nez_op: + if (!cpu_has_mips_r6 || delay_slot(xcp)) + return SIGILL; + + cond = likely = 0; + switch (MIPSInst_RS(ir)) { + case bc1eqz_op: + if (!(get_fpr32(&current->thread.fpu.fpr[MIPSInst_RT(ir)], 0) & 0x1)) + cond = 1; + break; + case bc1nez_op: + if (get_fpr32(&current->thread.fpu.fpr[MIPSInst_RT(ir)], 0) & 0x1) + cond = 1; + break; + } + goto branch_common; + case bc_op: if (delay_slot(xcp)) return SIGILL; @@ -1207,7 +1225,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, case bct_op: break; } - +branch_common: set_delay_slot(xcp); if (cond) { /* diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 2e03ab1735911..dca0efc078c15 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -37,6 +37,7 @@ #include <asm/cacheflush.h> /* for run_uncached() */ #include <asm/traps.h> #include <asm/dma-coherence.h> +#include <asm/mips-cm.h> /* * Special Variant of smp_call_function for use by cache functions: @@ -51,9 +52,16 @@ static inline void r4k_on_each_cpu(void (*func) (void *info), void *info) { preempt_disable(); -#ifndef CONFIG_MIPS_MT_SMP - smp_call_function(func, info, 1); -#endif + /* + * The Coherent Manager propagates address-based cache ops to other + * cores but not index-based ops. However, r4k_on_each_cpu is used + * in both cases so there is no easy way to tell what kind of op is + * executed on the other cores. The best we can probably do is + * to restrict that call to when a CM is not present, because both + * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops.
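To make the trade-off described above concrete, here is a hedged sketch of how a dispatcher could split the two kinds of cache op if that information were available (the real r4k_on_each_cpu() cannot tell them apart, which is exactly why the code below conservatively keys off mips_cm_present()):

	/* Illustrative only: the CM broadcasts address-based ops in hardware,
	 * so only index-based ops would need a software cross-call. */
	static void dispatch_cache_op(void (*op)(void *), void *info, bool index_based)
	{
		preempt_disable();
		if (index_based && !mips_cm_present())
			smp_call_function_many(&cpu_foreign_map, op, info, 1);
		op(info);		/* always run on the local CPU too */
		preempt_enable();
	}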
+ */ + if (!mips_cm_present()) + smp_call_function_many(&cpu_foreign_map, func, info, 1); func(info); preempt_enable(); } diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 609d1241b0c47..371eec1136591 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -100,7 +100,7 @@ static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) else #endif #if defined(CONFIG_ZONE_DMA) && !defined(CONFIG_ZONE_DMA32) - if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + if (dev->coherent_dma_mask < DMA_BIT_MASK(sizeof(phys_addr_t) * 8)) dma_flag = __GFP_DMA; else #endif diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 97c87027c17f8..90b0e83167903 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -242,7 +242,7 @@ static void output_pgtable_bits_defines(void) pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT); pr_define("_PAGE_SPLITTING_SHIFT %d\n", _PAGE_SPLITTING_SHIFT); #endif -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) if (cpu_has_rixi) { #ifdef _PAGE_NO_EXEC_SHIFT pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT); diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 185e68261f459..a7f7d9ffb4025 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -148,6 +148,7 @@ int get_c0_perfcount_int(void) return mips_cpu_perf_irq; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { @@ -165,14 +166,17 @@ unsigned int get_c0_compare_int(void) static void __init init_rtc(void) { - /* stop the clock whilst setting it up */ - CMOS_WRITE(RTC_SET | RTC_24H, RTC_CONTROL); + unsigned char freq, ctrl; - /* 32KHz time base */ - CMOS_WRITE(RTC_REF_CLCK_32KHZ, RTC_FREQ_SELECT); + /* Set 32KHz time base if not already set */ + freq = CMOS_READ(RTC_FREQ_SELECT); + if ((freq & RTC_DIV_CTL) != RTC_REF_CLCK_32KHZ) + CMOS_WRITE(RTC_REF_CLCK_32KHZ, RTC_FREQ_SELECT); - /* start the clock */ - CMOS_WRITE(RTC_24H, RTC_CONTROL); + /* Ensure SET bit is clear so RTC can run */ + ctrl = CMOS_READ(RTC_CONTROL); + if (ctrl & RTC_SET) + CMOS_WRITE(ctrl & ~RTC_SET, RTC_CONTROL); } void __init plat_time_init(void) diff --git a/arch/mips/mti-sead3/sead3-time.c b/arch/mips/mti-sead3/sead3-time.c index e1d69895fb1de..a120b7a5a8fe4 100644 --- a/arch/mips/mti-sead3/sead3-time.c +++ b/arch/mips/mti-sead3/sead3-time.c @@ -77,6 +77,7 @@ int get_c0_perfcount_int(void) return MIPS_CPU_IRQ_BASE + cp0_perfcount_irq; return -1; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index e23fdf2a9c80d..d6d27d51d1310 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -556,19 +556,6 @@ static inline u16 align_sp(unsigned int num) return num; } -static bool is_load_to_a(u16 inst) -{ - switch (inst) { - case BPF_LD | BPF_W | BPF_LEN: - case BPF_LD | BPF_W | BPF_ABS: - case BPF_LD | BPF_H | BPF_ABS: - case BPF_LD | BPF_B | BPF_ABS: - return true; - default: - return false; - } -} - static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) { int i = 0, real_off = 0; @@ -686,7 +673,6 @@ static unsigned int get_stack_depth(struct jit_ctx *ctx) static void build_prologue(struct jit_ctx *ctx) { - u16 first_inst = ctx->skf->insns[0].code; int sp_off; /* Calculate the total offset for the stack pointer */ @@ -700,7 +686,7 @@ static void build_prologue(struct jit_ctx *ctx) emit_jit_reg_move(r_X, r_zero, ctx); /* Do not 
leak kernel data to userspace */ - if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst))) + if (bpf_needs_clear_a(&ctx->skf->insns[0])) emit_jit_reg_move(r_A, r_zero, ctx); } diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c index 67889fcea8aa8..ab73f6f405bb0 100644 --- a/arch/mips/pistachio/time.c +++ b/arch/mips/pistachio/time.c @@ -26,6 +26,7 @@ int get_c0_perfcount_int(void) { return gic_get_c0_perfcount_int(); } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); void __init plat_time_init(void) { diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c index 7cf91b92e9d10..199ace4ca1ad6 100644 --- a/arch/mips/ralink/irq.c +++ b/arch/mips/ralink/irq.c @@ -89,6 +89,7 @@ int get_c0_perfcount_int(void) { return rt_perfcount_irq; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 4434b54e1d87c..78ae5552fdb89 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -1,6 +1,7 @@ config MN10300 def_bool y select HAVE_OPROFILE + select HAVE_UID16 select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION select HAVE_ARCH_TRACEHOOK @@ -37,9 +38,6 @@ config HIGHMEM config NUMA def_bool n -config UID16 - def_bool y - config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index e5a693b16da29..443f44de10209 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -17,6 +17,7 @@ config OPENRISC select GENERIC_IRQ_SHOW select GENERIC_IOMAP select GENERIC_CPU_DEVICES + select HAVE_UID16 select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS select GENERIC_STRNCPY_FROM_USER @@ -31,9 +32,6 @@ config MMU config HAVE_DMA_ATTRS def_bool y -config UID16 - def_bool y - config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index 3a08eae3318fe..3edbb9fc91b4e 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -72,7 +72,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { - if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED) + if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) { /* * This is the permanent pmd attached to the pgd; * cannot free it. @@ -81,6 +81,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) */ mm_inc_nr_pmds(mm); return; + } free_pages((unsigned long)pmd, PMD_ORDER); } diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 0a183756d6ec2..f93c4a4e65803 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -16,7 +16,7 @@ #include <asm/processor.h> #include <asm/cache.h> -extern spinlock_t pa_dbit_lock; +extern spinlock_t pa_tlb_lock; /* * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel @@ -33,6 +33,19 @@ extern spinlock_t pa_dbit_lock; */ #define kern_addr_valid(addr) (1) +/* Purge data and instruction TLB entries. Must be called holding + * the pa_tlb_lock. The TLB purge instructions are slow on SMP + * machines since the purge must be broadcast to all CPUs. + */ + +static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) +{ + mtsp(mm->context, 1); + pdtlb(addr); + if (unlikely(split_tlb)) + pitlb(addr); +} + /* Certain architectures need to do special things when PTEs * within a page table are directly modified. Thus, the following * hook is made available.
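The set_pte_at() rewrite in the next hunk pairs the PTE store with a conditional TLB purge under pa_tlb_lock. Rendered as a plain C function (the real code is a macro), the sequence it implements is roughly:

	/* Sketch of the new set_pte_at() sequence. */
	static void set_pte_at_sketch(struct mm_struct *mm, unsigned long addr,
				      pte_t *ptep, pte_t pteval)
	{
		unsigned long flags;
		pte_t old_pte;

		spin_lock_irqsave(&pa_tlb_lock, flags);
		old_pte = *ptep;
		set_pte(ptep, pteval);
		/* Purge only if the old PTE may have been cached in a TLB. */
		if (pte_inserted(old_pte))
			purge_tlb_entries(mm, addr);
		spin_unlock_irqrestore(&pa_tlb_lock, flags);
	}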
@@ -42,15 +55,20 @@ extern spinlock_t pa_dbit_lock; *(pteptr) = (pteval); \ } while(0) -extern void purge_tlb_entries(struct mm_struct *, unsigned long); +#define pte_inserted(x) \ + ((pte_val(x) & (_PAGE_PRESENT|_PAGE_ACCESSED)) \ + == (_PAGE_PRESENT|_PAGE_ACCESSED)) -#define set_pte_at(mm, addr, ptep, pteval) \ - do { \ +#define set_pte_at(mm, addr, ptep, pteval) \ + do { \ + pte_t old_pte; \ unsigned long flags; \ - spin_lock_irqsave(&pa_dbit_lock, flags); \ - set_pte(ptep, pteval); \ - purge_tlb_entries(mm, addr); \ - spin_unlock_irqrestore(&pa_dbit_lock, flags); \ + spin_lock_irqsave(&pa_tlb_lock, flags); \ + old_pte = *ptep; \ + set_pte(ptep, pteval); \ + if (pte_inserted(old_pte)) \ + purge_tlb_entries(mm, addr); \ + spin_unlock_irqrestore(&pa_tlb_lock, flags); \ } while (0) #endif /* !__ASSEMBLY__ */ @@ -268,7 +286,7 @@ extern unsigned long *empty_zero_page; #define pte_none(x) (pte_val(x) == 0) #define pte_present(x) (pte_val(x) & _PAGE_PRESENT) -#define pte_clear(mm,addr,xp) do { pte_val(*(xp)) = 0; } while (0) +#define pte_clear(mm, addr, xp) set_pte_at(mm, addr, xp, __pte(0)) #define pmd_flag(x) (pmd_val(x) & PxD_FLAG_MASK) #define pmd_address(x) ((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) @@ -435,15 +453,15 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned if (!pte_young(*ptep)) return 0; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); pte = *ptep; if (!pte_young(pte)) { - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return 0; } set_pte(ptep, pte_mkold(pte)); purge_tlb_entries(vma->vm_mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return 1; } @@ -453,11 +471,12 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t old_pte; unsigned long flags; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); old_pte = *ptep; - pte_clear(mm,addr,ptep); - purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + set_pte(ptep, __pte(0)); + if (pte_inserted(old_pte)) + purge_tlb_entries(mm, addr); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return old_pte; } @@ -465,10 +484,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { unsigned long flags; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); set_pte(ptep, pte_wrprotect(*ptep)); purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); } #define pte_same(A,B) (pte_val(A) == pte_val(B)) diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h index 9d086a599fa05..e84b96478193c 100644 --- a/arch/parisc/include/asm/tlbflush.h +++ b/arch/parisc/include/asm/tlbflush.h @@ -13,6 +13,9 @@ * active at any one time on the Merced bus. This tlb purge * synchronisation is fairly lightweight and harmless so we activate * it on all systems not just the N class. + + * It is also used to ensure PTE updates are atomic and consistent + * with the TLB. 
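Both wrappers added in the tlbflush.h hunk below funnel into __flush_tlb_range(); as a usage illustration (addresses hypothetical):

	/* Kernel mappings use space id 0 ... */
	flush_tlb_kernel_range(start, start + 4 * PAGE_SIZE);
	/* ... which expands to __flush_tlb_range(0, start, start + 4 * PAGE_SIZE). */

	/* User mappings take the space id from the mm ... */
	flush_tlb_range(vma, start, end);
	/* ... expanding to __flush_tlb_range(vma->vm_mm->context, start, end). */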
*/ extern spinlock_t pa_tlb_lock; @@ -24,20 +27,24 @@ extern void flush_tlb_all_local(void *); #define smp_flush_tlb_all() flush_tlb_all() +int __flush_tlb_range(unsigned long sid, + unsigned long start, unsigned long end); + +#define flush_tlb_range(vma, start, end) \ + __flush_tlb_range((vma)->vm_mm->context, start, end) + +#define flush_tlb_kernel_range(start, end) \ + __flush_tlb_range(0, start, end) + /* * flush_tlb_mm() * - * XXX This code is NOT valid for HP-UX compatibility processes, - * (although it will probably work 99% of the time). HP-UX - * processes are free to play with the space id's and save them - * over long periods of time, etc. so we have to preserve the - * space and just flush the entire tlb. We need to check the - * personality in order to do that, but the personality is not - * currently being set correctly. - * - * Of course, Linux processes could do the same thing, but - * we don't support that (and the compilers, dynamic linker, - * etc. do not do that). + * The code to switch to a new context is NOT valid for processes + * which play with the space id's. Thus, we have to preserve the + * space and just flush the entire tlb. However, the compilers, + * dynamic linker, etc, do not manipulate space id's, so there + * could be a significant performance benefit in switching contexts + * and not flushing the whole tlb. */ static inline void flush_tlb_mm(struct mm_struct *mm) @@ -45,10 +52,18 @@ static inline void flush_tlb_mm(struct mm_struct *mm) BUG_ON(mm == &init_mm); /* Should never happen */ #if 1 || defined(CONFIG_SMP) + /* Except for very small threads, flushing the whole TLB is + * faster than using __flush_tlb_range. The pdtlb and pitlb + * instructions are very slow because of the TLB broadcast. + * It might be faster to do local range flushes on all CPUs + * on PA 2.0 systems. + */ flush_tlb_all(); #else /* FIXME: currently broken, causing space id and protection ids - * to go out of sync, resulting in faults on userspace accesses. + * to go out of sync, resulting in faults on userspace accesses. + * This approach needs further investigation since running many + * small applications (e.g., GCC testsuite) is faster on HP-UX. 
 */ if (mm) { if (mm->context != 0) @@ -65,22 +80,12 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, { unsigned long flags, sid; - /* For one page, it's not worth testing the split_tlb variable */ - - mb(); sid = vma->vm_mm->context; purge_tlb_start(flags); mtsp(sid, 1); pdtlb(addr); - pitlb(addr); + if (unlikely(split_tlb)) + pitlb(addr); purge_tlb_end(flags); } - -void __flush_tlb_range(unsigned long sid, - unsigned long start, unsigned long end); - -#define flush_tlb_range(vma,start,end) __flush_tlb_range((vma)->vm_mm->context,start,end) - -#define flush_tlb_kernel_range(start, end) __flush_tlb_range(0,start,end) - #endif diff --git a/arch/parisc/include/uapi/asm/siginfo.h b/arch/parisc/include/uapi/asm/siginfo.h index d7034728f3778..1c75565d984b4 100644 --- a/arch/parisc/include/uapi/asm/siginfo.h +++ b/arch/parisc/include/uapi/asm/siginfo.h @@ -1,6 +1,10 @@ #ifndef _PARISC_SIGINFO_H #define _PARISC_SIGINFO_H +#if defined(__LP64__) +#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +#endif + #include <asm-generic/siginfo.h> #undef NSIGTRAP diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index f6448c7c62b51..cda6dbbe98426 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -342,12 +342,15 @@ EXPORT_SYMBOL(flush_data_cache_local); EXPORT_SYMBOL(flush_kernel_icache_range_asm); #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */ -int parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD; +static unsigned long parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD; + +#define FLUSH_TLB_THRESHOLD (2*1024*1024) /* 2MB initial TLB threshold */ +static unsigned long parisc_tlb_flush_threshold __read_mostly = FLUSH_TLB_THRESHOLD; void __init parisc_setup_cache_timing(void) { unsigned long rangetime, alltime; - unsigned long size; + unsigned long size, start; alltime = mfctl(16); flush_data_cache(); @@ -364,14 +367,43 @@ void __init parisc_setup_cache_timing(void) /* Racy, but if we see an intermediate value, it's ok too...
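The thresholds computed just below are straight break-even ratios. With made-up calibration numbers: if one full flush costs alltime = 120000 cycles and flushing size = 2 MB range-wise costs rangetime = 480000 cycles, then:

	/* Hypothetical numbers, for illustration only. */
	unsigned long size      = 2 * 1024 * 1024;	/* bytes flushed in the timed loop */
	unsigned long alltime   = 120000;		/* cycles for one full flush */
	unsigned long rangetime = 480000;		/* cycles for the range flush */

	/* Break-even point: below this many bytes, a range flush is cheaper. */
	unsigned long threshold = size * alltime / rangetime;	/* = 524288, i.e. 512 kB */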
*/ parisc_cache_flush_threshold = size * alltime / rangetime; - parisc_cache_flush_threshold = (parisc_cache_flush_threshold + L1_CACHE_BYTES - 1) &~ (L1_CACHE_BYTES - 1); + parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold); if (!parisc_cache_flush_threshold) parisc_cache_flush_threshold = FLUSH_THRESHOLD; if (parisc_cache_flush_threshold > cache_info.dc_size) parisc_cache_flush_threshold = cache_info.dc_size; - printk(KERN_INFO "Setting cache flush threshold to %x (%d CPUs online)\n", parisc_cache_flush_threshold, num_online_cpus()); + printk(KERN_INFO "Setting cache flush threshold to %lu kB\n", + parisc_cache_flush_threshold/1024); + + /* calculate TLB flush threshold */ + + alltime = mfctl(16); + flush_tlb_all(); + alltime = mfctl(16) - alltime; + + size = PAGE_SIZE; + start = (unsigned long) _text; + rangetime = mfctl(16); + while (start < (unsigned long) _end) { + flush_tlb_kernel_range(start, start + PAGE_SIZE); + start += PAGE_SIZE; + size += PAGE_SIZE; + } + rangetime = mfctl(16) - rangetime; + + printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n", + alltime, size, rangetime); + + parisc_tlb_flush_threshold = size * alltime / rangetime; + parisc_tlb_flush_threshold *= num_online_cpus(); + parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold); + if (!parisc_tlb_flush_threshold) + parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD; + + printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n", + parisc_tlb_flush_threshold/1024); } extern void purge_kernel_dcache_page_asm(unsigned long); @@ -403,48 +435,45 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, } EXPORT_SYMBOL(copy_user_page); -void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) -{ - unsigned long flags; - - /* Note: purge_tlb_entries can be called at startup with - no context. */ - - purge_tlb_start(flags); - mtsp(mm->context, 1); - pdtlb(addr); - pitlb(addr); - purge_tlb_end(flags); -} -EXPORT_SYMBOL(purge_tlb_entries); - -void __flush_tlb_range(unsigned long sid, unsigned long start, - unsigned long end) +/* __flush_tlb_range() + * + * returns 1 if all TLBs were flushed. + */ +int __flush_tlb_range(unsigned long sid, unsigned long start, + unsigned long end) { - unsigned long npages; + unsigned long flags, size; - npages = ((end - (start & PAGE_MASK)) + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - if (npages >= 512) /* 2MB of space: arbitrary, should be tuned */ + size = (end - start); + if (size >= parisc_tlb_flush_threshold) { flush_tlb_all(); - else { - unsigned long flags; + return 1; + } + /* Purge TLB entries for small ranges using the pdtlb and + pitlb instructions. These instructions execute locally + but cause a purge request to be broadcast to other TLBs. 
*/ + if (likely(!split_tlb)) { + while (start < end) { + purge_tlb_start(flags); + mtsp(sid, 1); + pdtlb(start); + purge_tlb_end(flags); + start += PAGE_SIZE; + } + return 0; + } + + /* split TLB case */ + while (start < end) { purge_tlb_start(flags); mtsp(sid, 1); - if (split_tlb) { - while (npages--) { - pdtlb(start); - pitlb(start); - start += PAGE_SIZE; - } - } else { - while (npages--) { - pdtlb(start); - start += PAGE_SIZE; - } - } + pdtlb(start); + pitlb(start); purge_tlb_end(flags); + start += PAGE_SIZE; } + return 0; } static void cacheflush_h_tmp_function(void *dummy) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 75819617f93b9..c5ef4081b01d2 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -45,7 +45,7 @@ .level 2.0 #endif - .import pa_dbit_lock,data + .import pa_tlb_lock,data /* space_to_prot macro creates a prot id from a space id */ @@ -420,8 +420,8 @@ SHLREG %r9,PxD_VALUE_SHIFT,\pmd extru \va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */ - shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd - LDREG %r0(\pmd),\pte /* pmd is now pte */ + shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */ + LDREG %r0(\pmd),\pte bb,>=,n \pte,_PAGE_PRESENT_BIT,\fault .endm @@ -453,57 +453,53 @@ L2_ptep \pgd,\pte,\index,\va,\fault .endm - /* Acquire pa_dbit_lock lock. */ - .macro dbit_lock spc,tmp,tmp1 + /* Acquire pa_tlb_lock lock and recheck page is still present. */ + .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault #ifdef CONFIG_SMP cmpib,COND(=),n 0,\spc,2f - load32 PA(pa_dbit_lock),\tmp + load32 PA(pa_tlb_lock),\tmp 1: LDCW 0(\tmp),\tmp1 cmpib,COND(=) 0,\tmp1,1b nop + LDREG 0(\ptp),\pte + bb,<,n \pte,_PAGE_PRESENT_BIT,2f + b \fault + stw \spc,0(\tmp) 2: #endif .endm - /* Release pa_dbit_lock lock without reloading lock address. */ - .macro dbit_unlock0 spc,tmp + /* Release pa_tlb_lock lock without reloading lock address. */ + .macro tlb_unlock0 spc,tmp #ifdef CONFIG_SMP or,COND(=) %r0,\spc,%r0 stw \spc,0(\tmp) #endif .endm - /* Release pa_dbit_lock lock. */ - .macro dbit_unlock1 spc,tmp + /* Release pa_tlb_lock lock. */ + .macro tlb_unlock1 spc,tmp #ifdef CONFIG_SMP - load32 PA(pa_dbit_lock),\tmp - dbit_unlock0 \spc,\tmp + load32 PA(pa_tlb_lock),\tmp + tlb_unlock0 \spc,\tmp #endif .endm /* Set the _PAGE_ACCESSED bit of the PTE. Be clever and * don't needlessly dirty the cache line if it was already set */ - .macro update_ptep spc,ptep,pte,tmp,tmp1 -#ifdef CONFIG_SMP - or,COND(=) %r0,\spc,%r0 - LDREG 0(\ptep),\pte -#endif + .macro update_accessed ptp,pte,tmp,tmp1 ldi _PAGE_ACCESSED,\tmp1 or \tmp1,\pte,\tmp and,COND(<>) \tmp1,\pte,%r0 - STREG \tmp,0(\ptep) + STREG \tmp,0(\ptp) .endm /* Set the dirty bit (and accessed bit). 
No need to be * clever, this is only used from the dirty fault */ - .macro update_dirty spc,ptep,pte,tmp -#ifdef CONFIG_SMP - or,COND(=) %r0,\spc,%r0 - LDREG 0(\ptep),\pte -#endif + .macro update_dirty ptp,pte,tmp ldi _PAGE_ACCESSED|_PAGE_DIRTY,\tmp or \tmp,\pte,\pte - STREG \pte,0(\ptep) + STREG \pte,0(\ptp) .endm /* bitshift difference between a PFN (based on kernel's PAGE_SIZE) @@ -1148,14 +1144,14 @@ dtlb_miss_20w: L3_ptep ptp,pte,t0,va,dtlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1174,14 +1170,14 @@ nadtlb_miss_20w: L3_ptep ptp,pte,t0,va,nadtlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1202,20 +1198,20 @@ dtlb_miss_11: L2_ptep ptp,pte,t0,va,dtlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 idtlba pte,(%sr1,va) idtlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1235,21 +1231,20 @@ nadtlb_miss_11: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 idtlba pte,(%sr1,va) idtlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1269,16 +1264,16 @@ dtlb_miss_20: L2_ptep ptp,pte,t0,va,dtlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1297,16 +1292,16 @@ nadtlb_miss_20: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 - idtlbt pte,prot - dbit_unlock1 spc,t0 + idtlbt pte,prot + tlb_unlock1 spc,t0 rfir nop @@ -1406,14 +1401,14 @@ itlb_miss_20w: L3_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1430,14 +1425,14 @@ naitlb_miss_20w: L3_ptep ptp,pte,t0,va,naitlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1458,20 +1453,20 @@ itlb_miss_11: L2_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 
- update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 iitlba pte,(%sr1,va) iitlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1482,20 +1477,20 @@ naitlb_miss_11: L2_ptep ptp,pte,t0,va,naitlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 iitlba pte,(%sr1,va) iitlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1516,16 +1511,16 @@ itlb_miss_20: L2_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1536,16 +1531,16 @@ naitlb_miss_20: L2_ptep ptp,pte,t0,va,naitlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1568,14 +1563,14 @@ dbit_trap_20w: L3_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock0 spc,t0 + tlb_unlock0 spc,t0 rfir nop #else @@ -1588,8 +1583,8 @@ dbit_trap_11: L2_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb_11 spc,pte,prot @@ -1600,8 +1595,8 @@ dbit_trap_11: idtlbp prot,(%sr1,va) mtsp t1, %sr1 /* Restore sr1 */ - dbit_unlock0 spc,t0 + tlb_unlock0 spc,t0 rfir nop @@ -1612,16 +1607,16 @@ dbit_trap_20: L2_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb spc,pte,prot f_extend pte,t1 - idtlbt pte,prot - dbit_unlock0 spc,t0 + idtlbt pte,prot + tlb_unlock0 spc,t0 rfir nop #endif diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index f3191db6e2e94..c0eab24f6a9e3 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -507,8 +507,8 @@ void do_cpu_irq_mask(struct pt_regs *regs) struct pt_regs *old_regs; unsigned long eirr_val; int irq, cpu = smp_processor_id(); -#ifdef CONFIG_SMP struct irq_desc *desc; +#ifdef CONFIG_SMP cpumask_t dest; #endif @@ -521,8 +521,12 @@ void do_cpu_irq_mask(struct pt_regs *regs) goto set_out; irq = eirr_to_irq(eirr_val); -#ifdef CONFIG_SMP + /* Filter out spurious interrupts, mostly from serial port at bootup */ desc = irq_to_desc(irq); + if (unlikely(!desc->action)) + goto set_out; + +#ifdef CONFIG_SMP cpumask_copy(&dest, desc->irq_data.affinity); if (irqd_is_per_cpu(&desc->irq_data) && !cpumask_test_cpu(smp_processor_id(), &dest)) { diff --git a/arch/parisc/kernel/ptrace.c 
b/arch/parisc/kernel/ptrace.c index 9585c81f755fc..ce0b2b4075c70 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -269,14 +269,19 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, long do_syscall_trace_enter(struct pt_regs *regs) { - long ret = 0; - /* Do the secure computing check first. */ secure_computing_strict(regs->gr[20]); if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) - ret = -1L; + tracehook_report_syscall_entry(regs)) { + /* + * Tracing decided this syscall should not happen or the + * debugger stored an invalid system call number. Skip + * the system call and the system call restart handling. + */ + regs->gr[20] = -1UL; + goto out; + } #ifdef CONFIG_64BIT if (!is_compat_task()) @@ -290,7 +295,8 @@ long do_syscall_trace_enter(struct pt_regs *regs) regs->gr[24] & 0xffffffff, regs->gr[23] & 0xffffffff); - return ret ? : regs->gr[20]; +out: + return regs->gr[20]; } void do_syscall_trace_exit(struct pt_regs *regs) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 7ef22e3387e09..02cf40c96fe34 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -343,7 +343,7 @@ tracesys_next: #endif comiclr,>>= __NR_Linux_syscalls, %r20, %r0 - b,n .Lsyscall_nosys + b,n .Ltracesys_nosys LDREGX %r20(%r19), %r19 @@ -359,6 +359,9 @@ tracesys_next: be 0(%sr7,%r19) ldo R%tracesys_exit(%r2),%r2 +.Ltracesys_nosys: + ldo -ENOSYS(%r0),%r28 /* set errno */ + /* Do *not* call this function on the gateway page, because it makes a direct call to syscall_trace. */ @@ -821,7 +824,7 @@ cas2_action: /* 64bit CAS */ #ifdef CONFIG_64BIT 19: ldd,ma 0(%sr3,%r26), %r29 - sub,= %r29, %r25, %r0 + sub,*= %r29, %r25, %r0 b,n cas2_end 20: std,ma %r24, 0(%sr3,%r26) copy %r0, %r28 diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 47ee620d15d27..7f67c4c96a7a3 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -43,10 +43,6 @@ #include "../math-emu/math-emu.h" /* for handle_fpe() */ -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -DEFINE_SPINLOCK(pa_dbit_lock); -#endif - static void parisc_show_stack(struct task_struct *task, unsigned long *sp, struct pt_regs *regs); diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 73eddda53b8ed..4eec430d8fa86 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -28,6 +28,9 @@ BOOTCFLAGS += -m64 endif ifdef CONFIG_CPU_BIG_ENDIAN BOOTCFLAGS += -mbig-endian +else +BOOTCFLAGS += -mlittle-endian +BOOTCFLAGS += $(call cc-option,-mabi=elfv2) endif BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index d463c68fe7f05..99897f6645c12 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -18,12 +18,12 @@ __xchg_u32(volatile void *p, unsigned long val) unsigned long prev; __asm__ __volatile__( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stwcx. %3,0,%2 \n\ bne- 1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER : "=&r" (prev), "+m" (*(volatile unsigned int *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -61,12 +61,12 @@ __xchg_u64(volatile void *p, unsigned long val) unsigned long prev; __asm__ __volatile__( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1: ldarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stdcx. 
%3,0,%2 \n\ bne- 1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER : "=&r" (prev), "+m" (*(volatile unsigned long *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -152,14 +152,14 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) unsigned int prev; __asm__ __volatile__ ( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %0,0,%2 # __cmpxchg_u32\n\ cmpw 0,%0,%3\n\ bne- 2f\n" PPC405_ERR77(0,%2) " stwcx. %4,0,%2\n\ bne- 1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER "\n\ 2:" : "=&r" (prev), "+m" (*p) @@ -198,13 +198,13 @@ __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) unsigned long prev; __asm__ __volatile__ ( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1: ldarx %0,0,%2 # __cmpxchg_u64\n\ cmpd 0,%0,%3\n\ bne- 2f\n\ stdcx. %4,0,%2\n\ bne- 1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER "\n\ 2:" : "=&r" (prev), "+m" (*p) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index a52db28ecc1e1..4457cb605356d 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -79,6 +79,7 @@ struct pci_dn; #define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ #define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */ #define EEH_PE_REMOVED (1 << 10) /* Removed permanently */ +#define EEH_PE_PRI_BUS (1 << 11) /* Cached primary bus */ struct eeh_pe { int type; /* PE type: PHB/Bus/Device */ @@ -336,19 +337,13 @@ static inline int eeh_check_failure(const volatile void __iomem *token) #define eeh_dev_check_failure(x) (0) -static inline void eeh_addr_cache_build(void) { } - -static inline void eeh_add_device_early(struct pci_dn *pdn) { } - -static inline void eeh_add_device_tree_early(struct pci_dn *pdn) { } - -static inline void eeh_add_device_late(struct pci_dev *dev) { } - -static inline void eeh_add_device_tree_late(struct pci_bus *bus) { } - -static inline void eeh_add_sysfs_files(struct pci_bus *bus) { } - -static inline void eeh_remove_device(struct pci_dev *dev) { } +#define eeh_addr_cache_build() +#define eeh_add_device_early(pdn) +#define eeh_add_device_tree_early(pdn) +#define eeh_add_device_late(pdev) +#define eeh_add_device_tree_late(pbus) +#define eeh_add_sysfs_files(pbus) +#define eeh_remove_device(pdev) #define EEH_POSSIBLE_ERROR(val, type) (0) #define EEH_IO_ERROR_VALUE(size) (-1UL) diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 43e6ad424c7fc..88d27e3258d2b 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -135,7 +135,19 @@ #define pte_iterate_hashed_end() } while(0) #ifdef CONFIG_PPC_HAS_HASH_64K -#define pte_pagesize_index(mm, addr, pte) get_slice_psize(mm, addr) +/* + * We expect this to be called only for user addresses or kernel virtual + * addresses other than the linear mapping. 
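As a worked illustration of the macro that follows (the address class is only an example of a non-linear kernel mapping on ppc64):

	/* Sketch: what the new pte_pagesize_index() logic computes. */
	static unsigned int psize_for(struct mm_struct *mm, unsigned long addr)
	{
		if (is_kernel_addr(addr))		/* e.g. a vmalloc/ioremap address */
			return MMU_PAGE_4K;		/* kernel virtual: always 4K here */
		return get_slice_psize(mm, addr);	/* user address: ask the slice map */
	}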
+ */ +#define pte_pagesize_index(mm, addr, pte) \ + ({ \ + unsigned int psize; \ + if (is_kernel_addr(addr)) \ + psize = MMU_PAGE_4K; \ + else \ + psize = get_slice_psize(mm, addr); \ + psize; \ + }) #else #define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K #endif diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index af56b5c6c81ab..f4f99f01b7468 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -108,6 +108,7 @@ #define MSR_TS_T __MASK(MSR_TS_T_LG) /* Transaction Transactional */ #define MSR_TS_MASK (MSR_TS_T | MSR_TS_S) /* Transaction State bits */ #define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */ +#define MSR_TM_RESV(x) (((x) & MSR_TS_MASK) == MSR_TS_MASK) /* Reserved */ #define MSR_TM_TRANSACTIONAL(x) (((x) & MSR_TS_MASK) == MSR_TS_T) #define MSR_TM_SUSPENDED(x) (((x) & MSR_TS_MASK) == MSR_TS_S) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 7a4ede16b2836..b77ef369c0f0e 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -343,6 +343,7 @@ extern void rtas_power_off(void); extern void rtas_halt(void); extern void rtas_os_term(char *str); extern int rtas_get_sensor(int sensor, int index, int *state); +extern int rtas_get_sensor_fast(int sensor, int index, int *state); extern int rtas_get_power_level(int powerdomain, int *level); extern int rtas_set_power_level(int powerdomain, int level, int *setlevel); extern bool rtas_indicator_present(int token, int *maxindex); diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 58abeda64cb7a..15cca17cba4b9 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -29,6 +29,7 @@ static inline void save_early_sprs(struct thread_struct *prev) {} extern void enable_kernel_fp(void); extern void enable_kernel_altivec(void); +extern void enable_kernel_vsx(void); extern int emulate_altivec(struct pt_regs *); extern void __giveup_vsx(struct task_struct *); extern void giveup_vsx(struct task_struct *); diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index e682a7143edb7..c50868681f9ea 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -44,7 +44,7 @@ static inline void isync(void) MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup); #define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER) #define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n" -#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(LWSYNC) "\n" +#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(sync) "\n" #define PPC_ATOMIC_EXIT_BARRIER "\n" stringify_in_c(sync) "\n" #else #define PPC_ACQUIRE_BARRIER diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h index 59dad113897b0..c2d21d11c2d2c 100644 --- a/arch/powerpc/include/uapi/asm/elf.h +++ b/arch/powerpc/include/uapi/asm/elf.h @@ -295,6 +295,8 @@ do { \ #define R_PPC64_TLSLD 108 #define R_PPC64_TOCSAVE 109 +#define R_PPC64_ENTRY 118 + #define R_PPC64_REL16 249 #define R_PPC64_REL16_LO 250 #define R_PPC64_REL16_HI 251 diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 9ee61d15653d6..cb565ad0a5b63 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -310,11 +310,26 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) if (!(pe->type & EEH_PE_PHB)) { if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG)) eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + + /* + * The 
config space of some PCI devices can't be accessed + * when their PEs are in frozen state. Otherwise, fenced + * PHB might be seen. Those PEs are identified with flag + * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED + * is set automatically when the PE is put to EEH_PE_ISOLATED. + * + * Restoring BARs possibly triggers PCI config access in + * (OPAL) firmware and then causes fenced PHB. If the + * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's + * pointless to restore BARs and dump config space. + */ eeh_ops->configure_bridge(pe); - eeh_pe_restore_bars(pe); + if (!(pe->state & EEH_PE_CFG_BLOCKED)) { + eeh_pe_restore_bars(pe); - pci_regs_buf[0] = 0; - eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); + pci_regs_buf[0] = 0; + eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); + } } eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); @@ -1118,9 +1133,6 @@ void eeh_add_device_late(struct pci_dev *dev) return; } - if (eeh_has_flag(EEH_PROBE_MODE_DEV)) - eeh_ops->probe(pdn, NULL); - /* * The EEH cache might not be removed correctly because of * unbalanced kref to the device during unplug time, which @@ -1144,6 +1156,9 @@ void eeh_add_device_late(struct pci_dev *dev) dev->dev.archdata.edev = NULL; } + if (eeh_has_flag(EEH_PROBE_MODE_DEV)) + eeh_ops->probe(pdn, NULL); + edev->pdev = dev; dev->dev.archdata.edev = edev; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 24768ff3cb730..90cc67904dc67 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -561,6 +561,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) */ eeh_pe_state_mark(pe, EEH_PE_KEEP); if (bus) { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_lock_rescan_remove(); pcibios_remove_pci_devices(bus); pci_unlock_rescan_remove(); @@ -792,6 +793,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) * the their PCI config any more. */ if (frozen_bus) { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); pci_lock_rescan_remove(); @@ -875,6 +877,7 @@ static void eeh_handle_special_event(void) continue; /* Notify all devices to be down */ + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); bus = eeh_pe_bus_get(phb_pe); eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 35f0b62259bbd..c3e0420b8a424 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -861,32 +861,29 @@ void eeh_pe_restore_bars(struct eeh_pe *pe) const char *eeh_pe_loc_get(struct eeh_pe *pe) { struct pci_bus *bus = eeh_pe_bus_get(pe); - struct device_node *dn = pci_bus_to_OF_node(bus); + struct device_node *dn; const char *loc = NULL; - if (!dn) - goto out; + while (bus) { + dn = pci_bus_to_OF_node(bus); + if (!dn) { + bus = bus->parent; + continue; + } - /* PHB PE or root PE ? */ - if (pci_is_root_bus(bus)) { - loc = of_get_property(dn, "ibm,loc-code", NULL); - if (!loc) + if (pci_is_root_bus(bus)) loc = of_get_property(dn, "ibm,io-base-loc-code", NULL); + else + loc = of_get_property(dn, "ibm,slot-location-code", + NULL); + if (loc) - goto out; + return loc; - /* Check the root port */ - dn = dn->child; - if (!dn) - goto out; + bus = bus->parent; } - loc = of_get_property(dn, "ibm,loc-code", NULL); - if (!loc) - loc = of_get_property(dn, "ibm,slot-location-code", NULL); - -out: - return loc ? 
loc : "N/A"; + return "N/A"; } /** @@ -909,7 +906,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) bus = pe->phb->bus; } else if (pe->type & EEH_PE_BUS || pe->type & EEH_PE_DEVICE) { - if (pe->bus) { + if (pe->state & EEH_PE_PRI_BUS) { bus = pe->bus; goto out; } diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index ccde8f084ce42..112ccf4975620 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -51,6 +51,22 @@ .text +/* + * Used by threads when the lock bit of core_idle_state is set. + * Threads will spin in HMT_LOW until the lock bit is cleared. + * r14 - pointer to core_idle_state + * r15 - used to load contents of core_idle_state + */ + +core_idle_lock_held: + HMT_LOW +3: lwz r15,0(r14) + andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT + bne 3b + HMT_MEDIUM + lwarx r15,0,r14 + blr + /* * Pass requested state in r3: * r3 - PNV_THREAD_NAP/SLEEP/WINKLE @@ -150,6 +166,10 @@ power7_enter_nap_mode: ld r14,PACA_CORE_IDLE_STATE_PTR(r13) lwarx_loop1: lwarx r15,0,r14 + + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bnel core_idle_lock_held + andc r15,r15,r7 /* Clear thread bit */ andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS @@ -294,7 +314,7 @@ lwarx_loop2: * workaround undo code or resyncing timebase or restoring context * In either case loop until the lock bit is cleared. */ - bne core_idle_lock_held + bnel core_idle_lock_held cmpwi cr2,r15,0 lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) @@ -319,15 +339,6 @@ lwarx_loop2: isync b common_exit -core_idle_lock_held: - HMT_LOW -core_idle_lock_loop: - lwz r15,0(14) - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - bne core_idle_lock_loop - HMT_MEDIUM - b lwarx_loop2 - first_thread_in_subcore: /* First thread in subcore to wakeup */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 68384514506b7..59663af9315fc 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -635,6 +635,33 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, */ break; + case R_PPC64_ENTRY: + /* + * Optimize ELFv2 large code model entry point if + * the TOC is within 2GB range of current location. 
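The instruction encodings involved can be checked by hand; a sketch of the pattern match, with D standing for the 16-bit displacement field that the ~0xfffc mask strips off:

	/* ld  r2,D(r12) : opcode 58, RT=2, RA=12  -> 0xe84c0000 | (D & 0xfffc)
	 * add r2,r2,r12 : opcode 31, XO=266       -> 0x7c426214
	 * and the replacement, once .TOC. is known to be within +/- 2 GB:
	 * addis r2,r12,v@ha -> 0x3c4c0000 | PPC_HA(v)
	 * addi  r2,r2,v@l   -> 0x38420000 | PPC_LO(v)
	 */
	static bool is_large_toc_prolog(const u32 *insn)
	{
		return (insn[0] & ~0xfffcu) == 0xe84c0000u &&
		       insn[1] == 0x7c426214u;
	}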
+ */ + value = my_r2(sechdrs, me) - (unsigned long)location; + if (value + 0x80008000 > 0xffffffff) + break; + /* + * Check for the large code model prolog sequence: + * ld r2, ...(r12) + * add r2, r2, r12 + */ + if ((((uint32_t *)location)[0] & ~0xfffc) != 0xe84c0000) + break; + if (((uint32_t *)location)[1] != 0x7c426214) + break; + /* + * If found, replace it with: + * addis r2, r12, (.TOC.-func)@ha + * addi r2, r2, (.TOC.-func)@l + */ + ((uint32_t *)location)[0] = 0x3c4c0000 + PPC_HA(value); + ((uint32_t *)location)[1] = 0x38420000 + PPC_LO(value); + break; + case R_PPC64_REL16_HA: /* Subtract location pointer */ value -= (unsigned long)location; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index febb50dd53285..c8c8275765e75 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -204,8 +204,6 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -#if 0 -/* not currently used, but some crazy RAID module might want to later */ void enable_kernel_vsx(void) { WARN_ON(preemptible()); @@ -220,7 +218,6 @@ void enable_kernel_vsx(void) #endif /* CONFIG_SMP */ } EXPORT_SYMBOL(enable_kernel_vsx); -#endif void giveup_vsx(struct task_struct *tsk) { @@ -554,6 +551,24 @@ static void tm_reclaim_thread(struct thread_struct *thr, msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1; } + /* + * Use the current MSR TM suspended bit to track if we have + * checkpointed state outstanding. + * On signal delivery, we'd normally reclaim the checkpointed + * state to obtain stack pointer (see: get_tm_stackpointer()). + * This will then directly return to userspace without going + * through __switch_to(). However, if the stack frame is bad, + * we need to exit this thread which calls __switch_to() which + * will again attempt to reclaim the already saved tm state. + * Hence we need to check that we've not already reclaimed + * this state. + * We do this using the current MSR, rather than tracking it in + * some specific thread_struct bit, as it has the additional + * benefit of checking for a potential TM bad thing exception. + */
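Spelled out, the failure sequence that the check below guards against is:

	/*
	 * 1. signal delivery: get_tm_stackpointer() -> tm_reclaim_thread()
	 *    reclaims the checkpointed state; MSR[TS] leaves suspended mode
	 * 2. the signal frame turns out to be bad, so the task is killed
	 * 3. exit path: __switch_to() -> tm_reclaim_thread() a second time
	 *
	 * Without the MSR_TM_SUSPENDED(mfmsr()) test, step 3 would call
	 * tm_reclaim() with no checkpointed state left, raising a
	 * TM Bad Thing exception.
	 */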
+ if (!MSR_TM_SUSPENDED(mfmsr())) + return; + tm_reclaim(thr, thr->regs->msr, cause); /* Having done the reclaim, we now have the checkpointed diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 7a488c108410b..5607693f35cf0 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -584,6 +584,23 @@ int rtas_get_sensor(int sensor, int index, int *state) } EXPORT_SYMBOL(rtas_get_sensor); +int rtas_get_sensor_fast(int sensor, int index, int *state) +{ + int token = rtas_token("get-sensor-state"); + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + rc = rtas_call(token, 2, 2, state, sensor, index); + WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && + rc <= RTAS_EXTENDED_DELAY_MAX)); + + if (rc < 0) + return rtas_error_rc(rc); + return rc; +} + bool rtas_indicator_present(int token, int *maxindex) { int proplen, count, i; @@ -1024,6 +1041,9 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!rtas.entry) + return -EINVAL; + if (copy_from_user(&args, uargs, 3 * sizeof(u32)) != 0) return -EFAULT; diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d3a831ac0f927..7356c33dc897f 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -875,6 +875,15 @@ static long restore_tm_user_regs(struct pt_regs *regs, return 1; #endif /* CONFIG_SPE */ + /* Get the top half of the MSR from the user context */ + if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) + return 1; + msr_hi <<= 32; + /* If TM bits are set to the reserved value, it's an invalid context */ + if (MSR_TM_RESV(msr_hi)) + return 1; + /* Pull in the MSR TM bits from the user context */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK); /* Now, recheckpoint. This loads up all of the checkpointed (older) * registers, including FP and V[S]Rs. After recheckpointing, the * transactional versions should be loaded. @@ -884,11 +893,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, current->thread.tm_texasr |= TEXASR_FS; /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(&current->thread, msr); - /* Get the top half of the MSR */ - if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) - return 1; - /* Pull in MSR TM from user context */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK); /* This loads the speculative FP/VEC state, if used */ if (msr & MSR_FP) { @@ -966,8 +970,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s) int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, 3*sizeof(int)) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE32)) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index c7c24d2e2bdbc..164fd64748436 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -427,6 +427,10 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, /* get MSR separately, transfer the LE bit if doing signal return */ err |= __get_user(msr, &sc->gp_regs[PT_MSR]); + /* Don't allow reserved mode.
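The reserved encoding rejected here is the all-ones TS value; a sketch of the bit test behind MSR_TM_RESV(), with the TS encodings per the Power ISA:

	/* MSR[TS]: 00 = non-transactional, 01 = suspended,
	 * 10 = transactional, 11 = reserved -> must be refused. */
	static inline bool ts_is_reserved(unsigned long msr)
	{
		return (msr & MSR_TS_MASK) == MSR_TS_MASK;	/* both bits set */
	}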
*/ + if (MSR_TM_RESV(msr)) + return -EINVAL; + /* pull in MSR TM from user context */ regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 453a8a47a4676..964c0ce584ce2 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -826,12 +826,15 @@ int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu) unsigned long size = kvmppc_get_gpr(vcpu, 4); unsigned long addr = kvmppc_get_gpr(vcpu, 5); u64 buf; + int srcu_idx; int ret; if (!is_power_of_2(size) || (size > sizeof(buf))) return H_TOO_HARD; + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, size, &buf); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); if (ret != 0) return H_TOO_HARD; @@ -866,6 +869,7 @@ int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu) unsigned long addr = kvmppc_get_gpr(vcpu, 5); unsigned long val = kvmppc_get_gpr(vcpu, 6); u64 buf; + int srcu_idx; int ret; switch (size) { @@ -889,7 +893,9 @@ int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu) return H_TOO_HARD; } + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, size, &buf); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); if (ret != 0) return H_TOO_HARD; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index df81caab73833..f5b3de7f7fa24 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -210,6 +210,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) { + /* + * Check for illegal transactional state bit combination + * and if we find it, force the TS field to a safe state. + */ + if ((msr & MSR_TS_MASK) == MSR_TS_MASK) + msr &= ~MSR_TS_MASK; vcpu->arch.shregs.msr = msr; kvmppc_end_cede(vcpu); } @@ -2178,7 +2184,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) vc->runner = vcpu; if (n_ceded == vc->n_runnable) { kvmppc_vcore_blocked(vc); - } else if (should_resched()) { + } else if (need_resched()) { vc->vcore_state = VCORE_PREEMPT; /* Let something else run */ cond_resched_lock(&vc->lock); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index b027a89737b62..c6d601cc97640 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -421,14 +421,20 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); v = pte & ~HPTE_V_HVLOCK; if (v & HPTE_V_VALID) { - u64 pte1; - - pte1 = be64_to_cpu(hpte[1]); hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); - rb = compute_tlbie_rb(v, pte1, pte_index); + rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); - /* Read PTE low word after tlbie to get final R/C values */ - remove_revmap_chain(kvm, pte_index, rev, v, pte1); + /* + * The reference (R) and change (C) bits in a HPT + * entry can be set by hardware at any time up until + * the HPTE is invalidated and the TLB invalidation + * sequence has completed. This means that when + * removing a HPTE, we need to re-read the HPTE after + * the invalidation sequence has completed in order to + * obtain reliable values of R and C. 
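Read as a numbered sequence, the ordering that the rewritten teardown implements is:

	/* Sketch of the required ordering when removing an HPTE. */
	hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);		/* 1. mark invalid      */
	do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
							/* 2. tlbie + sync      */
	remove_revmap_chain(kvm, pte_index, rev, v,
			    be64_to_cpu(hpte[1]));	/* 3. only now read R/C */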
+ */ + remove_revmap_chain(kvm, pte_index, rev, v, + be64_to_cpu(hpte[1])); } r = rev->guest_rpte & ~HPTE_GR_RESERVED; note_hpte_modification(kvm, rev); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 4d70df26c402c..f8338e6d3dd78 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1127,6 +1127,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL bne 3f lbz r0, HSTATE_HOST_IPI(r13) + cmpwi r0, 0 beq 4f b guest_exit_cont 3: @@ -1170,6 +1171,7 @@ mc_cont: bl kvmhv_accumulate_time #endif + mr r3, r12 /* Increment exit count, poke other threads to exit */ bl kvmhv_commence_exit nop @@ -2045,7 +2047,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ 2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW - rlwimi r5, r4, 1, DAWRX_WT + rlwimi r5, r4, 2, DAWRX_WT clrrdi r4, r4, 3 std r4, VCPU_DAWR(r3) std r5, VCPU_DAWRX(r3) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ac3ddf115f3d8..c8fe9ab107922 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -915,21 +915,17 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = -ENXIO; break; } - vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; + val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; break; case KVM_REG_PPC_VSCR: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); + val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); break; case KVM_REG_PPC_VRSAVE: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - vcpu->arch.vrsave = set_reg_val(reg->id, val); + val = get_reg_val(reg->id, vcpu->arch.vrsave); break; #endif /* CONFIG_ALTIVEC */ default: @@ -970,17 +966,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = -ENXIO; break; } - val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; + vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; break; case KVM_REG_PPC_VSCR: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); + vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); break; case KVM_REG_PPC_VRSAVE: - val = get_reg_val(reg->id, vcpu->arch.vrsave); + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vrsave = set_reg_val(reg->id, val); break; #endif /* CONFIG_ALTIVEC */ default: diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 43dafb9d6a46f..4d87122cf6a72 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -85,7 +85,6 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, BUG_ON(index >= 4096); vpn = hpt_vpn(ea, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); hpte_slot_array = get_hpte_slot_array(pmdp); if (psize == MMU_PAGE_4K) { /* @@ -101,6 +100,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, valid = hpte_valid(hpte_slot_array, index); if (valid) { /* update the hpte bits */ + hash = hpt_hash(vpn, shift, ssize); hidx = hpte_hash_index(hpte_slot_array, index); if (hidx & _PTEIDX_SECONDARY) hash = ~hash; @@ -126,6 +126,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, if (!valid) { unsigned long hpte_group; + hash = hpt_hash(vpn, shift, ssize); /* insert new entry */ pa = 
pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; new_pmd |= _PAGE_HASHPTE; diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 17cea18a09d32..264c473c1b3c0 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -78,18 +78,9 @@ static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, PPC_LI(r_X, 0); } - switch (filter[0].code) { - case BPF_RET | BPF_K: - case BPF_LD | BPF_W | BPF_LEN: - case BPF_LD | BPF_W | BPF_ABS: - case BPF_LD | BPF_H | BPF_ABS: - case BPF_LD | BPF_B | BPF_ABS: - /* first instruction sets A register (or is RET 'constant') */ - break; - default: - /* make sure we dont leak kernel information to user */ + /* make sure we dont leak kernel information to user */ + if (bpf_needs_clear_a(&filter[0])) PPC_LI(r_A, 0); - } } static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 12b638425bb9b..d90893b76e7ce 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -131,7 +131,16 @@ static void pmao_restore_workaround(bool ebb) { } static bool regs_use_siar(struct pt_regs *regs) { - return !!regs->result; + /* + * When we take a performance monitor exception the regs are setup + * using perf_read_regs() which overloads some fields, in particular + * regs->result to tell us whether to use SIAR. + * + * However if the regs are from another exception, eg. a syscall, then + * they have not been setup using perf_read_regs() and so regs->result + * is something random. + */ + return ((TRAP(regs) == 0xf00) && regs->result); } /* diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index ce738ab3d5a9f..abb396876b9ab 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -455,9 +455,12 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) * PCI devices of the PE are expected to be removed prior * to PE reset. 
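The hunk below therefore stops treating a non-NULL pe->bus as proof of validity, since a pointer cached before a hot-remove can outlive the bus it refers to; instead, a PE state flag records that the primary bus was looked up successfully, and only that flag is trusted afterwards. The idiom in isolation (struct shape simplified for illustration; EEH_PE_PRI_BUS is the flag the patch introduces):

    #define PE_PRI_BUS 0x1                 // set once 'bus' is known-good

    struct pe_cache {
        unsigned int state;
        struct pci_bus *bus;               // may be stale after hot-remove
    };

    static void pe_cache_bus(struct pe_cache *pe, struct pci_bus *found)
    {
        if (pe->state & PE_PRI_BUS)        // cached value is authoritative
            return;
        pe->bus = found;
        if (pe->bus)
            pe->state |= PE_PRI_BUS;       // flag only a successful lookup
    }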
*/ - if (!edev->pe->bus) + if (!(edev->pe->state & EEH_PE_PRI_BUS)) { edev->pe->bus = pci_find_bus(hose->global_number, pdn->busno); + if (edev->pe->bus) + edev->pe->state |= EEH_PE_PRI_BUS; + } /* * Enable EEH explicitly so that we will do EEH check diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 2241565b0739f..b831a2ee32e9a 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -358,7 +358,7 @@ static void opal_handle_message(void) /* Sanity check */ if (type >= OPAL_MSG_TYPE_MAX) { - pr_warning("%s: Unknown message type: %u\n", __func__, type); + pr_warn_once("%s: Unknown message type: %u\n", __func__, type); return; } opal_message_do_notify(type, (void *)&msg); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index bca2aeb6e4b6a..3ff29cf6d05c9 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -99,6 +99,7 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev) struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; struct msi_desc *entry; + irq_hw_number_t hwirq; if (WARN_ON(!phb)) return; @@ -106,10 +107,10 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev) list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&phb->msi_bmp, - virq_to_hw(entry->irq) - phb->msi_base, 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1); } } #endif /* CONFIG_PCI_MSI */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 02e4a17455164..3b6647e574b6d 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -189,7 +189,8 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) int state; int critical; - status = rtas_get_sensor(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); + status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, + &state); if (state > 3) critical = 1; /* Time Critical */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index df6a7041922b6..e6e8b241d7173 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -268,6 +268,11 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act eeh_dev_init(PCI_DN(np), pci->phb); } break; + case OF_RECONFIG_DETACH_NODE: + pci = PCI_DN(np); + if (pci) + list_del(&pci->list); + break; default: err = NOTIFY_DONE; break; diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index f086c6f22dc96..fd16cb5d83f35 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -128,15 +128,16 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; struct fsl_msi *msi_data; + irq_hw_number_t hwirq; list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); msi_data = irq_get_chip_data(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c index 
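A note on this and the next few MSI teardown hunks (pnv, fsl, pasemi, u3msi and ppc4xx all carry the same change): the hwirq number used to be computed and returned to the allocator bitmap while the virq mapping still existed, and only then was the mapping disposed. Each driver now captures the hw number first, disposes the mapping, and frees the bitmap slot last, so the slot cannot be handed out again while the old mapping lingers. The shared shape of the fix (bmp and base stand in for each driver's bitmap and hwirq offset):

    list_for_each_entry(entry, &pdev->msi_list, list) {
        if (entry->irq == NO_IRQ)
            continue;
        hwirq = virq_to_hw(entry->irq);        // capture before teardown
        irq_set_msi_desc(entry->irq, NULL);
        irq_dispose_mapping(entry->irq);       // mapping is gone after this
        msi_bitmap_free_hwirqs(&bmp, hwirq - base, 1);  // release last
    }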
a3f660eed6dea..89496cf4e04dd 100644 --- a/arch/powerpc/sysdev/mpic_pasemi_msi.c +++ b/arch/powerpc/sysdev/mpic_pasemi_msi.c @@ -65,6 +65,7 @@ static struct irq_chip mpic_pasemi_msi_chip = { static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); @@ -72,10 +73,11 @@ static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), ALLOC_CHUNK); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, + hwirq, ALLOC_CHUNK); } return; diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index b2cef18093893..13a34b2375591 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -107,15 +107,16 @@ static u64 find_u4_magic_addr(struct pci_dev *pdev, unsigned int hwirq) static void u3msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 6e2e6aa378bbe..02a137daa1824 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -124,16 +124,17 @@ void ppc4xx_teardown_msi_irqs(struct pci_dev *dev) { struct msi_desc *entry; struct ppc4xx_msi *msi_data = &ppc4xx_msi; + irq_hw_number_t hwirq; dev_dbg(&dev->dev, "PCIE-MSI: tearing down msi irqs\n"); list_for_each_entry(entry, &dev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } } diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index d4788111c1617..fac6ac9790fad 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -10,7 +10,7 @@ targets += misc.o piggy.o sizes.h head.o KBUILD_CFLAGS := -m64 -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks +KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -msoft-float KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) KBUILD_CFLAGS += $(call cc-option,-ffreestanding) diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 42506b371b741..4da604ebf6fd8 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -167,7 +167,7 @@ unsigned long decompress_kernel(void) #endif puts("Uncompressing Linux... 
"); - decompress(input_data, input_len, NULL, NULL, output, NULL, error); + __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); puts("Ok, booting the kernel.\n"); return (unsigned long) output; } diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index d3f896a35b981..2eeb0a0f506d5 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -456,8 +456,6 @@ static const struct super_operations hypfs_s_ops = { .show_options = hypfs_show_options, }; -static struct kobject *s390_kobj; - static int __init hypfs_init(void) { int rc; @@ -481,18 +479,16 @@ static int __init hypfs_init(void) rc = -ENODATA; goto fail_hypfs_sprp_exit; } - s390_kobj = kobject_create_and_add("s390", hypervisor_kobj); - if (!s390_kobj) { - rc = -ENOMEM; + rc = sysfs_create_mount_point(hypervisor_kobj, "s390"); + if (rc) goto fail_hypfs_diag0c_exit; - } rc = register_filesystem(&hypfs_type); if (rc) goto fail_filesystem; return 0; fail_filesystem: - kobject_put(s390_kobj); + sysfs_remove_mount_point(hypervisor_kobj, "s390"); fail_hypfs_diag0c_exit: hypfs_diag0c_exit(); fail_hypfs_sprp_exit: @@ -510,7 +506,7 @@ static int __init hypfs_init(void) static void __exit hypfs_exit(void) { unregister_filesystem(&hypfs_type); - kobject_put(s390_kobj); + sysfs_remove_mount_point(hypervisor_kobj, "s390"); hypfs_diag0c_exit(); hypfs_sprp_exit(); hypfs_vm_exit(); diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index cfad7fca01d61..d7697ab802f6c 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -57,7 +57,10 @@ union ctlreg0 { unsigned long lap : 1; /* Low-address-protection control */ unsigned long : 4; unsigned long edat : 1; /* Enhanced-DAT-enablement control */ - unsigned long : 23; + unsigned long : 4; + unsigned long afp : 1; /* AFP-register control */ + unsigned long vx : 1; /* Vector enablement control */ + unsigned long : 17; }; }; diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index bff5e3b6d8223..8ba32436effe4 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -138,6 +138,8 @@ int init_cache_level(unsigned int cpu) union cache_topology ct; enum cache_type ctype; + if (!test_facility(34)) + return -EOPNOTSUPP; if (!this_cpu_ci) return -EINVAL; ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index fe8d6924efaa8..24b7e554db277 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -48,6 +48,19 @@ typedef struct struct ucontext32 uc; } rt_sigframe32; +static inline void sigset_to_sigset32(unsigned long *set64, + compat_sigset_word *set32) +{ + set32[0] = (compat_sigset_word) set64[0]; + set32[1] = (compat_sigset_word)(set64[0] >> 32); +} + +static inline void sigset32_to_sigset(compat_sigset_word *set32, + unsigned long *set64) +{ + set64[0] = (unsigned long) set32[0] | ((unsigned long) set32[1] << 32); +} + int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err; @@ -280,7 +293,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs, /* Restore high gprs from signal stack */ if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high, - sizeof(&sregs_ext->gprs_high))) + sizeof(sregs_ext->gprs_high))) return -EFAULT; for (i = 0; i < NUM_GPRS; i++) *(__u32 *)®s->gprs[i] = gprs_high[i]; @@ -303,10 +316,12 @@ COMPAT_SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = task_pt_regs(current); sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15]; 
+ compat_sigset_t cset; sigset_t set; - if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) + if (__copy_from_user(&cset.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) goto badframe; + sigset32_to_sigset(cset.sig, set.sig); set_current_blocked(&set); if (restore_sigregs32(regs, &frame->sregs)) goto badframe; @@ -323,10 +338,12 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = task_pt_regs(current); rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15]; + compat_sigset_t cset; sigset_t set; - if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + if (__copy_from_user(&cset, &frame->uc.uc_sigmask, sizeof(cset))) goto badframe; + sigset32_to_sigset(cset.sig, set.sig); set_current_blocked(&set); if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; @@ -397,7 +414,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, return -EFAULT; /* Create struct sigcontext32 on the signal stack */ - memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32); + sigset_to_sigset32(set->sig, sc.oldmask); sc.sregs = (__u32)(unsigned long __force) &frame->sregs; if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) return -EFAULT; @@ -458,6 +475,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { + compat_sigset_t cset; rt_sigframe32 __user *frame; unsigned long restorer; size_t frame_size; @@ -505,11 +523,12 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, store_sigregs(); /* Create ucontext on the signal stack. */ + sigset_to_sigset32(set->sig, cset.sig); if (__put_user(uc_flags, &frame->uc.uc_flags) || __put_user(0, &frame->uc.uc_link) || __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || save_sigregs32(regs, &frame->uc.uc_mcontext) || - __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) || + __copy_to_user(&frame->uc.uc_sigmask, &cset, sizeof(cset)) || save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) return -EFAULT; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 9f73c80590229..49b74454d7ee6 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -415,7 +415,7 @@ static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs) ptr += len; /* Copy lower halves of SIMD registers 0-15 */ for (i = 0; i < 16; i++) { - memcpy(ptr, &vx_regs[i], 8); + memcpy(ptr, &vx_regs[i].u[2], 8); ptr += 8; } return ptr; diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 505c17c0ae1a6..56b550893593a 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -21,6 +21,7 @@ #include #include #include +#include struct mcck_struct { int kill_task; @@ -129,26 +130,30 @@ static int notrace s390_revalidate_registers(struct mci *mci) } else asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area)); - asm volatile( - " ld 0,0(%0)\n" - " ld 1,8(%0)\n" - " ld 2,16(%0)\n" - " ld 3,24(%0)\n" - " ld 4,32(%0)\n" - " ld 5,40(%0)\n" - " ld 6,48(%0)\n" - " ld 7,56(%0)\n" - " ld 8,64(%0)\n" - " ld 9,72(%0)\n" - " ld 10,80(%0)\n" - " ld 11,88(%0)\n" - " ld 12,96(%0)\n" - " ld 13,104(%0)\n" - " ld 14,112(%0)\n" - " ld 15,120(%0)\n" - : : "a" (fpt_save_area)); - /* Revalidate vector registers */ - if (MACHINE_HAS_VX && current->thread.vxrs) { + if (!MACHINE_HAS_VX) { + /* Revalidate floating point registers */ + asm volatile( + " ld 0,0(%0)\n" + " ld 1,8(%0)\n" + " ld 2,16(%0)\n" + " ld 3,24(%0)\n" + " ld 4,32(%0)\n" + " ld 5,40(%0)\n" + " ld 
6,48(%0)\n" + " ld 7,56(%0)\n" + " ld 8,64(%0)\n" + " ld 9,72(%0)\n" + " ld 10,80(%0)\n" + " ld 11,88(%0)\n" + " ld 12,96(%0)\n" + " ld 13,104(%0)\n" + " ld 14,112(%0)\n" + " ld 15,120(%0)\n" + : : "a" (fpt_save_area)); + } else { + /* Revalidate vector registers */ + union ctlreg0 cr0; + if (!mci->vr) { /* * Vector registers can't be restored and therefore @@ -156,8 +161,12 @@ static int notrace s390_revalidate_registers(struct mci *mci) */ kill_task = 1; } + cr0.val = S390_lowcore.cregs_save_area[0]; + cr0.afp = cr0.vx = 1; + __ctl_load(cr0.val, 0, 0); restore_vx_regs((__vector128 *) - S390_lowcore.vector_save_area_addr); + &S390_lowcore.vector_save_area); + __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); } /* Revalidate access registers */ asm volatile( diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index dc5edc29b73aa..8f587d871b9f2 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -163,7 +163,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, asmlinkage void execve_tail(void) { current->thread.fp_regs.fpc = 0; - asm volatile("sfpc %0,%0" : : "d" (0)); + asm volatile("sfpc %0" : : "d" (0)); } /* diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S index 43c3169ea49c7..ada0c07fe1a87 100644 --- a/arch/s390/kernel/sclp.S +++ b/arch/s390/kernel/sclp.S @@ -270,6 +270,8 @@ ENTRY(_sclp_print_early) jno .Lesa2 ahi %r15,-80 stmh %r6,%r15,96(%r15) # store upper register halves + basr %r13,0 + lmh %r0,%r15,.Lzeroes-.(%r13) # clear upper register halves .Lesa2: lr %r10,%r2 # save string pointer lhi %r2,0 @@ -291,6 +293,8 @@ ENTRY(_sclp_print_early) .Lesa3: lm %r6,%r15,120(%r15) # restore registers br %r14 +.Lzeroes: + .fill 64,4,0 .LwritedataS4: .long 0x00760005 # SCLP command for write data diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7262fe438c990..1942f22e66940 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -683,7 +683,7 @@ static void __init setup_memory(void) /* * Setup hardware capabilities. */ -static void __init setup_hwcaps(void) +static int __init setup_hwcaps(void) { static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 }; struct cpuid cpu_id; @@ -749,9 +749,11 @@ static void __init setup_hwcaps(void) elf_hwcap |= HWCAP_S390_TE; /* - * Vector extension HWCAP_S390_VXRS is bit 11. + * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension + * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX + * instead of facility bit 129. */ - if (test_facility(129)) + if (MACHINE_HAS_VX) elf_hwcap |= HWCAP_S390_VXRS; get_cpu_id(&cpu_id); add_device_randomness(&cpu_id, sizeof(cpu_id)); @@ -788,7 +790,9 @@ static void __init setup_hwcaps(void) strcpy(elf_platform, "z13"); break; } + return 0; } +arch_initcall(setup_hwcaps); /* * Add system information as device randomness @@ -870,11 +874,6 @@ void __init setup_arch(char **cmdline_p) smp_fill_possible_mask(); cpu_init(); - /* - * Setup capabilities (ELF_HWCAP & ELF_PLATFORM). - */ - setup_hwcaps(); - /* * Create kernel page tables and switch to virtual addressing. 
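(The hwcap setup removed just above does not disappear: setup_hwcaps() now returns int and registers itself as an initcall, so it runs after early parameter parsing and can honour "novx" through MACHINE_HAS_VX instead of raw facility bit 129.) The shape of that conversion, schematically:

    static int __init setup_hwcaps(void)
    {
        // probe facilities, fill elf_hwcap and elf_platform ...
        if (MACHINE_HAS_VX)            // honours the "novx" override
            elf_hwcap |= HWCAP_S390_VXRS;
        return 0;                      // initcalls must return int
    }
    arch_initcall(setup_hwcaps);       // replaces the direct call in setup_arch()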
*/ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 9de47265ef73d..3dbba9a2bb0f7 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1054,14 +1054,13 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) src_id, 0, 2); /* sending vcpu invalid */ - if (src_id >= KVM_MAX_VCPUS || - kvm_get_vcpu(vcpu->kvm, src_id) == NULL) + if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL) return -EINVAL; if (sclp_has_sigpif()) return __inject_extcall_sigpif(vcpu, src_id); - if (!test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) + if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) return -EBUSY; *extcall = irq->u.extcall; atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); @@ -1134,6 +1133,10 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, irq->u.emerg.code, 0, 2); + /* sending vcpu invalid */ + if (kvm_get_vcpu_by_id(vcpu->kvm, irq->u.emerg.code) == NULL) + return -EINVAL; + set_bit(irq->u.emerg.code, li->sigp_emerg_pending); set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); @@ -1606,6 +1609,9 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) int i; spin_lock(&fi->lock); + fi->pending_irqs = 0; + memset(&fi->srv_signal, 0, sizeof(fi->srv_signal)); + memset(&fi->mchk, 0, sizeof(fi->mchk)); for (i = 0; i < FIRQ_LIST_COUNT; i++) clear_irq_list(&fi->lists[i]); for (i = 0; i < FIRQ_MAX_COUNT; i++) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8cd8e7b288c5d..c3805cf4b9823 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -283,12 +283,16 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) r = 0; break; case KVM_CAP_S390_VECTOR_REGISTERS: - if (MACHINE_HAS_VX) { + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + } else if (MACHINE_HAS_VX) { set_kvm_facility(kvm->arch.model.fac->mask, 129); set_kvm_facility(kvm->arch.model.fac->list, 129); r = 0; } else r = -EINVAL; + mutex_unlock(&kvm->lock); break; case KVM_CAP_S390_USER_STSI: kvm->arch.user_stsi = 1; @@ -1031,7 +1035,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.sca) goto out_err; spin_lock(&kvm_lock); - sca_offset = (sca_offset + 16) & 0x7f0; + sca_offset += 16; + if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE) + sca_offset = 0; kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); spin_unlock(&kvm_lock); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 72e58bd2bee78..7171056fc24dd 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -294,12 +294,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, u16 cpu_addr, u32 parameter, u64 *status_reg) { int rc; - struct kvm_vcpu *dst_vcpu; + struct kvm_vcpu *dst_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; - - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; @@ -481,7 +477,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); if (order_code == SIGP_EXTERNAL_CALL) { - dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); BUG_ON(dest_vcpu == NULL); kvm_s390_vcpu_wakeup(dest_vcpu); diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index 
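The extable change coming up fixes sorting of self-relative entries: insn and fixup are 32-bit offsets relative to their own location in the table, so moving an entry during the sort silently changes what they point at, and fixup sits 4 bytes into the 8-byte entry, hence the extra +4 when rebasing (the old code rebased insn only). A standalone model of the normalize/sort/denormalize round trip:

    #include <stdint.h>
    #include <stdlib.h>

    struct ex { int32_t insn; int32_t fixup; };   // self-relative offsets

    static int cmp_ex(const void *a, const void *b)
    {
        const struct ex *x = a, *y = b;
        return (x->insn > y->insn) - (x->insn < y->insn);
    }

    static void sort_ex(struct ex *t, int n)
    {
        // make offsets table-relative so entries are free to move ...
        for (int i = 0; i < n; i++) {
            t[i].insn  += i * 8;       // entry base
            t[i].fixup += i * 8 + 4;   // field lives 4 bytes into the entry
        }
        qsort(t, n, sizeof(*t), cmp_ex);
        // ... then make them self-relative again at the new positions
        for (int i = 0; i < n; i++) {
            t[i].insn  -= i * 8;
            t[i].fixup -= i * 8 + 4;
        }
    }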
4d1ee88864e8a..18c8b819b0aa9 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -52,12 +52,16 @@ void sort_extable(struct exception_table_entry *start, int i; /* Normalize entries to being relative to the start of the section */ - for (p = start, i = 0; p < finish; p++, i += 8) + for (p = start, i = 0; p < finish; p++, i += 8) { p->insn += i; + p->fixup += i + 4; + } sort(start, finish - start, sizeof(*start), cmp_ex, NULL); /* Denormalize all entries */ - for (p = start, i = 0; p < finish; p++, i += 8) + for (p = start, i = 0; p < finish; p++, i += 8) { p->insn -= i; + p->fixup -= i + 4; + } } #ifdef CONFIG_MODULES diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 55423d8be5801..dc2d7aa564401 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -227,7 +227,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) ({ \ /* Branch instruction needs 6 bytes */ \ int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\ - _EMIT6(op1 | reg(b1, b2) << 16 | rel, op2 | mask); \ + _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) @@ -415,13 +415,13 @@ static void bpf_jit_prologue(struct bpf_jit *jit) EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, BPF_REG_1, offsetof(struct sk_buff, data)); } - /* BPF compatibility: clear A (%b7) and X (%b8) registers */ - if (REG_SEEN(BPF_REG_7)) - /* lghi %b7,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_7, 0); - if (REG_SEEN(BPF_REG_8)) - /* lghi %b8,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_8, 0); + /* BPF compatibility: clear A (%b0) and X (%b7) registers */ + if (REG_SEEN(BPF_REG_A)) + /* lghi %ba,0 */ + EMIT4_IMM(0xa7090000, BPF_REG_A, 0); + if (REG_SEEN(BPF_REG_X)) + /* lghi %bx,0 */ + EMIT4_IMM(0xa7090000, BPF_REG_X, 0); } /* diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c index 95470a472d2cf..208a9753ab38c 100644 --- a/arch/sh/boot/compressed/misc.c +++ b/arch/sh/boot/compressed/misc.c @@ -132,7 +132,7 @@ void decompress_kernel(void) puts("Uncompressing Linux... 
"); cache_control(CACHE_ENABLE); - decompress(input_data, input_len, NULL, NULL, output, NULL, error); + __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); cache_control(CACHE_DISABLE); puts("Ok, booting the kernel.\n"); } diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 2e48eb8813ffa..c90930de76ba8 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -433,6 +433,7 @@ static struct crypto_alg algs[] = { { .blkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, .setkey = aes_set_key, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, @@ -452,6 +453,7 @@ static struct crypto_alg algs[] = { { .blkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, .setkey = aes_set_key, .encrypt = ctr_crypt, .decrypt = ctr_crypt, diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c index 6bf2479a12fbe..561a84d93cf68 100644 --- a/arch/sparc/crypto/camellia_glue.c +++ b/arch/sparc/crypto/camellia_glue.c @@ -274,6 +274,7 @@ static struct crypto_alg algs[] = { { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE, .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, .setkey = camellia_set_key, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index dd6a34fa6e19d..61af794aa2d31 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -429,6 +429,7 @@ static struct crypto_alg algs[] = { { .blkcipher = { .min_keysize = DES_KEY_SIZE, .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, .setkey = des_set_key, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, @@ -485,6 +486,7 @@ static struct crypto_alg algs[] = { { .blkcipher = { .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, .setkey = des3_ede_set_key, .encrypt = cbc3_encrypt, .decrypt = cbc3_decrypt, diff --git a/arch/sparc/include/asm/visasm.h b/arch/sparc/include/asm/visasm.h index 1f0aa2024e94b..6424249d5f785 100644 --- a/arch/sparc/include/asm/visasm.h +++ b/arch/sparc/include/asm/visasm.h @@ -28,16 +28,10 @@ * Must preserve %o5 between VISEntryHalf and VISExitHalf */ #define VISEntryHalf \ - rd %fprs, %o5; \ - andcc %o5, FPRS_FEF, %g0; \ - be,pt %icc, 297f; \ - sethi %hi(298f), %g7; \ - sethi %hi(VISenterhalf), %g1; \ - jmpl %g1 + %lo(VISenterhalf), %g0; \ - or %g7, %lo(298f), %g7; \ - clr %o5; \ -297: wr %o5, FPRS_FEF, %fprs; \ -298: + VISEntry + +#define VISExitHalf \ + VISExit #define VISEntryHalfFast(fail_label) \ rd %fprs, %o5; \ @@ -47,7 +41,7 @@ ba,a,pt %xcc, fail_label; \ 297: wr %o5, FPRS_FEF, %fprs; -#define VISExitHalf \ +#define VISExitHalfFast \ wr %o5, 0, %fprs; #ifndef __ASSEMBLY__ diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 30e7ddb27a3a9..c690c8e16a96e 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -413,7 +413,7 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second SYSCALL_DEFINE1(sparc64_personality, unsigned long, personality) { - int ret; + long ret; if (personality(current->personality) == PER_LINUX32 && personality(personality) == PER_LINUX) diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 140527a20e7df..83aeeb1dffdb3 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -240,8 +240,11 @@ FUNC_NAME: /* 
%o0=dst, %o1=src, %o2=len */ add %o0, 0x40, %o0 bne,pt %icc, 1b LOAD(prefetch, %g1 + 0x200, #n_reads_strong) +#ifdef NON_USER_COPY + VISExitHalfFast +#else VISExitHalf - +#endif brz,pn %o2, .Lexit cmp %o2, 19 ble,pn %icc, .Lsmall_unaligned diff --git a/arch/sparc/lib/VISsave.S b/arch/sparc/lib/VISsave.S index b320ae9e2e2e8..a063d84336d63 100644 --- a/arch/sparc/lib/VISsave.S +++ b/arch/sparc/lib/VISsave.S @@ -44,9 +44,8 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 stx %g3, [%g6 + TI_GSR] 2: add %g6, %g1, %g3 - cmp %o5, FPRS_DU - be,pn %icc, 6f - sll %g1, 3, %g1 + mov FPRS_DU | FPRS_DL | FPRS_FEF, %o5 + sll %g1, 3, %g1 stb %o5, [%g3 + TI_FPSAVED] rd %gsr, %g2 add %g6, %g1, %g3 @@ -80,65 +79,3 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 .align 32 80: jmpl %g7 + %g0, %g0 nop - -6: ldub [%g3 + TI_FPSAVED], %o5 - or %o5, FPRS_DU, %o5 - add %g6, TI_FPREGS+0x80, %g2 - stb %o5, [%g3 + TI_FPSAVED] - - sll %g1, 5, %g1 - add %g6, TI_FPREGS+0xc0, %g3 - wr %g0, FPRS_FEF, %fprs - membar #Sync - stda %f32, [%g2 + %g1] ASI_BLK_P - stda %f48, [%g3 + %g1] ASI_BLK_P - membar #Sync - ba,pt %xcc, 80f - nop - - .align 32 -80: jmpl %g7 + %g0, %g0 - nop - - .align 32 -VISenterhalf: - ldub [%g6 + TI_FPDEPTH], %g1 - brnz,a,pn %g1, 1f - cmp %g1, 1 - stb %g0, [%g6 + TI_FPSAVED] - stx %fsr, [%g6 + TI_XFSR] - clr %o5 - jmpl %g7 + %g0, %g0 - wr %g0, FPRS_FEF, %fprs - -1: bne,pn %icc, 2f - srl %g1, 1, %g1 - ba,pt %xcc, vis1 - sub %g7, 8, %g7 -2: addcc %g6, %g1, %g3 - sll %g1, 3, %g1 - andn %o5, FPRS_DU, %g2 - stb %g2, [%g3 + TI_FPSAVED] - - rd %gsr, %g2 - add %g6, %g1, %g3 - stx %g2, [%g3 + TI_GSR] - add %g6, %g1, %g2 - stx %fsr, [%g2 + TI_XFSR] - sll %g1, 5, %g1 -3: andcc %o5, FPRS_DL, %g0 - be,pn %icc, 4f - add %g6, TI_FPREGS, %g2 - - add %g6, TI_FPREGS+0x40, %g3 - membar #Sync - stda %f0, [%g2 + %g1] ASI_BLK_P - stda %f16, [%g3 + %g1] ASI_BLK_P - membar #Sync - ba,pt %xcc, 4f - nop - - .align 32 -4: and %o5, FPRS_DU, %o5 - jmpl %g7 + %g0, %g0 - wr %o5, FPRS_FEF, %fprs diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 1d649a95660c8..8069ce12f20b1 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -135,10 +135,6 @@ EXPORT_SYMBOL(copy_user_page); void VISenter(void); EXPORT_SYMBOL(VISenter); -/* CRYPTO code needs this */ -void VISenterhalf(void); -EXPORT_SYMBOL(VISenterhalf); - extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, unsigned long *); diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 7931eeeb649af..8109e92cd619e 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -420,22 +420,9 @@ void bpf_jit_compile(struct bpf_prog *fp) } emit_reg_move(O7, r_saved_O7); - switch (filter[0].code) { - case BPF_RET | BPF_K: - case BPF_LD | BPF_W | BPF_LEN: - case BPF_LD | BPF_W | BPF_ABS: - case BPF_LD | BPF_H | BPF_ABS: - case BPF_LD | BPF_B | BPF_ABS: - /* The first instruction sets the A register (or is - * a "RET 'constant'") - */ - break; - default: - /* Make sure we dont leak kernel information to the - * user. - */ + /* Make sure we dont leak kernel information to the user. 
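Both this sparc JIT and the powerpc JIT earlier in the series replace the same open-coded switch on the first filter instruction with a shared bpf_needs_clear_a() predicate. Reconstructed from the switches it replaces, its likely shape is (a hedged sketch, not a verbatim copy of the header):

    static inline bool bpf_needs_clear_a(const struct sock_filter *first)
    {
        switch (first->code) {
        case BPF_RET | BPF_K:
        case BPF_LD | BPF_W | BPF_LEN:
            return false;              // A is set (or never read) first
        case BPF_LD | BPF_W | BPF_ABS:
        case BPF_LD | BPF_H | BPF_ABS:
        case BPF_LD | BPF_B | BPF_ABS:
            // the ALU_XOR_X ancillary load reads A before writing it
            if (first->k == SKF_AD_OFF + SKF_AD_ALU_XOR_X)
                return true;
            return false;
        default:
            return true;               // conservatively zero A
        }
    }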
*/ + if (bpf_needs_clear_a(&filter[0])) emit_clear(r_A); /* A = 0 */ - } for (i = 0; i < flen; i++) { unsigned int K = filter[i].k; diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index e8c2c04143cda..c667e104a0c25 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -113,8 +113,6 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo))) return -EFAULT; - memset(to, 0, sizeof(*to)); - err = __get_user(to->si_signo, &from->si_signo); err |= __get_user(to->si_errno, &from->si_errno); err |= __get_user(to->si_code, &from->si_code); diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index d366675e4bf88..396b5c96e2724 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1139,7 +1139,7 @@ static void __init load_hv_initrd(void) void __init free_initrd_mem(unsigned long begin, unsigned long end) { - free_bootmem(__pa(begin), end - begin); + free_bootmem_late(__pa(begin), end - begin); } static int __init setup_initrd(char *str) diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 47f1ff056a54f..22a358ef1b0cd 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -94,6 +94,8 @@ static int start_ptraced_child(void) { int pid, n, status; + fflush(stdout); + pid = fork(); if (pid == 0) ptrace_child(); diff --git a/arch/unicore32/boot/compressed/misc.c b/arch/unicore32/boot/compressed/misc.c index 176d5bda3559d..5c65dfee278c0 100644 --- a/arch/unicore32/boot/compressed/misc.c +++ b/arch/unicore32/boot/compressed/misc.c @@ -119,8 +119,8 @@ unsigned long decompress_kernel(unsigned long output_start, output_ptr = get_unaligned_le32(tmp); arch_decomp_puts("Uncompressing Linux..."); - decompress(input_data, input_data_end - input_data, NULL, NULL, - output_data, NULL, error); + __decompress(input_data, input_data_end - input_data, NULL, NULL, + output_data, 0, NULL, error); arch_decomp_puts(" done, booting the kernel.\n"); return output_ptr; } diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 48304b89b601f..4c3f76b425c10 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -667,6 +667,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto, bool conout_found = false; void *dummy = NULL; u32 h = handles[i]; + u32 current_fb_base; status = efi_call_early(handle_protocol, h, proto, (void **)&gop32); @@ -678,7 +679,7 @@ if (status == EFI_SUCCESS) conout_found = true; - status = __gop_query32(gop32, &info, &size, &fb_base); + status = __gop_query32(gop32, &info, &size, &current_fb_base); if (status == EFI_SUCCESS && (!first_gop || conout_found)) { /* * Systems that use the UEFI Console Splitter may @@ -692,6 +693,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto, pixel_format = info->pixel_format; pixel_info = info->pixel_information; pixels_per_scan_line = info->pixels_per_scan_line; + fb_base = current_fb_base; /* * Once we've found a GOP supporting ConOut, @@ -770,6 +772,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto, bool conout_found = false; void *dummy = NULL; u64 h = handles[i]; + u32 current_fb_base; status = efi_call_early(handle_protocol, h, proto, (void **)&gop64); @@ -781,7 +784,7 @@ if (status == EFI_SUCCESS) conout_found = true; - status = __gop_query64(gop64,
&info, &size, &fb_base); + status = __gop_query64(gop64, &info, &size, ¤t_fb_base); if (status == EFI_SUCCESS && (!first_gop || conout_found)) { /* * Systems that use the UEFI Console Splitter may @@ -795,6 +798,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto, pixel_format = info->pixel_format; pixel_info = info->pixel_information; pixels_per_scan_line = info->pixels_per_scan_line; + fb_base = current_fb_base; /* * Once we've found a GOP supporting ConOut, @@ -1193,6 +1197,10 @@ static efi_status_t setup_e820(struct boot_params *params, unsigned int e820_type = 0; unsigned long m = efi->efi_memmap; +#ifdef CONFIG_X86_64 + m |= (u64)efi->efi_memmap_hi << 32; +#endif + d = (efi_memory_desc_t *)(m + (i * efi->efi_memdesc_size)); switch (d->type) { case EFI_RESERVED_TYPE: diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index a107b935e22fb..e28437e0f7088 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -424,7 +424,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, #endif debug_putstr("\nDecompressing Linux... "); - decompress(input_data, input_len, NULL, NULL, output, NULL, error); + __decompress(input_data, input_len, NULL, NULL, output, output_len, + NULL, error); parse_elf(output); /* * 32-bit always performs relocations. 64-bit relocations are only diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 2079baf06bdd3..daf8d2b9a2173 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -294,6 +294,7 @@ static struct ahash_alg ghash_async_alg = { .cra_name = "ghash", .cra_driver_name = "ghash-clmulni", .cra_priority = 400, + .cra_ctxsize = sizeof(struct ghash_async_ctx), .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, .cra_blocksize = GHASH_BLOCK_SIZE, .cra_type = &crypto_ahash_type, diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 4fa687a47a62d..6b8d6e8cd4494 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -27,7 +27,7 @@ #define BOOT_HEAP_SIZE 0x400000 #else /* !CONFIG_KERNEL_BZIP2 */ -#define BOOT_HEAP_SIZE 0x8000 +#define BOOT_HEAP_SIZE 0x10000 #endif /* !CONFIG_KERNEL_BZIP2 */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index a0bf89fd26470..4e10d73cf0184 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -280,21 +280,6 @@ static inline void clear_LDT(void) set_ldt(NULL, 0); } -/* - * load one particular LDT into the current CPU - */ -static inline void load_LDT_nolock(mm_context_t *pc) -{ - set_ldt(pc->ldt, pc->size); -} - -static inline void load_LDT(mm_context_t *pc) -{ - preempt_disable(); - load_LDT_nolock(pc); - preempt_enable(); -} - static inline unsigned long get_desc_base(const struct desc_struct *desc) { return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index 8b22422fbad8a..74a2a8dc99089 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -14,15 +14,11 @@ #ifndef __ASSEMBLY__ -extern pte_t kasan_zero_pte[]; -extern pte_t kasan_zero_pmd[]; -extern pte_t kasan_zero_pud[]; - #ifdef CONFIG_KASAN -void __init kasan_map_early_shadow(pgd_t *pgd); +void __init kasan_early_init(void); void __init kasan_init(void); #else -static inline void kasan_map_early_shadow(pgd_t *pgd) { } +static inline void kasan_early_init(void) { } static inline 
void kasan_init(void) { } #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f4a555beef190..41b06fca39f70 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -591,7 +591,7 @@ struct kvm_arch { struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; - int vapics_in_nmi_mode; + atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; struct kvm_apic_map *apic_map; diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 09b9620a73b48..364d27481a52a 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -9,8 +9,7 @@ * we put the segment information here. */ typedef struct { - void *ldt; - int size; + struct ldt_struct *ldt; #ifdef CONFIG_X86_64 /* True if mm supports a task running in 32 bit compatibility mode. */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 883f6b933fa4b..73e38f14ddebe 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -23,7 +23,7 @@ extern struct static_key rdpmc_always_available; static inline void load_mm_cr4(struct mm_struct *mm) { - if (static_key_true(&rdpmc_always_available) || + if (static_key_false(&rdpmc_always_available) || atomic_read(&mm->context.perf_rdpmc_allowed)) cr4_set_bits(X86_CR4_PCE); else @@ -33,6 +33,50 @@ static inline void load_mm_cr4(struct mm_struct *mm) static inline void load_mm_cr4(struct mm_struct *mm) {} #endif +/* + * ldt_structs can be allocated, used, and freed, but they are never + * modified while live. + */ +struct ldt_struct { + /* + * Xen requires page-aligned LDTs with special permissions. This is + * needed to prevent us from installing evil descriptors such as + * call gates. On native, we could merge the ldt_struct and LDT + * allocations, but it's not worth trying to optimize. + */ + struct desc_struct *entries; + int size; +}; + +static inline void load_mm_ldt(struct mm_struct *mm) +{ + struct ldt_struct *ldt; + + /* lockless_dereference synchronizes with smp_store_release */ + ldt = lockless_dereference(mm->context.ldt); + + /* + * Any change to mm->context.ldt is followed by an IPI to all + * CPUs with the mm active. The LDT will not be freed until + * after the IPI is handled by all such CPUs. This means that, + * if the ldt_struct changes before we return, the values we see + * will be safe, and the new values will be loaded before we run + * any user code. + * + * NB: don't try to convert this to use RCU without extreme care. + * We would still need IRQs off, because we don't want to change + * the local LDT after an IPI loaded a newer value than the one + * that we can see. + */ + + if (unlikely(ldt)) + set_ldt(ldt->entries, ldt->size); + else + clear_LDT(); + + DEBUG_LOCKS_WARN_ON(preemptible()); +} + /* * Used for LDT copy/destruction. */ @@ -60,8 +104,36 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, #endif cpumask_set_cpu(cpu, mm_cpumask(next)); - /* Re-load page tables */ + /* + * Re-load page tables. + * + * This logic has an ordering constraint: + * + * CPU 0: Write to a PTE for 'next' + * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI. + * CPU 1: set bit 1 in next's mm_cpumask + * CPU 1: load from the PTE that CPU 0 writes (implicit) + * + * We need to prevent an outcome in which CPU 1 observes + * the new PTE value and CPU 0 observes bit 1 clear in + * mm_cpumask. (If that occurs, then the IPI will never + * be sent, and CPU 0's TLB will contain a stale entry.) 
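Laid out as a two-CPU litmus sketch:

    // CPU 0 (PTE writer)                 CPU 1 (incoming switch_mm)
    // store: new PTE for 'next'          store: set this cpu in mm_cpumask
    // load:  mm_cpumask (send IPI?)      load:  that PTE (TLB fill)
    //
    // forbidden result: CPU 0 sees the bit clear AND CPU 1 sees the old PTE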
+ * + * The bad outcome can occur if either CPU's load is + * reordered before that CPU's store, so both CPUs must + * execute full barriers to prevent this from happening. + * + * Thus, switch_mm needs a full barrier between the + * store to mm_cpumask and any operation that could load + * from next->pgd. TLB fills are special and can happen + * due to instruction fetches or for no reason at all, + * and neither LOCK nor MFENCE orders them. + * Fortunately, load_cr3() is serializing and gives the + * ordering guarantee we need. + * + */ load_cr3(next->pgd); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); /* Stop flush ipis for the previous mm */ @@ -78,12 +150,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * was called and then modify_ldt changed * prev->context.ldt but suppressed an IPI to this CPU. * In this case, prev->context.ldt != NULL, because we - * never free an LDT while the mm still exists. That - * means that next->context.ldt != prev->context.ldt, - * because mms never share an LDT. + * never set context.ldt to NULL while the mm still + * exists. That means that next->context.ldt != + * prev->context.ldt, because mms never share an LDT. */ if (unlikely(prev->context.ldt != next->context.ldt)) - load_LDT_nolock(&next->context); + load_mm_ldt(next); } #ifdef CONFIG_SMP else { @@ -98,15 +170,19 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * schedule, protecting us from simultaneous changes. */ cpumask_set_cpu(cpu, mm_cpumask(next)); + /* * We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. + * + * As above, load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask write. */ load_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); load_mm_cr4(next); - load_LDT_nolock(&next->context); + load_mm_ldt(next); } } #endif diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 78f0c8cbe316f..74fcdf3f1534c 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -337,20 +337,18 @@ static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) } static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) { + pgprotval_t val = pgprot_val(pgprot); pgprot_t new; - unsigned long val; - val = pgprot_val(pgprot); pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); return new; } static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot) { + pgprotval_t val = pgprot_val(pgprot); pgprot_t new; - unsigned long val; - val = pgprot_val(pgprot); pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | ((val & _PAGE_PAT_LARGE) >> (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 8f32718425339..67b6cd00a44f8 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -90,9 +90,9 @@ static __always_inline bool __preempt_count_dec_and_test(void) /* * Returns true when we need to resched and can (barring IRQ state). 
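The rework below threads the caller's expected preempt_offset through the test: x86 keeps PREEMPT_NEED_RESCHED folded into the per-cpu preempt count as an inverted high bit, so a single equality checks both that a reschedule is pending and that no nesting beyond what the caller itself holds remains. Modeled standalone (the inverted-bit encoding mirrors the kernel's; values here are illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    #define PREEMPT_NEED_RESCHED 0x80000000u   // stored inverted: 0 means resched needed

    // raw = count | (need_resched ? 0 : PREEMPT_NEED_RESCHED)
    static bool should_resched(uint32_t raw, uint32_t preempt_offset)
    {
        return raw == preempt_offset;   // resched pending AND count == offset
    }

With an offset of 0 this degenerates to the old !raw test; callers such as cond_resched_lock() can instead pass the offset of the lock they legitimately hold.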
*/ -static __always_inline bool should_resched(void) +static __always_inline bool should_resched(int preempt_offset) { - return unlikely(!raw_cpu_read_4(__preempt_count)); + return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); } #ifdef CONFIG_PREEMPT diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 6fe6b182c9981..9dfce4e0417d9 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -57,9 +57,9 @@ struct sigcontext { unsigned long ip; unsigned long flags; unsigned short cs; - unsigned short __pad2; /* Was called gs, but was always zero. */ - unsigned short __pad1; /* Was called fs, but was always zero. */ - unsigned short ss; + unsigned short gs; + unsigned short fs; + unsigned short __pad0; unsigned long err; unsigned long trapno; unsigned long oldmask; diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index 16dc4e8a2cd34..d8b9f9081e86f 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -177,24 +177,9 @@ struct sigcontext { __u64 rip; __u64 eflags; /* RFLAGS */ __u16 cs; - - /* - * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"), - * Linux saved and restored fs and gs in these slots. This - * was counterproductive, as fsbase and gsbase were never - * saved, so arch_prctl was presumably unreliable. - * - * If these slots are ever needed for any other purpose, there - * is some risk that very old 64-bit binaries could get - * confused. I doubt that many such binaries still work, - * though, since the same patch in 2.5.64 also removed the - * 64-bit set_thread_area syscall, so it appears that there is - * no TLS API that works in both pre- and post-2.5.64 kernels. - */ - __u16 __pad2; /* Was gs. */ - __u16 __pad1; /* Was fs. */ - - __u16 ss; + __u16 gs; + __u16 fs; + __u16 __pad0; __u64 err; __u64 trapno; __u64 oldmask; diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index b5d7640abc5d6..8a4add8e46393 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -100,6 +100,7 @@ { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ + { SVM_EXIT_EXCP_BASE + AC_VECTOR, "AC excp" }, \ { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ { SVM_EXIT_INTR, "interrupt" }, \ { SVM_EXIT_NMI, "nmi" }, \ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index dbe76a14c3c99..07bea80223f69 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -489,6 +489,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + acpi_penalize_sci_irq(bus_irq, trigger, polarity); /* * stash over-ride to indicate we've been here diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index d1daead5fcddd..adb3eaf8fe2a5 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -16,6 +16,7 @@ #include #include +#include #include "../../realmode/rm/wakeup.h" #include "sleep.h" @@ -107,7 +108,13 @@ int x86_acpi_suspend_lowlevel(void) saved_magic = 0x123456789abcdef0L; #endif /* CONFIG_64BIT */ + /* + * Pause/unpause graph tracing around do_suspend_lowlevel as it has + * inconsistent call/return info after it jumps to the wakeup vector. 
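The function-graph tracer works by replacing return addresses with a hook, and do_suspend_lowlevel() resumes through the wakeup vector rather than by unwinding its call frame, so a hooked return address recorded on entry would never be popped and the tracer's shadow stack would go inconsistent across suspend. Bracketing is the generic remedy for any routine with a non-standard return path (sketch):

    pause_graph_tracing();     // stop pushing return-address hooks
    do_suspend_lowlevel();     // control comes back via the wakeup vector
    unpause_graph_tracing();   // call/return bookkeeping is sane again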
+ */ + pause_graph_tracing(); do_suspend_lowlevel(); + unpause_graph_tracing(); return 0; } diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index aef6531931606..d1918a8c43930 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -325,10 +325,15 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) { + unsigned long flags; + if (instr[0] != 0x90) return; + local_irq_save(flags); add_nops(instr + (a->instrlen - a->padlen), a->padlen); + sync_core(); + local_irq_restore(flags); DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", instr, a->instrlen - a->padlen, a->padlen); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index dcb52850a28fc..307a49828826b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -336,6 +336,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) apic_write(APIC_LVTT, lvtt_value); if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { + /* + * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode, + * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized. + * According to Intel, MFENCE can do the serialization here. + */ + asm volatile("mfence" : : : "memory"); + printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); return; } @@ -1424,7 +1431,7 @@ static inline void __x2apic_disable(void) { u64 msr; - if (cpu_has_apic) + if (!cpu_has_apic) return; rdmsrl(MSR_IA32_APICBASE, msr); @@ -1483,10 +1490,13 @@ void x2apic_setup(void) static __init void x2apic_disable(void) { - u32 x2apic_id; + u32 x2apic_id, state = x2apic_state; + + x2apic_mode = 0; + x2apic_state = X2APIC_DISABLED; - if (x2apic_state != X2APIC_ON) - goto out; + if (state != X2APIC_ON) + return; x2apic_id = read_apic_id(); if (x2apic_id >= 255) @@ -1494,9 +1504,6 @@ static __init void x2apic_disable(void) __x2apic_disable(); register_lapic_address(mp_lapic_addr); -out: - x2apic_state = X2APIC_DISABLED; - x2apic_mode = 0; } static __init void x2apic_enable(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a62cf04dac8ae..5732326ec1260 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -291,10 +291,9 @@ __setup("nosmap", setup_disable_smap); static __always_inline void setup_smap(struct cpuinfo_x86 *c) { - unsigned long eflags; + unsigned long eflags = native_save_fl(); /* This should have been cleared long ago */ - raw_local_save_flags(eflags); BUG_ON(eflags & X86_EFLAGS_AC); if (cpu_has(c, X86_FEATURE_SMAP)) { @@ -1434,7 +1433,7 @@ void cpu_init(void) load_sp0(t, &current->thread); set_tss_desc(cpu, t); load_TR_desc(); - load_LDT(&init_mm.context); + load_mm_ldt(&init_mm); clear_all_debug_regs(); dbg_restore_debug_regs(); @@ -1483,7 +1482,7 @@ void cpu_init(void) load_sp0(t, thread); set_tss_desc(cpu, t); load_TR_desc(); - load_LDT(&init_mm.context); + load_mm_ldt(&init_mm); t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index b4a41cf030eda..e166d833cf63b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -116,6 +116,27 @@ void mce_intel_hcpu_update(unsigned long cpu) per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; } +static void cmci_toggle_interrupt_mode(bool on) +{ + unsigned long flags, *owned; + int bank; + u64 val; + +
raw_spin_lock_irqsave(&cmci_discover_lock, flags); + owned = this_cpu_ptr(mce_banks_owned); + for_each_set_bit(bank, owned, MAX_NR_BANKS) { + rdmsrl(MSR_IA32_MCx_CTL2(bank), val); + + if (on) + val |= MCI_CTL2_CMCI_EN; + else + val &= ~MCI_CTL2_CMCI_EN; + + wrmsrl(MSR_IA32_MCx_CTL2(bank), val); + } + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); +} + unsigned long cmci_intel_adjust_timer(unsigned long interval) { if ((this_cpu_read(cmci_backoff_cnt) > 0) && @@ -145,7 +166,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval) */ if (!atomic_read(&cmci_storm_on_cpus)) { __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); - cmci_reenable(); + cmci_toggle_interrupt_mode(true); cmci_recheck(); } return CMCI_POLL_INTERVAL; @@ -156,22 +177,6 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval) } } -static void cmci_storm_disable_banks(void) -{ - unsigned long flags, *owned; - int bank; - u64 val; - - raw_spin_lock_irqsave(&cmci_discover_lock, flags); - owned = this_cpu_ptr(mce_banks_owned); - for_each_set_bit(bank, owned, MAX_NR_BANKS) { - rdmsrl(MSR_IA32_MCx_CTL2(bank), val); - val &= ~MCI_CTL2_CMCI_EN; - wrmsrl(MSR_IA32_MCx_CTL2(bank), val); - } - raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); -} - static bool cmci_storm_detect(void) { unsigned int cnt = __this_cpu_read(cmci_storm_cnt); @@ -193,7 +198,7 @@ static bool cmci_storm_detect(void) if (cnt <= CMCI_STORM_THRESHOLD) return false; - cmci_storm_disable_banks(); + cmci_toggle_interrupt_mode(false); __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); r = atomic_add_return(1, &cmci_storm_on_cpus); mce_timer_kick(CMCI_STORM_INTERVAL); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4f7001f28936f..4cc98a4e8ea95 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -270,11 +270,7 @@ static bool check_hw_exists(void) static void hw_perf_event_destroy(struct perf_event *event) { - if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { - release_pmc_hardware(); - release_ds_buffers(); - mutex_unlock(&pmc_reserve_mutex); - } + x86_release_hardware(); } void hw_perf_lbr_event_destroy(struct perf_event *event) @@ -324,6 +320,35 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) return x86_pmu_extra_regs(val, event); } +int x86_reserve_hardware(void) +{ + int err = 0; + + if (!atomic_inc_not_zero(&active_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&active_events) == 0) { + if (!reserve_pmc_hardware()) + err = -EBUSY; + else + reserve_ds_buffers(); + } + if (!err) + atomic_inc(&active_events); + mutex_unlock(&pmc_reserve_mutex); + } + + return err; +} + +void x86_release_hardware(void) +{ + if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { + release_pmc_hardware(); + release_ds_buffers(); + mutex_unlock(&pmc_reserve_mutex); + } +} + /* * Check if we can create event of a certain type (that no conflicting events * are present). 
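x86_reserve_hardware() and x86_release_hardware() above factor the refcounted reservation of the PMC hardware and DS buffers out of event init/destroy so that the BTS code later in the series can reuse it. The concurrency idiom is worth spelling out: atomic_inc_not_zero() grabs a fast reference while the count is already live, and only the 0 to 1 transition takes the mutex and does the real work. Schematically:

    if (!atomic_inc_not_zero(&refs)) {    // 0 -> 1 must be serialized
        mutex_lock(&lock);
        if (atomic_read(&refs) == 0)
            err = acquire_resources();    // only the first user does this
        if (!err)
            atomic_inc(&refs);            // publish the reference
        mutex_unlock(&lock);
    }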
@@ -336,9 +361,10 @@ int x86_add_exclusive(unsigned int what) return 0; mutex_lock(&pmc_reserve_mutex); - for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) + for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) { if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i])) goto out; + } atomic_inc(&x86_pmu.lbr_exclusive[what]); ret = 0; @@ -527,19 +553,7 @@ static int __x86_pmu_event_init(struct perf_event *event) if (!x86_pmu_initialized()) return -ENODEV; - err = 0; - if (!atomic_inc_not_zero(&active_events)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_events) == 0) { - if (!reserve_pmc_hardware()) - err = -EBUSY; - else - reserve_ds_buffers(); - } - if (!err) - atomic_inc(&active_events); - mutex_unlock(&pmc_reserve_mutex); - } + err = x86_reserve_hardware(); if (err) return err; @@ -2156,21 +2170,25 @@ static unsigned long get_segment_base(unsigned int segment) int idx = segment >> 3; if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { + struct ldt_struct *ldt; + if (idx > LDT_ENTRIES) return 0; - if (idx > current->active_mm->context.size) + /* IRQs are off, so this synchronizes with smp_store_release */ + ldt = lockless_dereference(current->active_mm->context.ldt); + if (!ldt || idx > ldt->size) return 0; - desc = current->active_mm->context.ldt; + desc = &ldt->entries[idx]; } else { if (idx > GDT_ENTRIES) return 0; - desc = raw_cpu_ptr(gdt_page.gdt); + desc = raw_cpu_ptr(gdt_page.gdt) + idx; } - return get_desc_base(desc + idx); + return get_desc_base(desc); } #ifdef CONFIG_COMPAT diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index ef78516850fb0..f068695eaca06 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -703,6 +703,10 @@ int x86_add_exclusive(unsigned int what); void x86_del_exclusive(unsigned int what); +int x86_reserve_hardware(void); + +void x86_release_hardware(void); + void hw_perf_lbr_event_destroy(struct perf_event *event); int x86_setup_perfctr(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index a1e35c9f06b95..22212615a1375 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2098,9 +2098,12 @@ static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { - struct event_constraint *c1 = cpuc->event_constraint[idx]; + struct event_constraint *c1 = NULL; struct event_constraint *c2; + if (idx >= 0) /* fake does < 0 */ + c1 = cpuc->event_constraint[idx]; + /* * first time only * - static constraint: no change across incremental scheduling calls @@ -3253,6 +3256,8 @@ __init int intel_pmu_init(void) case 61: /* 14nm Broadwell Core-M */ case 86: /* 14nm Broadwell Xeon D */ + case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ + case 79: /* 14nm Broadwell Server */ x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -3322,13 +3327,13 @@ __init int intel_pmu_init(void) * counter, so do not extend mask to generic counters */ for_each_event_constraint(c, x86_pmu.event_constraints) { - if (c->cmask != FIXED_EVENT_FLAGS - || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { - continue; + if (c->cmask == FIXED_EVENT_FLAGS + && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) { + c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; } - - c->idxmsk64 |= (1ULL << 
x86_pmu.num_counters) - 1; - c->weight += x86_pmu.num_counters; + c->idxmsk64 &= + ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); + c->weight = hweight64(c->idxmsk64); } } diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c index ac1f0c55f3796..7795f3f8b1d57 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c @@ -483,17 +483,26 @@ static int bts_event_add(struct perf_event *event, int mode) static void bts_event_destroy(struct perf_event *event) { + x86_release_hardware(); x86_del_exclusive(x86_lbr_exclusive_bts); } static int bts_event_init(struct perf_event *event) { + int ret; + if (event->attr.type != bts_pmu.type) return -ENOENT; if (x86_add_exclusive(x86_lbr_exclusive_bts)) return -EBUSY; + ret = x86_reserve_hardware(); + if (ret) { + x86_del_exclusive(x86_lbr_exclusive_bts); + return ret; + } + event->destroy = bts_event_destroy; return 0; diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index e4d1b8b738fa8..cb77b11bc4143 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -933,6 +933,14 @@ static u64 intel_cqm_event_count(struct perf_event *event) if (!cqm_group_leader(event)) return 0; + /* + * Getting up-to-date values requires an SMP IPI which is not + * possible if we're being called in interrupt context. Return + * the cached values instead. + */ + if (unlikely(in_interrupt())) + goto out; + /* * Notice that we don't perform the reading of an RMID * atomically, because we can't hold a spin lock across the diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index c76d3e37c6e1d..403ace539b73d 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -184,10 +184,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs) } #ifdef CONFIG_KEXEC_FILE -static int get_nr_ram_ranges_callback(unsigned long start_pfn, - unsigned long nr_pfn, void *arg) +static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg) { - int *nr_ranges = arg; + unsigned int *nr_ranges = arg; (*nr_ranges)++; return 0; @@ -213,7 +212,7 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced, ced->image = image; - walk_system_ram_range(0, -1, &nr_ranges, + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); ced->max_nr_ranges = nr_ranges; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 02c2eff7478da..6c9cb60738326 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -793,8 +793,6 @@ retint_kernel: restore_c_regs_and_iret: RESTORE_C_REGS REMOVE_PT_GPREGS_FROM_STACK 8 - -irq_return: INTERRUPT_RETURN ENTRY(native_iret) @@ -1395,7 +1393,18 @@ END(error_exit) /* Runs on exception stack */ ENTRY(nmi) INTR_FRAME + /* + * Fix up the exception frame if we're on Xen. + * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most + * one value to the stack on native, so it may clobber the rdx + * scratch slot, but it won't clobber any of the important + * slots past it. + * + * Xen is a different story, because the Xen frame itself overlaps + * the "NMI executing" variable. + */ PARAVIRT_ADJUST_EXCEPTION_FRAME + /* * We allow breakpoints in NMIs. If a breakpoint occurs, then * the iretq it performs will take us out of NMI context. 
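The get_segment_base() change above is the reader half of the series' new LDT protocol: take one lockless_dereference() snapshot of context.ldt and bounds-check against the size carried inside that same immutable ldt_struct, while the writer (install_ldt(), in the ldt.c diff below) builds a complete new table and publishes it with smp_store_release(). A rough user-space rendering of that protocol, assuming C11 atomics; acquire/release stands in for the kernel's release store paired with an address-dependency-ordered load, and the deferred freeing of the old table (the on_each_cpu_mask() IPI) is left out:

#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>

struct ldt_table {
        int size;
        unsigned long entries[];        /* immutable once published */
};

static _Atomic(struct ldt_table *) cur_ldt;

/* Writer: never touch a published table; copy, modify, then publish. */
int set_entry(int idx, unsigned long val, int nentries)
{
        struct ldt_table *old = atomic_load(&cur_ldt);
        struct ldt_table *new;
        int copy;

        if (idx < 0 || idx >= nentries)
                return -1;
        new = calloc(1, sizeof(*new) + nentries * sizeof(new->entries[0]));
        if (!new)
                return -1;

        new->size = nentries;
        if (old) {
                copy = old->size < nentries ? old->size : nentries;
                memcpy(new->entries, old->entries,
                       copy * sizeof(new->entries[0]));
        }
        new->entries[idx] = val;

        /* Publish: pairs with the acquire load in get_entry(). */
        atomic_store_explicit(&cur_ldt, new, memory_order_release);
        /* The kernel frees 'old' only after every CPU stopped using it. */
        return 0;
}

/* Reader: one snapshot, so size and entries can never disagree. */
unsigned long get_entry(int idx)
{
        struct ldt_table *t =
                atomic_load_explicit(&cur_ldt, memory_order_acquire);

        if (!t || idx < 0 || idx >= t->size)
                return 0;
        return t->entries[idx];
}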
@@ -1413,11 +1422,12 @@ ENTRY(nmi) * If the variable is not set and the stack is not the NMI * stack then: * o Set the special variable on the stack - * o Copy the interrupt frame into a "saved" location on the stack - * o Copy the interrupt frame into a "copy" location on the stack + * o Copy the interrupt frame into an "outermost" location on the + * stack + * o Copy the interrupt frame into an "iret" location on the stack * o Continue processing the NMI * If the variable is set or the previous stack is the NMI stack: - * o Modify the "copy" location to jump to the repeate_nmi + * o Modify the "iret" location to jump to the repeat_nmi * o return back to the first NMI * * Now on exit of the first NMI, we first clear the stack variable @@ -1426,32 +1436,154 @@ ENTRY(nmi) * a nested NMI that updated the copy interrupt stack frame, a * jump will be made to the repeat_nmi code that will handle the second * NMI. + * + * However, espfix prevents us from directly returning to userspace + * with a single IRET instruction. Similarly, IRET to user mode + * can fault. We therefore handle NMIs from user space like + * other IST entries. */ /* Use %rdx as our temp variable throughout */ pushq_cfi %rdx CFI_REL_OFFSET rdx, 0 + testb $3, CS-RIP+8(%rsp) + jz .Lnmi_from_kernel + + /* + * NMI from user mode. We need to run on the thread stack, but we + * can't go through the normal entry paths: NMIs are masked, and + * we don't want to enable interrupts, because then we'll end + * up in an awkward situation in which IRQs are on but NMIs + * are off. + * + * We also must not push anything to the stack before switching + * stacks lest we corrupt the "NMI executing" variable. + */ + + SWAPGS_UNSAFE_STACK + cld + movq %rsp, %rdx + movq PER_CPU_VAR(kernel_stack), %rsp + pushq 5*8(%rdx) /* pt_regs->ss */ + pushq 4*8(%rdx) /* pt_regs->rsp */ + pushq 3*8(%rdx) /* pt_regs->flags */ + pushq 2*8(%rdx) /* pt_regs->cs */ + pushq 1*8(%rdx) /* pt_regs->rip */ + pushq $-1 /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq (%rdx) /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq %rax /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ + + /* + * At this point we no longer need to worry about stack damage + * due to nesting -- we're on the normal thread stack and we're + * done with the NMI stack. + */ + movq %rsp, %rdi + movq $-1, %rsi + call do_nmi + + /* + * Return back to user mode. We must *not* do the normal exit + * work, because we don't want to enable interrupts. Fortunately, + * do_nmi doesn't modify pt_regs. 
+ */ + SWAPGS + jmp restore_c_regs_and_iret + +.Lnmi_from_kernel: + /* + * Here's what our stack frame will look like: + * +---------------------------------------------------------+ + * | original SS | + * | original Return RSP | + * | original RFLAGS | + * | original CS | + * | original RIP | + * +---------------------------------------------------------+ + * | temp storage for rdx | + * +---------------------------------------------------------+ + * | "NMI executing" variable | + * +---------------------------------------------------------+ + * | iret SS } Copied from "outermost" frame | + * | iret Return RSP } on each loop iteration; overwritten | + * | iret RFLAGS } by a nested NMI to force another | + * | iret CS } iteration if needed. | + * | iret RIP } | + * +---------------------------------------------------------+ + * | outermost SS } initialized in first_nmi; | + * | outermost Return RSP } will not be changed before | + * | outermost RFLAGS } NMI processing is done. | + * | outermost CS } Copied to "iret" frame on each | + * | outermost RIP } iteration. | + * +---------------------------------------------------------+ + * | pt_regs | + * +---------------------------------------------------------+ + * + * The "original" frame is used by hardware. Before re-enabling + * NMIs, we need to be done with it, and we need to leave enough + * space for the asm code here. + * + * We return by executing IRET while RSP points to the "iret" frame. + * That will either return for real or it will loop back into NMI + * processing. + * + * The "outermost" frame is copied to the "iret" frame on each + * iteration of the loop, so each iteration starts with the "iret" + * frame pointing to the final return target. + */ + /* - * If %cs was not the kernel segment, then the NMI triggered in user - * space, which means it is definitely not nested. + * Determine whether we're a nested NMI. + * + * If we interrupted kernel code between repeat_nmi and + * end_repeat_nmi, then we are a nested NMI. We must not + * modify the "iret" frame because it's being written by + * the outer NMI. That's okay; the outer NMI handler is + * about to call do_nmi anyway, so we can just + * resume the outer NMI. */ - cmpl $__KERNEL_CS, 16(%rsp) - jne first_nmi + + movq $repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja 1f + movq $end_repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja nested_nmi_out +1: /* - * Check the special variable on the stack to see if NMIs are - * executing. + * Now check "NMI executing". If it's set, then we're nested. + * This will not detect if we interrupted an outer NMI just + * before IRET. */ cmpl $1, -8(%rsp) je nested_nmi /* - * Now test if the previous stack was an NMI stack. - * We need the double check. We check the NMI stack to satisfy the - * race when the first NMI clears the variable before returning. - * We check the variable because the first NMI could be in a - * breakpoint routine using a breakpoint stack. + * Now test if the previous stack was an NMI stack. This covers + * the case where we interrupt an outer NMI after it clears + * "NMI executing" but before IRET. We need to be careful, though: + * there is one case in which RSP could point to the NMI stack + * despite there being no NMI active: naughty userspace controls + * RSP at the very beginning of the SYSCALL targets. We can + * pull a fast one on naughty userspace, though: we program + * SYSCALL to mask DF, so userspace cannot cause DF to be set + * if it controls the kernel's RSP.
We set DF before we clear + * "NMI executing". */ lea 6*8(%rsp), %rdx /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */ @@ -1462,25 +1594,21 @@ ENTRY(nmi) cmpq %rdx, 4*8(%rsp) /* If it is below the NMI stack, it is a normal NMI */ jb first_nmi - /* Ah, it is within the NMI stack, treat it as nested */ + + /* Ah, it is within the NMI stack. */ + + testb $(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp) + jz first_nmi /* RSP was user controlled. */ + + /* This is a nested NMI. */ CFI_REMEMBER_STATE nested_nmi: /* - * Do nothing if we interrupted the fixup in repeat_nmi. - * It's about to repeat the NMI handler, so we are fine - * with ignoring this one. + * Modify the "iret" frame to point to repeat_nmi, forcing another + * iteration of NMI handling. */ - movq $repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja 1f - movq $end_repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja nested_nmi_out - -1: - /* Set up the interrupted NMIs stack to jump to repeat_nmi */ leaq -1*8(%rsp), %rdx movq %rdx, %rsp CFI_ADJUST_CFA_OFFSET 1*8 @@ -1499,60 +1627,23 @@ nested_nmi_out: popq_cfi %rdx CFI_RESTORE rdx - /* No need to check faults here */ + /* We are returning to kernel mode, so this cannot result in a fault. */ INTERRUPT_RETURN CFI_RESTORE_STATE first_nmi: - /* - * Because nested NMIs will use the pushed location that we - * stored in rdx, we must keep that space available. - * Here's what our stack frame will look like: - * +-------------------------+ - * | original SS | - * | original Return RSP | - * | original RFLAGS | - * | original CS | - * | original RIP | - * +-------------------------+ - * | temp storage for rdx | - * +-------------------------+ - * | NMI executing variable | - * +-------------------------+ - * | copied SS | - * | copied Return RSP | - * | copied RFLAGS | - * | copied CS | - * | copied RIP | - * +-------------------------+ - * | Saved SS | - * | Saved Return RSP | - * | Saved RFLAGS | - * | Saved CS | - * | Saved RIP | - * +-------------------------+ - * | pt_regs | - * +-------------------------+ - * - * The saved stack frame is used to fix up the copied stack frame - * that a nested NMI may change to make the interrupted NMI iret jump - * to the repeat_nmi. The original stack frame and the temp storage - * is also used by nested NMIs and can not be trusted on exit. - */ - /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ + /* Restore rdx. */ movq (%rsp), %rdx CFI_RESTORE rdx - /* Set the NMI executing variable on the stack. */ + /* Set "NMI executing" on the stack. */ pushq_cfi $1 - /* - * Leave room for the "copied" frame - */ + /* Leave room for the "iret" frame */ subq $(5*8), %rsp CFI_ADJUST_CFA_OFFSET 5*8 - /* Copy the stack frame to the Saved frame */ + /* Copy the "original" frame to the "outermost" frame */ .rept 5 pushq_cfi 11*8(%rsp) .endr @@ -1560,6 +1651,7 @@ first_nmi: /* Everything up to here is safe from nested NMIs */ +repeat_nmi: /* * If there was a nested NMI, the first NMI's iret will return * here. But NMIs are still enabled and we can take another @@ -1568,16 +1660,21 @@ first_nmi: * it will just return, as we are about to repeat an NMI anyway. * This makes it safe to copy to the stack frame that a nested * NMI will update. - */ -repeat_nmi: - /* - * Update the stack variable to say we are still in NMI (the update - * is benign for the non-repeat case, where 1 was pushed just above - * to this very stack slot). + * + * RSP is pointing to "outermost RIP". 
gsbase is unknown, but, if + * we're repeating an NMI, gsbase has the same value that it had on + * the first iteration. paranoid_entry will load the kernel + * gsbase if needed before we call do_nmi. + * + * Set "NMI executing" in case we came back here via IRET. */ movq $1, 10*8(%rsp) - /* Make another copy, this one may be modified by nested NMIs */ + /* + * Copy the "outermost" frame to the "iret" frame. NMIs that nest + * here must not modify the "iret" frame while we're writing to + * it or it will end up containing garbage. + */ addq $(10*8), %rsp CFI_ADJUST_CFA_OFFSET -10*8 .rept 5 @@ -1588,9 +1685,9 @@ repeat_nmi: end_repeat_nmi: /* - * Everything below this point can be preempted by a nested - * NMI if the first NMI took an exception and reset our iret stack - * so that we repeat another NMI. + * Everything below this point can be preempted by a nested NMI. + * If this happens, then the inner NMI will change the "iret" + * frame to point back to repeat_nmi. */ pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ ALLOC_PT_GPREGS_ON_STACK @@ -1605,29 +1702,11 @@ end_repeat_nmi: call paranoid_entry DEFAULT_FRAME 0 - /* - * Save off the CR2 register. If we take a page fault in the NMI then - * it could corrupt the CR2 value. If the NMI preempts a page fault - * handler before it was able to read the CR2 register, and then the - * NMI itself takes a page fault, the page fault that was preempted - * will read the information from the NMI page fault and not the - * origin fault. Save it off and restore it if it changes. - * Use the r12 callee-saved register. - */ - movq %cr2, %r12 - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ movq %rsp,%rdi movq $-1,%rsi call do_nmi - /* Did the NMI take a page fault? Restore cr2 if it did */ - movq %cr2, %rcx - cmpq %rcx, %r12 - je 1f - movq %r12, %cr2 -1: - testl %ebx,%ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: @@ -1635,12 +1714,27 @@ nmi_swapgs: nmi_restore: RESTORE_EXTRA_REGS RESTORE_C_REGS - /* Pop the extra iret frame at once */ + + /* Point RSP at the "iret" frame. */ REMOVE_PT_GPREGS_FROM_STACK 6*8 - /* Clear the NMI executing stack variable */ - movq $0, 5*8(%rsp) - jmp irq_return + /* + * Clear "NMI executing". Set DF first so that we can easily + * distinguish the remaining code between here and IRET from + * the SYSCALL entry and exit paths. On a native kernel, we + * could just inspect RIP, but, on paravirt kernels, + * INTERRUPT_RETURN can translate into a jump into a + * hypercall page. + */ + std + movq $0, 5*8(%rsp) /* clear "NMI executing" */ + + /* + * INTERRUPT_RETURN reads the "iret" frame and exits the NMI + * stack in a single instruction. We are returning to kernel + * mode, so this cannot result in a fault. 
+ */ + INTERRUPT_RETURN CFI_ENDPROC END(nmi) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 5a4668136e989..f129a9af63574 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -161,11 +161,12 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* Kill off the identity-map trampoline */ reset_early_page_tables(); - kasan_map_early_shadow(early_level4_pgt); - - /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); + clear_page(init_level4_pgt); + + kasan_early_init(); + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) set_intr_gate(i, early_idt_handler_array[i]); load_idt((const struct desc_ptr *)&idt_descr); @@ -177,12 +178,9 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) */ load_ucode_bsp(); - clear_page(init_level4_pgt); /* set init_level4_pgt kernel high mapping*/ init_level4_pgt[511] = early_level4_pgt[511]; - kasan_map_early_shadow(init_level4_pgt); - x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 53eeb226657ca..7e429c99c7285 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -62,9 +62,16 @@ #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) #endif -/* Number of possible pages in the lowmem region */ -LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) - +/* + * Number of possible pages in the lowmem region. + * + * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a + * gas warning about overflowing shift count when gas has been compiled + * with only a host target support using a 32-bit type for internal + * representation. + */ +LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT) + /* Enough space to fit pagetables for the low memory linear map */ MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index df7e78057ae00..174fa035a09a1 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -65,6 +65,9 @@ startup_64: * tables and then reload them. */ + /* Sanitize CPU configuration */ + call verify_cpu + /* * Compute the delta between the address I am compiled to run at and the * address I am actually running at. @@ -174,6 +177,9 @@ ENTRY(secondary_startup_64) * after the boot processor executes this code. */ + /* Sanitize CPU configuration */ + call verify_cpu + movq $(init_level4_pgt - __START_KERNEL_map), %rax 1: @@ -288,6 +294,8 @@ ENTRY(secondary_startup_64) pushq %rax # target address in negative space lretq +#include "verify_cpu.S" + #ifdef CONFIG_HOTPLUG_CPU /* * Boot CPU0 entry point. It's called from play_dead(). Everything has been set @@ -516,38 +524,9 @@ ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ .quad 0x0000000000000000 -#ifdef CONFIG_KASAN -#define FILL(VAL, COUNT) \ - .rept (COUNT) ; \ - .quad (VAL) ; \ - .endr - -NEXT_PAGE(kasan_zero_pte) - FILL(kasan_zero_page - __START_KERNEL_map + _KERNPG_TABLE, 512) -NEXT_PAGE(kasan_zero_pmd) - FILL(kasan_zero_pte - __START_KERNEL_map + _KERNPG_TABLE, 512) -NEXT_PAGE(kasan_zero_pud) - FILL(kasan_zero_pmd - __START_KERNEL_map + _KERNPG_TABLE, 512) - -#undef FILL -#endif - - #include "../../x86/xen/xen-head.S" __PAGE_ALIGNED_BSS NEXT_PAGE(empty_zero_page) .skip PAGE_SIZE -#ifdef CONFIG_KASAN -/* - * This page used as early shadow. We don't use empty_zero_page - * at early stages, stack instrumentation could write some garbage - * to this page. 
- * Latter we reuse it as zero shadow for large ranges of memory - * that allowed to access, but not instrumented by kasan - * (vmalloc/vmemmap ...). - */ -NEXT_PAGE(kasan_zero_page) - .skip PAGE_SIZE -#endif diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index c37886d759cca..2bcc0525f1c10 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -20,82 +21,82 @@ #include #include -#ifdef CONFIG_SMP +/* context.lock is held for us, so we don't need any locking. */ static void flush_ldt(void *current_mm) { - if (current->active_mm == current_mm) - load_LDT(¤t->active_mm->context); + mm_context_t *pc; + + if (current->active_mm != current_mm) + return; + + pc = ¤t->active_mm->context; + set_ldt(pc->ldt->entries, pc->ldt->size); } -#endif -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) +/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */ +static struct ldt_struct *alloc_ldt_struct(int size) { - void *oldldt, *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) & - (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1)); - if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount * LDT_ENTRY_SIZE); + struct ldt_struct *new_ldt; + int alloc_size; + + if (size > LDT_ENTRIES) + return NULL; + + new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL); + if (!new_ldt) + return NULL; + + BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct)); + alloc_size = size * LDT_ENTRY_SIZE; + + /* + * Xen is very picky: it requires a page-aligned LDT that has no + * trailing nonzero bytes in any page that contains LDT descriptors. + * Keep it simple: zero the whole allocation and never allocate less + * than PAGE_SIZE. + */ + if (alloc_size > PAGE_SIZE) + new_ldt->entries = vzalloc(alloc_size); else - newldt = (void *)__get_free_page(GFP_KERNEL); - - if (!newldt) - return -ENOMEM; + new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (oldsize) - memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, - (mincount - oldsize) * LDT_ENTRY_SIZE); + if (!new_ldt->entries) { + kfree(new_ldt); + return NULL; + } - paravirt_alloc_ldt(newldt, mincount); + new_ldt->size = size; + return new_ldt; +} -#ifdef CONFIG_X86_64 - /* CHECKME: Do we really need this ? */ - wmb(); -#endif - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - - if (reload) { -#ifdef CONFIG_SMP - preempt_disable(); - load_LDT(pc); - if (!cpumask_equal(mm_cpumask(current->mm), - cpumask_of(smp_processor_id()))) - smp_call_function(flush_ldt, current->mm, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - paravirt_free_ldt(oldldt, oldsize); - if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - put_page(virt_to_page(oldldt)); - } - return 0; +/* After calling this, the LDT is immutable. */ +static void finalize_ldt_struct(struct ldt_struct *ldt) +{ + paravirt_alloc_ldt(ldt->entries, ldt->size); } -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) +/* context.lock is held */ +static void install_ldt(struct mm_struct *current_mm, + struct ldt_struct *ldt) { - int err = alloc_ldt(new, old->size, 0); - int i; + /* Synchronizes with lockless_dereference in load_mm_ldt. */ + smp_store_release(¤t_mm->context.ldt, ldt); + + /* Activate the LDT for all CPUs using current_mm. 
*/ + on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); +} - if (err < 0) - return err; +static void free_ldt_struct(struct ldt_struct *ldt) +{ + if (likely(!ldt)) + return; - for (i = 0; i < old->size; i++) - write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); - return 0; + paravirt_free_ldt(ldt->entries, ldt->size); + if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(ldt->entries); + else + kfree(ldt->entries); + kfree(ldt); } /* @@ -104,17 +105,37 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) */ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + struct ldt_struct *new_ldt; struct mm_struct *old_mm; int retval = 0; mutex_init(&mm->context.lock); - mm->context.size = 0; old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); - retval = copy_ldt(&mm->context, &old_mm->context); - mutex_unlock(&old_mm->context.lock); + if (!old_mm) { + mm->context.ldt = NULL; + return 0; } + + mutex_lock(&old_mm->context.lock); + if (!old_mm->context.ldt) { + mm->context.ldt = NULL; + goto out_unlock; + } + + new_ldt = alloc_ldt_struct(old_mm->context.ldt->size); + if (!new_ldt) { + retval = -ENOMEM; + goto out_unlock; + } + + memcpy(new_ldt->entries, old_mm->context.ldt->entries, + new_ldt->size * LDT_ENTRY_SIZE); + finalize_ldt_struct(new_ldt); + + mm->context.ldt = new_ldt; + +out_unlock: + mutex_unlock(&old_mm->context.lock); return retval; } @@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) */ void destroy_context(struct mm_struct *mm) { - if (mm->context.size) { -#ifdef CONFIG_X86_32 - /* CHECKME: Can this ever happen ? */ - if (mm == current->active_mm) - clear_LDT(); -#endif - paravirt_free_ldt(mm->context.ldt, mm->context.size); - if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - put_page(virt_to_page(mm->context.ldt)); - mm->context.size = 0; - } + free_ldt_struct(mm->context.ldt); + mm->context.ldt = NULL; } static int read_ldt(void __user *ptr, unsigned long bytecount) { - int err; + int retval; unsigned long size; struct mm_struct *mm = current->mm; - if (!mm->context.size) - return 0; + mutex_lock(&mm->context.lock); + + if (!mm->context.ldt) { + retval = 0; + goto out_unlock; + } + if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; - mutex_lock(&mm->context.lock); - size = mm->context.size * LDT_ENTRY_SIZE; + size = mm->context.ldt->size * LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - mutex_unlock(&mm->context.lock); - if (err < 0) - goto error_return; + if (copy_to_user(ptr, mm->context.ldt->entries, size)) { + retval = -EFAULT; + goto out_unlock; + } + if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr + size, bytecount - size) != 0) { - err = -EFAULT; - goto error_return; + /* Zero-fill the rest and pretend we read bytecount bytes. 
*/ + if (clear_user(ptr + size, bytecount - size)) { + retval = -EFAULT; + goto out_unlock; } } - return bytecount; -error_return: - return err; + retval = bytecount; + +out_unlock: + mutex_unlock(&mm->context.lock); + return retval; } static int read_default_ldt(void __user *ptr, unsigned long bytecount) @@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) struct desc_struct ldt; int error; struct user_desc ldt_info; + int oldsize, newsize; + struct ldt_struct *new_ldt, *old_ldt; error = -EINVAL; if (bytecount != sizeof(ldt_info)) @@ -213,34 +230,39 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) goto out; } - mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(&current->mm->context, - ldt_info.entry_number + 1, 1); - if (error < 0) - goto out_unlock; - } - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - memset(&ldt, 0, sizeof(ldt)); - goto install; + if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) || + LDT_empty(&ldt_info)) { + /* The user wants to clear the entry. */ + memset(&ldt, 0, sizeof(ldt)); + } else { + if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { + error = -EINVAL; + goto out; } + + fill_ldt(&ldt, &ldt_info); + if (oldmode) + ldt.avl = 0; } - if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { - error = -EINVAL; + mutex_lock(&mm->context.lock); + + old_ldt = mm->context.ldt; + oldsize = old_ldt ? old_ldt->size : 0; + newsize = max((int)(ldt_info.entry_number + 1), oldsize); + + error = -ENOMEM; + new_ldt = alloc_ldt_struct(newsize); + if (!new_ldt) goto out_unlock; - } - fill_ldt(&ldt, &ldt_info); - if (oldmode) - ldt.avl = 0; + if (old_ldt) + memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE); + new_ldt->entries[ldt_info.entry_number] = ldt; + finalize_ldt_struct(new_ldt); - /* Install the new entry ... */ -install: - write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt); + install_ldt(mm, new_ldt); + free_ldt_struct(old_ldt); error = 0; out_unlock: diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index c3e985d1751ce..d05bd2e2ee91e 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -408,15 +408,15 @@ static void default_do_nmi(struct pt_regs *regs) NOKPROBE_SYMBOL(default_do_nmi); /* - * NMIs can hit breakpoints which will cause it to lose its - * NMI context with the CPU when the breakpoint does an iret. - */ -#ifdef CONFIG_X86_32 -/* - * For i386, NMIs use the same stack as the kernel, and we can - * add a workaround to the iret problem in C (preventing nested - * NMIs if an NMI takes a trap). Simply have 3 states the NMI - * can be in: + * NMIs can page fault or hit breakpoints which will cause them to lose + * their NMI context with the CPU when the breakpoint or page fault does an IRET. + * + * As a result, NMIs can nest if NMIs get unmasked due to an IRET during + * NMI processing. On x86_64, the asm glue protects us from nested NMIs + * if the outer NMI came from kernel mode, but we can still nest if the + * outer NMI came from user mode. + * + * To handle these nested NMIs, we have three states: * * 1) not running * 2) executing @@ -430,15 +430,14 @@ NOKPROBE_SYMBOL(default_do_nmi); * (Note, the latch is binary, thus multiple NMIs triggering, * when one is running, are ignored. Only one NMI is restarted.)
* - * If an NMI hits a breakpoint that executes an iret, another - * NMI can preempt it. We do not want to allow this new NMI - * to run, but we want to execute it when the first one finishes. - * We set the state to "latched", and the exit of the first NMI will - * perform a dec_return, if the result is zero (NOT_RUNNING), then - * it will simply exit the NMI handler. If not, the dec_return - * would have set the state to NMI_EXECUTING (what we want it to - * be when we are running). In this case, we simply jump back - * to rerun the NMI handler again, and restart the 'latched' NMI. + * If an NMI executes an iret, another NMI can preempt it. We do not + * want to allow this new NMI to run, but we want to execute it when the + * first one finishes. We set the state to "latched", and the exit of + * the first NMI will perform a dec_return, if the result is zero + * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the + * dec_return would have set the state to NMI_EXECUTING (what we want it + * to be when we are running). In this case, we simply jump back to + * rerun the NMI handler again, and restart the 'latched' NMI. * * No trap (breakpoint or page fault) should be hit before nmi_restart, * thus there is no race between the first check of state for NOT_RUNNING @@ -461,49 +460,36 @@ enum nmi_states { static DEFINE_PER_CPU(enum nmi_states, nmi_state); static DEFINE_PER_CPU(unsigned long, nmi_cr2); -#define nmi_nesting_preprocess(regs) \ - do { \ - if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \ - this_cpu_write(nmi_state, NMI_LATCHED); \ - return; \ - } \ - this_cpu_write(nmi_state, NMI_EXECUTING); \ - this_cpu_write(nmi_cr2, read_cr2()); \ - } while (0); \ - nmi_restart: - -#define nmi_nesting_postprocess() \ - do { \ - if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \ - write_cr2(this_cpu_read(nmi_cr2)); \ - if (this_cpu_dec_return(nmi_state)) \ - goto nmi_restart; \ - } while (0) -#else /* x86_64 */ +#ifdef CONFIG_X86_64 /* - * In x86_64 things are a bit more difficult. This has the same problem - * where an NMI hitting a breakpoint that calls iret will remove the - * NMI context, allowing a nested NMI to enter. What makes this more - * difficult is that both NMIs and breakpoints have their own stack. - * When a new NMI or breakpoint is executed, the stack is set to a fixed - * point. If an NMI is nested, it will have its stack set at that same - * fixed address that the first NMI had, and will start corrupting the - * stack. This is handled in entry_64.S, but the same problem exists with - * the breakpoint stack. + * In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without + * some care, the inner breakpoint will clobber the outer breakpoint's + * stack. * - * If a breakpoint is being processed, and the debug stack is being used, - * if an NMI comes in and also hits a breakpoint, the stack pointer - * will be set to the same fixed address as the breakpoint that was - * interrupted, causing that stack to be corrupted. To handle this case, - * check if the stack that was interrupted is the debug stack, and if - * so, change the IDT so that new breakpoints will use the current stack - * and not switch to the fixed address. On return of the NMI, switch back - * to the original IDT. + * If a breakpoint is being processed, and the debug stack is being + * used, if an NMI comes in and also hits a breakpoint, the stack + * pointer will be set to the same fixed address as the breakpoint that + * was interrupted, causing that stack to be corrupted. 
To handle this + * case, check if the stack that was interrupted is the debug stack, and + * if so, change the IDT so that new breakpoints will use the current + * stack and not switch to the fixed address. On return of the NMI, + * switch back to the original IDT. */ static DEFINE_PER_CPU(int, update_debug_stack); +#endif -static inline void nmi_nesting_preprocess(struct pt_regs *regs) +dotraplinkage notrace void +do_nmi(struct pt_regs *regs, long error_code) { + if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { + this_cpu_write(nmi_state, NMI_LATCHED); + return; + } + this_cpu_write(nmi_state, NMI_EXECUTING); + this_cpu_write(nmi_cr2, read_cr2()); +nmi_restart: + +#ifdef CONFIG_X86_64 /* * If we interrupted a breakpoint, it is possible that * the nmi handler will have breakpoints too. We need to @@ -514,22 +500,8 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs) debug_stack_set_zero(); this_cpu_write(update_debug_stack, 1); } -} - -static inline void nmi_nesting_postprocess(void) -{ - if (unlikely(this_cpu_read(update_debug_stack))) { - debug_stack_reset(); - this_cpu_write(update_debug_stack, 0); - } -} #endif -dotraplinkage notrace void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_nesting_preprocess(regs); - nmi_enter(); inc_irq_stat(__nmi_count); @@ -539,8 +511,17 @@ do_nmi(struct pt_regs *regs, long error_code) nmi_exit(); - /* On i386, may loop back to preprocess */ - nmi_nesting_postprocess(); +#ifdef CONFIG_X86_64 + if (unlikely(this_cpu_read(update_debug_stack))) { + debug_stack_reset(); + this_cpu_write(update_debug_stack, 0); + } +#endif + + if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) + write_cr2(this_cpu_read(nmi_cr2)); + if (this_cpu_dec_return(nmi_state)) + goto nmi_restart; } NOKPROBE_SYMBOL(do_nmi); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index c614dd492f5f7..1f316f066c499 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -41,10 +41,18 @@ #include #include -/* nop stub */ -void _paravirt_nop(void) -{ -} +/* + * nop stub, which must not clobber anything *including the stack* to + * avoid confusing the entry prologues. + */ +extern void _paravirt_nop(void); +asm (".pushsection .entry.text, \"ax\"\n" + ".global _paravirt_nop\n" + "_paravirt_nop:\n\t" + "ret\n\t" + ".size _paravirt_nop, . 
- _paravirt_nop\n\t" + ".type _paravirt_nop, @function\n\t" + ".popsection"); /* identity function, which can be inlined */ u32 _paravirt_ident_32(u32 x) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6e338e3b1dc04..9717437742489 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -453,6 +453,7 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) static void mwait_idle(void) { if (!current_set_polling_and_test()) { + trace_cpu_idle_rcuidle(1, smp_processor_id()); if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { smp_mb(); /* quirk */ clflush((void *)¤t_thread_info()->flags); @@ -464,6 +465,7 @@ static void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } else { local_irq_enable(); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ddfdbf74f1744..58e02d9382183 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -122,11 +122,11 @@ void __show_regs(struct pt_regs *regs, int all) void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { - if (dead_task->mm->context.size) { + if (dead_task->mm->context.ldt) { pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", dead_task->comm, dead_task->mm->context.ldt, - dead_task->mm->context.size); + dead_task->mm->context.ldt->size); BUG(); } } @@ -499,27 +499,59 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); +/* + * Called from fs/proc with a reference on @p to find the function + * which called into schedule(). This needs to be done carefully + * because the task might wake up and we might look at a stack + * changing under us. + */ unsigned long get_wchan(struct task_struct *p) { - unsigned long stack; - u64 fp, ip; + unsigned long start, bottom, top, sp, fp, ip; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; - stack = (unsigned long)task_stack_page(p); - if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) + + start = (unsigned long)task_stack_page(p); + if (!start) + return 0; + + /* + * Layout of the stack page: + * + * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long) + * PADDING + * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING + * stack + * ----------- bottom = start + sizeof(thread_info) + * thread_info + * ----------- start + * + * The tasks stack pointer points at the location where the + * framepointer is stored. The data on the stack is: + * ... IP FP ... IP FP + * + * We need to read FP and IP, so we need to adjust the upper + * bound by another unsigned long. 
+ */ + top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; + top -= 2 * sizeof(unsigned long); + bottom = start + sizeof(struct thread_info); + + sp = READ_ONCE(p->thread.sp); + if (sp < bottom || sp > top) return 0; - fp = *(u64 *)(p->thread.sp); + + fp = READ_ONCE(*(unsigned long *)sp); do { - if (fp < (unsigned long)stack || - fp >= (unsigned long)stack+THREAD_SIZE) + if (fp < bottom || fp > top) return 0; - ip = *(u64 *)(fp+8); + ip = READ_ONCE(*(unsigned long *)(fp + sizeof(unsigned long))); if (!in_sched_functions(ip)) return ip; - fp = *(u64 *)fp; - } while (count++ < 16); + fp = READ_ONCE(*(unsigned long *)fp); + } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 86db4bcd7ce52..0549ae3cb3326 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -182,6 +182,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), }, }, + { /* Handle problems with rebooting on the iMac10,1. */ + .callback = set_pci_reboot, + .ident = "Apple iMac10,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "iMac10,1"), + }, + }, /* ASRock */ { /* Handle problems with rebooting on ASRock Q1900DC-ITX */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d74ac33290ae3..1473a02e6ccb2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1194,6 +1194,14 @@ void __init setup_arch(char **cmdline_p) clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, swapper_pg_dir + KERNEL_PGD_BOUNDARY, KERNEL_PGD_PTRS); + + /* + * sync back low identity map too. It is used for example + * in the 32-bit EFI stub. + */ + clone_pgd_range(initial_page_table, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); #endif tboot_probe(); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 1ea14fd53933b..5d2e2e9af1c4c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -93,8 +93,15 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) COPY(r15); #endif /* CONFIG_X86_64 */ +#ifdef CONFIG_X86_32 COPY_SEG_CPL3(cs); COPY_SEG_CPL3(ss); +#else /* !CONFIG_X86_32 */ + /* Kernel saves and restores only the CS segment register on signals, + * which is the bare minimum needed to allow mixed 32/64-bit code. + * App's signal handler can save/restore other segments if needed. */ + COPY_SEG_CPL3(cs); +#endif /* CONFIG_X86_32 */ get_user_ex(tmpflags, &sc->flags); regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); @@ -154,9 +161,8 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, #else /* !CONFIG_X86_32 */ put_user_ex(regs->flags, &sc->flags); put_user_ex(regs->cs, &sc->cs); - put_user_ex(0, &sc->__pad2); - put_user_ex(0, &sc->__pad1); - put_user_ex(regs->ss, &sc->ss); + put_user_ex(0, &sc->gs); + put_user_ex(0, &sc->fs); #endif /* CONFIG_X86_32 */ put_user_ex(fpstate, &sc->fpstate); @@ -450,19 +456,9 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, regs->sp = (unsigned long)frame; - /* - * Set up the CS and SS registers to run signal handlers in - * 64-bit mode, even if the handler happens to be interrupting - * 32-bit or 16-bit code. - * - * SS is subtle. In 64-bit mode, we don't need any particular - * SS descriptor, but we do need SS to be valid. 
It's possible - * that the old SS is entirely bogus -- this can happen if the - * signal we're trying to deliver is #GP or #SS caused by a bad - * SS value. - */ + /* Set up the CS register to run signal handlers in 64-bit mode, + even if the handler happens to be interrupting 32-bit code. */ regs->cs = __USER_CS; - regs->ss = __USER_DS; return 0; } @@ -671,12 +667,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) signal_setup_done(failed, ksig, stepping); } -#ifdef CONFIG_X86_32 -#define NR_restart_syscall __NR_restart_syscall -#else /* !CONFIG_X86_32 */ -#define NR_restart_syscall \ - test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall -#endif /* CONFIG_X86_32 */ +static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) +{ +#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64) + return __NR_restart_syscall; +#else /* !CONFIG_X86_32 && CONFIG_X86_64 */ + return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : + __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT); +#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */ +} /* * Note that 'init' is a special process: it doesn't get signals it doesn't @@ -705,7 +704,7 @@ static void do_signal(struct pt_regs *regs) break; case -ERESTART_RESTARTBLOCK: - regs->ax = NR_restart_syscall; + regs->ax = get_nr_restart_syscall(regs); regs->ip -= 2; break; } diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 9b4d51d0c0d01..0ccb53a9fcd93 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -5,6 +5,7 @@ #include #include #include +#include unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) { @@ -27,13 +28,14 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re struct desc_struct *desc; unsigned long base; - seg &= ~7UL; + seg >>= 3; mutex_lock(&child->mm->context.lock); - if (unlikely((seg >> 3) >= child->mm->context.size)) + if (unlikely(!child->mm->context.ldt || + seg >= child->mm->context.ldt->size)) addr = -1L; /* bogus selector, access would fault */ else { - desc = child->mm->context.ldt + seg; + desc = &child->mm->context.ldt->entries[seg]; base = get_desc_base(desc); /* 16-bit code segment? 
*/ diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 505449700e0cf..21187ebee7d09 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -21,6 +21,7 @@ #include #include #include +#include unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -1004,15 +1005,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); static void __init check_system_tsc_reliable(void) { -#ifdef CONFIG_MGEODE_LX - /* RTSC counts during suspend */ +#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) + if (is_geode_lx()) { + /* RTSC counts during suspend */ #define RTSC_SUSP 0x100 - unsigned long res_low, res_high; + unsigned long res_low, res_high; - rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); - /* Geode_LX - the OLPC CPU has a very reliable TSC */ - if (res_low & RTSC_SUSP) - tsc_clocksource_reliable = 1; + rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + /* Geode_LX - the OLPC CPU has a very reliable TSC */ + if (res_low & RTSC_SUSP) + tsc_clocksource_reliable = 1; + } #endif if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) tsc_clocksource_reliable = 1; diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index b9242bacbe59a..4cf401f581e7e 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -34,10 +34,11 @@ #include verify_cpu: - pushfl # Save caller passed flags - pushl $0 # Kill any dangerous flags - popfl + pushf # Save caller passed flags + push $0 # Kill any dangerous flags + popf +#ifndef __x86_64__ pushfl # standard way to check for cpuid popl %eax movl %eax,%ebx @@ -48,6 +49,7 @@ verify_cpu: popl %eax cmpl %eax,%ebx jz verify_cpu_no_longmode # cpu has no cpuid +#endif movl $0x0,%eax # See if cpuid 1 is implemented cpuid @@ -130,10 +132,10 @@ verify_cpu_sse_test: jmp verify_cpu_sse_test # try again verify_cpu_no_longmode: - popfl # Restore caller passed flags + popf # Restore caller passed flags movl $1,%eax ret verify_cpu_sse_ok: - popfl # Restore caller passed flags + popf # Restore caller passed flags xorl %eax, %eax ret diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 4dce6f8b6129e..f90952f64e796 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -305,7 +305,7 @@ static void pit_do_work(struct kthread_work *work) * LVT0 to NMI delivery. Other PIC interrupts are just sent to * VCPU0, and only if its LVT0 is in EXTINT mode. 
*/ - if (kvm->arch.vapics_in_nmi_mode > 0) + if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0) kvm_for_each_vcpu(i, vcpu, kvm) kvm_apic_nmi_wd_deliver(vcpu); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4c7deb4f78a14..7dd9a8d3911ae 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -339,6 +339,8 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) struct kvm_lapic *apic = vcpu->arch.apic; __kvm_apic_update_irr(pir, apic->regs); + + kvm_make_request(KVM_REQ_EVENT, vcpu); } EXPORT_SYMBOL_GPL(kvm_apic_update_irr); @@ -1250,10 +1252,10 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) if (!nmi_wd_enabled) { apic_debug("Receive NMI setting on APIC_LVT0 " "for cpu %d\n", apic->vcpu->vcpu_id); - apic->vcpu->kvm->arch.vapics_in_nmi_mode++; + atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); } } else if (nmi_wd_enabled) - apic->vcpu->kvm->arch.vapics_in_nmi_mode--; + atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); } static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) @@ -1808,6 +1810,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, apic_update_ppr(apic); hrtimer_cancel(&apic->lapic_timer.timer); apic_update_lvtt(apic); + apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0)); update_divide_count(apic); start_apic_timer(apic); apic->irr_pending = true; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 9d28383fc1e70..c4ea87eedf8a7 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -150,7 +150,7 @@ static inline bool kvm_apic_vid_enabled(struct kvm *kvm) static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) { - return vcpu->arch.apic->pending_events; + return kvm_vcpu_has_lapic(vcpu) && vcpu->arch.apic->pending_events; } bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b73337634214c..554e877e0bc4a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -357,12 +357,6 @@ static u64 __get_spte_lockless(u64 *sptep) { return ACCESS_ONCE(*sptep); } - -static bool __check_direct_spte_mmio_pf(u64 spte) -{ - /* It is valid if the spte is zapped. */ - return spte == 0ull; -} #else union split_spte { struct { @@ -478,23 +472,6 @@ static u64 __get_spte_lockless(u64 *sptep) return spte.spte; } - -static bool __check_direct_spte_mmio_pf(u64 spte) -{ - union split_spte sspte = (union split_spte)spte; - u32 high_mmio_mask = shadow_mmio_mask >> 32; - - /* It is valid if the spte is zapped. */ - if (spte == 0ull) - return true; - - /* It is valid if the spte is being zapped. */ - if (sspte.spte_low == 0ull && - (sspte.spte_high & high_mmio_mask) == high_mmio_mask) - return true; - - return false; -} #endif static bool spte_is_locklessly_modifiable(u64 spte) @@ -3343,21 +3320,6 @@ static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) return vcpu_match_mmio_gva(vcpu, addr); } - -/* - * On direct hosts, the last spte is only allows two states - * for mmio page fault: - * - It is the mmio spte - * - It is zapped or it is being zapped. - * - * This function completely checks the spte when the last spte - * is not the mmio spte. 
- */ -static bool check_direct_spte_mmio_pf(u64 spte) -{ - return __check_direct_spte_mmio_pf(spte); -} - static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) { struct kvm_shadow_walk_iterator iterator; @@ -3399,13 +3361,6 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) return RET_MMIO_PF_EMULATE; } - /* - * It's ok if the gva is remapped by other cpus on shadow guest, - * it's a BUG if the gfn is not a mmio page. - */ - if (direct && !check_direct_spte_mmio_pf(spte)) - return RET_MMIO_PF_BUG; - /* * If the page table is zapped by other cpus, let CPU fault again on * the address. diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6e6d115fe9b54..d537c9badeb66 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -257,7 +257,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, return ret; mark_page_dirty(vcpu->kvm, table_gfn); - walker->ptes[level] = pte; + walker->ptes[level - 1] = pte; } return 0; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9afa233b5482f..0d039cd268a8a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -511,8 +511,10 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (svm->vmcb->control.next_rip != 0) + if (svm->vmcb->control.next_rip != 0) { + WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; + } if (!svm->next_rip) { if (emulate_instruction(vcpu, EMULTYPE_SKIP) != @@ -1103,6 +1105,8 @@ static void init_vmcb(struct vcpu_svm *svm) set_exception_intercept(svm, PF_VECTOR); set_exception_intercept(svm, UD_VECTOR); set_exception_intercept(svm, MC_VECTOR); + set_exception_intercept(svm, AC_VECTOR); + set_exception_intercept(svm, DB_VECTOR); set_intercept(svm, INTERCEPT_INTR); set_intercept(svm, INTERCEPT_NMI); @@ -1635,20 +1639,13 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, mark_dirty(svm->vmcb, VMCB_SEG); } -static void update_db_bp_intercept(struct kvm_vcpu *vcpu) +static void update_bp_intercept(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - clr_exception_intercept(svm, DB_VECTOR); clr_exception_intercept(svm, BP_VECTOR); - if (svm->nmi_singlestep) - set_exception_intercept(svm, DB_VECTOR); - if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { - if (vcpu->guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - set_exception_intercept(svm, DB_VECTOR); if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) set_exception_intercept(svm, BP_VECTOR); } else @@ -1754,7 +1751,6 @@ static int db_interception(struct vcpu_svm *svm) if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_bp_intercept(&svm->vcpu); } if (svm->vcpu.guest_debug & @@ -1789,6 +1785,12 @@ static int ud_interception(struct vcpu_svm *svm) return 1; } +static int ac_interception(struct vcpu_svm *svm) +{ + kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0); + return 1; +} + static void svm_fpu_activate(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3359,6 +3361,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, + [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, [SVM_EXIT_INTR] = intr_interception, [SVM_EXIT_NMI] = nmi_interception, [SVM_EXIT_SMI] = nop_on_interception, @@ 
-3741,7 +3744,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) */ svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_bp_intercept(vcpu); } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -4317,7 +4319,9 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, break; } - vmcb->control.next_rip = info->next_rip; + /* TODO: Advertise NRIPS to guest hypervisor unconditionally */ + if (static_cpu_has(X86_FEATURE_NRIPS)) + vmcb->control.next_rip = info->next_rip; vmcb->control.exit_code = icpt_info.exit_code; vmexit = nested_svm_exit_handled(svm); @@ -4355,7 +4359,7 @@ static struct kvm_x86_ops svm_x86_ops = { .vcpu_load = svm_vcpu_load, .vcpu_put = svm_vcpu_put, - .update_db_bp_intercept = update_db_bp_intercept, + .update_db_bp_intercept = update_bp_intercept, .get_msr = svm_get_msr, .set_msr = svm_set_msr, .get_segment_base = svm_get_segment_base, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 7c7bc8bef21fd..21dda139eb3ad 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -250,7 +250,7 @@ TRACE_EVENT(kvm_inj_virq, #define kvm_trace_sym_exc \ EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ - EXS(MF), EXS(MC) + EXS(MF), EXS(AC), EXS(MC) /* * Tracepoint for kvm interrupt injection: diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2d73807f0d317..945f9e13f1aa6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1567,7 +1567,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) u32 eb; eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | - (1u << NM_VECTOR) | (1u << DB_VECTOR); + (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) @@ -3652,20 +3652,21 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (!is_paging(vcpu)) { hw_cr4 &= ~X86_CR4_PAE; hw_cr4 |= X86_CR4_PSE; - /* - * SMEP/SMAP is disabled if CPU is in non-paging mode - * in hardware. However KVM always uses paging mode to - * emulate guest non-paging mode with TDP. - * To emulate this behavior, SMEP/SMAP needs to be - * manually disabled when guest switches to non-paging - * mode. - */ - hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP); } else if (!(cr4 & X86_CR4_PAE)) { hw_cr4 &= ~X86_CR4_PAE; } } + if (!enable_unrestricted_guest && !is_paging(vcpu)) + /* + * SMEP/SMAP is disabled if CPU is in non-paging mode in + * hardware. However KVM always uses paging mode without + * unrestricted guest. + * To emulate this behavior, SMEP/SMAP needs to be manually + * disabled when guest switches to non-paging mode. 
+ */ + hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP); + vmcs_writel(CR4_READ_SHADOW, cr4); vmcs_writel(GUEST_CR4, hw_cr4); return 0; @@ -5127,6 +5128,9 @@ static int handle_exception(struct kvm_vcpu *vcpu) return handle_rmode_exception(vcpu, ex_no, error_code); switch (ex_no) { + case AC_VECTOR: + kvm_queue_exception_e(vcpu, AC_VECTOR, error_code); + return 1; case DB_VECTOR: dr6 = vmcs_readl(EXIT_QUALIFICATION); if (!(vcpu->guest_debug & @@ -6144,6 +6148,8 @@ static __init int hardware_setup(void) memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, PAGE_SIZE); + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ + if (enable_apicv) { for (msr = 0x800; msr <= 0x8ff; msr++) vmx_disable_intercept_msr_read_x2apic(msr); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ea306adbbc136..41a3fb4ed346a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -940,7 +940,7 @@ static u32 msrs_to_save[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, - MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS + MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, }; static unsigned num_msrs_to_save; @@ -2117,6 +2117,8 @@ static void accumulate_steal_time(struct kvm_vcpu *vcpu) static void record_steal_time(struct kvm_vcpu *vcpu) { + accumulate_steal_time(vcpu); + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; @@ -2192,7 +2194,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (guest_cpuid_has_tsc_adjust(vcpu)) { if (!msr_info->host_initiated) { s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; - kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true); + adjust_tsc_offset_guest(vcpu, adj); } vcpu->arch.ia32_tsc_adjust_msr = data; } @@ -2262,12 +2264,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!(data & KVM_MSR_ENABLED)) break; - vcpu->arch.st.last_steal = current->sched_info.run_delay; - - preempt_disable(); - accumulate_steal_time(vcpu); - preempt_enable(); - kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); break; @@ -2966,7 +2962,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->cpu = cpu; } - accumulate_steal_time(vcpu); kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); } @@ -4117,16 +4112,17 @@ static void kvm_init_msr_list(void) /* * Even MSRs that are valid in the host may not be exposed - * to the guests in some cases. We could work around this - * in VMX with the generic MSR save/load machinery, but it - * is not really worthwhile since it will really only - * happen with nested virtualization. + * to the guests in some cases. */ switch (msrs_to_save[i]) { case MSR_IA32_BNDCFGS: if (!kvm_x86_ops->mpx_supported()) continue; break; + case MSR_TSC_AUX: + if (!kvm_x86_ops->rdtscp_supported()) + continue; + break; default: break; } @@ -6370,12 +6366,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * KVM_DEBUGREG_WONT_EXIT again. 
*/ if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) { - int i; - WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP); kvm_x86_ops->sync_dirty_debug_regs(vcpu); - for (i = 0; i < KVM_NR_DB_REGS; i++) - vcpu->arch.eff_db[i] = vcpu->arch.db[i]; + kvm_update_dr0123(vcpu); + kvm_update_dr6(vcpu); + kvm_update_dr7(vcpu); + vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } /* diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 9b868124128d7..274a52b1183ea 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -29,7 +29,6 @@ #include <asm/uaccess.h> #include <asm/traps.h> -#include <asm/desc.h> #include <asm/user.h> #include <asm/i387.h> @@ -185,7 +184,7 @@ void math_emulate(struct math_emu_info *info) math_abort(FPU_info, SIGILL); } - code_descriptor = LDT_DESCRIPTOR(FPU_CS); + code_descriptor = FPU_get_ldt_descriptor(FPU_CS); if (SEG_D_SIZE(code_descriptor)) { /* The above test may be wrong, the book is not clear */ /* Segmented 32 bit protected mode */ diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 2c614410a5f39..d342fce49447c 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -16,9 +16,24 @@ #include <linux/kernel.h> #include <linux/mm.h> -/* s is always from a cpu register, and the cpu does bounds checking * during register load --> no further bounds checks needed */ -#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) +#include <asm/desc.h> +#include <asm/mmu_context.h> + +static inline struct desc_struct FPU_get_ldt_descriptor(unsigned seg) +{ + static struct desc_struct zero_desc; + struct desc_struct ret = zero_desc; + +#ifdef CONFIG_MODIFY_LDT_SYSCALL + seg >>= 3; + mutex_lock(&current->mm->context.lock); + if (current->mm->context.ldt && seg < current->mm->context.ldt->size) + ret = current->mm->context.ldt->entries[seg]; + mutex_unlock(&current->mm->context.lock); +#endif + return ret; +} + #define SEG_D_SIZE(x) ((x).b & (3 << 21)) #define SEG_G_BIT(x) ((x).b & (1 << 23)) #define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index 6ef5e99380f92..8300db71c2a62 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -20,7 +20,6 @@ #include <linux/stddef.h> #include <asm/uaccess.h> -#include <asm/desc.h> #include "fpu_system.h" #include "exception.h" @@ -158,7 +157,7 @@ static long pm_address(u_char FPU_modrm, u_char segment, addr->selector = PM_REG_(segment); } - descriptor = LDT_DESCRIPTOR(PM_REG_(segment)); + descriptor = FPU_get_ldt_descriptor(addr->selector); base_address = SEG_BASE_ADDR(descriptor); address = base_address + offset; limit = base_address diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 181c53bac3a7e..62855ac37ab7e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -285,6 +285,9 @@ static noinline int vmalloc_fault(unsigned long address) if (!pmd_k) return -1; + if (pmd_huge(*pmd_k)) + return 0; + pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) return -1; @@ -356,8 +359,6 @@ void vmalloc_sync_all(void) * 64-bit: * * Handle a fault on the vmalloc area - * - * This assumes no large pages in there.
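The assumption being deleted here is exactly what the 32-bit walker above and the 64-bit walker below now handle: the walk terminates successfully at a huge mapping instead of descending to a PTE level that does not exist. A stand-alone model of the pattern, with all types invented for the sketch:

    #include <stdio.h>

    /* Schematic page-table walk: a huge mapping ends the walk successfully
     * instead of being dereferenced as another table level. */
    struct entry { int present; int huge; struct entry *next; };

    static int walk(struct entry *pgd)
    {
        const char *names[] = { "pgd", "pud", "pmd", "pte" };
        struct entry *lvl = pgd;

        for (int i = 0; lvl; i++, lvl = lvl->next) {
            if (!lvl->present)
                return -1;      /* not mapped: let the fault be fatal */
            if (lvl->huge) {
                printf("huge mapping at %s level, done\n", names[i]);
                return 0;       /* patched behavior: stop here, success */
            }
        }
        return 0;
    }

    int main(void)
    {
        struct entry pte = { 1, 0, 0 };
        struct entry pmd = { 1, 1, &pte };  /* huge PMD, e.g. a 2 MiB mapping */
        struct entry pud = { 1, 0, &pmd };
        struct entry pgd = { 1, 0, &pud };
        return walk(&pgd) ? 1 : 0;
    }

Returning 0 mirrors vmalloc_fault(): the mapping is present, so the fault was spurious and the access can be retried.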
*/ static noinline int vmalloc_fault(unsigned long address) { @@ -399,17 +400,23 @@ static noinline int vmalloc_fault(unsigned long address) if (pud_none(*pud_ref)) return -1; - if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) + if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref)) BUG(); + if (pud_huge(*pud)) + return 0; + pmd = pmd_offset(pud, address); pmd_ref = pmd_offset(pud_ref, address); if (pmd_none(*pmd_ref)) return -1; - if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) + if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref)) BUG(); + if (pmd_huge(*pmd)) + return 0; + pte_ref = pte_offset_kernel(pmd_ref, address); if (!pte_present(*pte_ref)) return -1; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c8140e12816a5..c23ab1ee3a9a9 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -137,6 +137,7 @@ page_table_range_init_count(unsigned long start, unsigned long end) vaddr = start; pgd_idx = pgd_index(vaddr); + pmd_idx = pmd_index(vaddr); for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) { for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3fba623e3ba55..f9977a7a94448 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1132,7 +1132,7 @@ void mark_rodata_ro(void) * has been zapped already via cleanup_highmem(). */ all_end = roundup((unsigned long)_brk_end, PMD_SIZE); - set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); + set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT); rodata_test(); diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 4860906c6b9f8..9a54dbe980643 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -11,7 +11,19 @@ extern pgd_t early_level4_pgt[PTRS_PER_PGD]; extern struct range pfn_mapped[E820_X_MAX]; -extern unsigned char kasan_zero_page[PAGE_SIZE]; +static pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; +static pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss; +static pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss; + +/* + * This page is used as the early shadow. We don't use empty_zero_page + * at early stages, since stack instrumentation could write some garbage + * to this page. + * Later we reuse it as the zero shadow for large ranges of memory + * that are allowed to be accessed but are not instrumented by kasan + * (vmalloc/vmemmap ...).
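For orientation: KASAN maps each eight bytes of address space to one shadow byte, which is why the single zero page above, aliased through kasan_zero_pte/pmd/pud, can stand in for huge "accessible but uninstrumented" ranges. A sketch of the address arithmetic (the offset constant and example address are illustrative, not taken from this tree):

    #include <stdio.h>
    #include <stdint.h>

    #define KASAN_SHADOW_SCALE_SHIFT 3
    #define KASAN_SHADOW_OFFSET 0xdffffc0000000000ULL /* illustrative */

    static uint64_t kasan_mem_to_shadow(uint64_t addr)
    {
        return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
    }

    int main(void)
    {
        uint64_t addr = 0xffff880000000000ULL; /* an example kernel address */

        /* Eight consecutive bytes share one shadow byte. */
        printf("shadow(%#llx) = %#llx\n",
               (unsigned long long)addr,
               (unsigned long long)kasan_mem_to_shadow(addr));
        printf("shadow(%#llx) = %#llx\n",
               (unsigned long long)(addr + 7),
               (unsigned long long)kasan_mem_to_shadow(addr + 7));
        return 0;
    }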
+ */ +static unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; static int __init map_range(struct range *range) { @@ -36,7 +48,7 @@ static void __init clear_pgds(unsigned long start, pgd_clear(pgd_offset_k(start)); } -void __init kasan_map_early_shadow(pgd_t *pgd) +static void __init kasan_map_early_shadow(pgd_t *pgd) { int i; unsigned long start = KASAN_SHADOW_START; @@ -73,7 +85,7 @@ static int __init zero_pmd_populate(pud_t *pud, unsigned long addr, while (IS_ALIGNED(addr, PMD_SIZE) && addr + PMD_SIZE <= end) { WARN_ON(!pmd_none(*pmd)); set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PMD_SIZE; pmd = pmd_offset(pud, addr); } @@ -99,7 +111,7 @@ static int __init zero_pud_populate(pgd_t *pgd, unsigned long addr, while (IS_ALIGNED(addr, PUD_SIZE) && addr + PUD_SIZE <= end) { WARN_ON(!pud_none(*pud)); set_pud(pud, __pud(__pa_nodebug(kasan_zero_pmd) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PUD_SIZE; pud = pud_offset(pgd, addr); } @@ -124,7 +136,7 @@ static int __init zero_pgd_populate(unsigned long addr, unsigned long end) while (IS_ALIGNED(addr, PGDIR_SIZE) && addr + PGDIR_SIZE <= end) { WARN_ON(!pgd_none(*pgd)); set_pgd(pgd, __pgd(__pa_nodebug(kasan_zero_pud) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PGDIR_SIZE; pgd = pgd_offset_k(addr); } @@ -166,6 +178,26 @@ static struct notifier_block kasan_die_notifier = { }; #endif +void __init kasan_early_init(void) +{ + int i; + pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL; + pmdval_t pmd_val = __pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE; + pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE; + + for (i = 0; i < PTRS_PER_PTE; i++) + kasan_zero_pte[i] = __pte(pte_val); + + for (i = 0; i < PTRS_PER_PMD; i++) + kasan_zero_pmd[i] = __pmd(pmd_val); + + for (i = 0; i < PTRS_PER_PUD; i++) + kasan_zero_pud[i] = __pud(pud_val); + + kasan_map_early_shadow(early_level4_pgt); + kasan_map_early_shadow(init_level4_pgt); +} + void __init kasan_init(void) { int i; @@ -176,6 +208,7 @@ void __init kasan_init(void) memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt)); load_cr3(early_level4_pgt); + __flush_tlb_all(); clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); @@ -202,5 +235,6 @@ void __init kasan_init(void) memset(kasan_zero_page, 0, PAGE_SIZE); load_cr3(init_level4_pgt); + __flush_tlb_all(); init_task.kasan_depth = 0; } diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 9d518d693b4b7..844b06d67df4d 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -126,3 +126,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm) mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_MPX) + return "[mpx]"; + return NULL; +} diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index c439ec4782160..6a3c774eaff69 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -18,26 +18,9 @@ #include #include -static const char *mpx_mapping_name(struct vm_area_struct *vma) -{ - return "[mpx]"; -} - -static struct vm_operations_struct mpx_vma_ops = { - .name = mpx_mapping_name, -}; - -static int is_mpx_vma(struct vm_area_struct *vma) -{ - return (vma->vm_ops == &mpx_vma_ops); -} - /* * This is really a simplified "vm_mmap". it only handles MPX * bounds tables (the bounds directory is user-allocated). - * - * Later on, we use the vma->vm_ops to uniquely identify these - * VMAs. 
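With the dedicated vm_ops gone, bounds-table VMAs are now labelled through VM_MPX and the arch_vma_name() hook added in mmap.c above, so user space still sees them as "[mpx]" in /proc/<pid>/maps. A minimal, runnable way to look for that label (it prints nothing unless MPX bounds tables are actually mapped in the process):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char line[512];
        FILE *f = fopen("/proc/self/maps", "r");

        if (!f)
            return 1;
        while (fgets(line, sizeof(line), f))
            if (strstr(line, "[mpx]"))
                fputs(line, stdout); /* bounds-table VMAs, if any */
        fclose(f);
        return 0;
    }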
*/ static unsigned long mpx_mmap(unsigned long len) { @@ -83,7 +66,6 @@ static unsigned long mpx_mmap(unsigned long len) ret = -ENOMEM; goto out; } - vma->vm_ops = &mpx_vma_ops; if (vm_flags & VM_LOCKED) { up_write(&mm->mmap_sem); @@ -138,19 +120,19 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs, switch (type) { case REG_TYPE_RM: regno = X86_MODRM_RM(insn->modrm.value); - if (X86_REX_B(insn->rex_prefix.value) == 1) + if (X86_REX_B(insn->rex_prefix.value)) regno += 8; break; case REG_TYPE_INDEX: regno = X86_SIB_INDEX(insn->sib.value); - if (X86_REX_X(insn->rex_prefix.value) == 1) + if (X86_REX_X(insn->rex_prefix.value)) regno += 8; break; case REG_TYPE_BASE: regno = X86_SIB_BASE(insn->sib.value); - if (X86_REX_B(insn->rex_prefix.value) == 1) + if (X86_REX_B(insn->rex_prefix.value)) regno += 8; break; @@ -160,7 +142,7 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs, break; } - if (regno > nr_registers) { + if (regno >= nr_registers) { WARN_ONCE(1, "decoded an instruction with an invalid register"); return -EINVAL; } @@ -661,7 +643,7 @@ static int zap_bt_entries(struct mm_struct *mm, * so stop immediately and return an error. This * probably results in a SIGSEGV. */ - if (!is_mpx_vma(vma)) + if (!(vma->vm_flags & VM_MPX)) return -EINVAL; len = min(vma->vm_end, end) - addr; diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 89af288ec6740..2dd9b3ad3bb58 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -33,7 +33,7 @@ struct cpa_data { pgd_t *pgd; pgprot_t mask_set; pgprot_t mask_clr; - int numpages; + unsigned long numpages; int flags; unsigned long pfn; unsigned force_split : 1; @@ -1324,7 +1324,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) * CPA operation. Either a large page has been * preserved or a single page update happened. */ - BUG_ON(cpa->numpages > numpages); + BUG_ON(cpa->numpages > numpages || !cpa->numpages); numpages -= cpa->numpages; if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) cpa->curpage++; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 3250f2371aea5..061e0114005e5 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -117,7 +117,7 @@ static void flush_tlb_func(void *info) } else { unsigned long addr; unsigned long nr_pages = - f->flush_end - f->flush_start / PAGE_SIZE; + (f->flush_end - f->flush_start) / PAGE_SIZE; addr = f->flush_start; while (addr < f->flush_end) { __flush_tlb_single(addr); @@ -160,7 +160,10 @@ void flush_tlb_current_task(void) preempt_disable(); count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); + + /* This is an implicit full barrier that synchronizes with switch_mm. */ local_flush_tlb(); + trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL); if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); @@ -187,17 +190,29 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long base_pages_to_flush = TLB_FLUSH_ALL; preempt_disable(); - if (current->active_mm != mm) + if (current->active_mm != mm) { + /* Synchronize with switch_mm. */ + smp_mb(); + goto out; + } if (!current->mm) { leave_mm(smp_processor_id()); + + /* Synchronize with switch_mm. */ + smp_mb(); + goto out; } if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) base_pages_to_flush = (end - start) >> PAGE_SHIFT; + /* + * Both branches below are implicit full barriers (MOV to CR or + * INVLPG) that synchronize with switch_mm. 
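The barrier comments being added encode an ordering contract between the flush path and switch_mm(). A rough, compilable model of that contract, with C11 atomics standing in for the kernel's smp_mb() and for the implicit barriers of CR3 writes and INVLPG (all names invented for the sketch):

    #include <stdatomic.h>

    static atomic_int pte_generation; /* stands in for the page tables */
    static atomic_int cpu_in_mm;      /* stands in for mm_cpumask() membership */

    static void flusher(void)         /* flush_tlb_mm_range() side */
    {
        atomic_store(&pte_generation, 1);          /* update the PTEs */
        atomic_thread_fence(memory_order_seq_cst); /* smp_mb()/implicit barrier */
        if (atomic_load(&cpu_in_mm))
            ; /* would send a flush IPI to that CPU */
    }

    static void switcher(void)        /* switch_mm() side */
    {
        atomic_store(&cpu_in_mm, 1);               /* join mm_cpumask() */
        atomic_thread_fence(memory_order_seq_cst); /* the CR3 write acts as this */
        (void)atomic_load(&pte_generation);        /* user accesses see new PTEs */
    }

    int main(void)
    {
        switcher(); /* in the kernel these run concurrently on two CPUs */
        flusher();
        return 0;
    }

Drop either fence and the flusher can miss a CPU that is already running user code against stale PTEs, which is the race these comments guard against.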
+ */ if (base_pages_to_flush > tlb_single_page_flush_ceiling) { base_pages_to_flush = TLB_FLUSH_ALL; count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); @@ -227,10 +242,18 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) preempt_disable(); if (current->active_mm == mm) { - if (current->mm) + if (current->mm) { + /* + * Implicit full barrier (INVLPG) that synchronizes + * with switch_mm. + */ __flush_tlb_one(start); - else + } else { leave_mm(smp_processor_id()); + + /* Synchronize with switch_mm. */ + smp_mb(); + } } if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 14a63ed6fe092..ff9911707160a 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -81,6 +81,17 @@ static const struct dmi_system_id pci_crs_quirks[] __initconst = { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), }, }, + /* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/931368 */ + /* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/1033299 */ + { + .callback = set_use_crs, + .ident = "Foxconn K8M890-8237A", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Foxconn"), + DMI_MATCH(DMI_BOARD_NAME, "K8M890-8237A"), + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), + }, + }, /* Now for the blacklist.. */ @@ -121,8 +132,10 @@ void __init pci_acpi_crs_quirks(void) { int year; - if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008) - pci_use_crs = false; + if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008) { + if (iomem_resource.end <= 0xffffffff) + pci_use_crs = false; + } dmi_check_system(pci_crs_quirks); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 02744df576d52..477384985ac95 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -678,6 +678,70 @@ static void *realloc_pages(void *old_memmap, int old_shift) return ret; } +/* + * Iterate the EFI memory map in reverse order because the regions + * will be mapped top-down. The end result is the same as if we had + * mapped things forward, but doesn't require us to change the + * existing implementation of efi_map_region(). + */ +static inline void *efi_map_next_entry_reverse(void *entry) +{ + /* Initial call */ + if (!entry) + return memmap.map_end - memmap.desc_size; + + entry -= memmap.desc_size; + if (entry < memmap.map) + return NULL; + + return entry; +} + +/* + * efi_map_next_entry - Return the next EFI memory map descriptor + * @entry: Previous EFI memory map descriptor + * + * This is a helper function to iterate over the EFI memory map, which + * we do in different orders depending on the current configuration. + * + * To begin traversing the memory map @entry must be %NULL. + * + * Returns %NULL when we reach the end of the memory map. + */ +static void *efi_map_next_entry(void *entry) +{ + if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) { + /* + * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE + * config table feature requires us to map all entries + * in the same order as they appear in the EFI memory + * map. That is to say, entry N must have a lower + * virtual address than entry N+1. This is because the + * firmware toolchain leaves relative references in + * the code/data sections, which are split and become + * separate EFI memory regions. Mapping things + * out-of-order leads to the firmware accessing + * unmapped addresses. 
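One detail worth noting about the iterator's shape: EFI descriptors have a runtime-determined stride (memmap.desc_size), so iteration goes by byte offset rather than array index, forwards or backwards. A self-contained model of the reverse walk (sizes and names are local stand-ins for memmap.map, memmap.map_end and memmap.desc_size):

    #include <stdio.h>

    static char map[4 * 16];        /* 4 fake descriptors, 16 bytes each */
    static const size_t desc_size = 16;

    static void *next_entry_reverse(void *entry)
    {
        if (!entry)                 /* initial call: start at the last one */
            return map + sizeof(map) - desc_size;
        entry = (char *)entry - desc_size;
        return ((char *)entry < map) ? NULL : entry;
    }

    int main(void)
    {
        for (void *p = NULL; (p = next_entry_reverse(p)); )
            printf("descriptor at offset %td\n", (char *)p - map);
        return 0;
    }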
+ * + * Since we need to map things this way whether or not + * the kernel actually makes use of + * EFI_PROPERTIES_TABLE, let's just switch to this + * scheme by default for 64-bit. + */ + return efi_map_next_entry_reverse(entry); + } + + /* Initial call */ + if (!entry) + return memmap.map; + + entry += memmap.desc_size; + if (entry >= memmap.map_end) + return NULL; + + return entry; +} + /* * Map the efi memory ranges of the runtime services and update new_mmap with * virtual addresses. @@ -688,7 +752,8 @@ static void * __init efi_map_regions(int *count, int *pg_shift) unsigned long left = 0; efi_memory_desc_t *md; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + p = NULL; + while ((p = efi_map_next_entry(p))) { md = p; if (!(md->attribute & EFI_MEMORY_RUNTIME)) { #ifdef CONFIG_X86_64 @@ -946,6 +1011,11 @@ u64 efi_mem_attributes(unsigned long phys_addr) static int __init arch_parse_efi_cmdline(char *str) { + if (!str) { + pr_warn("need at least one option\n"); + return -EINVAL; + } + if (parse_option_str(str, "old_map")) set_bit(EFI_OLD_MEMMAP, &efi.flags); if (parse_option_str(str, "debug")) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 757678fb26e1a..bf9384488399c 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -23,6 +23,7 @@ #include <asm/debugreg.h> #include <asm/fpu-internal.h> /* pcntxt_mask */ #include <asm/cpu.h> +#include <asm/mmu_context.h> #ifdef CONFIG_X86_32 __visible unsigned long saved_context_ebx; @@ -154,7 +155,7 @@ static void fix_processor_context(void) syscall_init(); /* This sets MSR_*STAR and related */ #endif load_TR_desc(); /* This does ltr */ - load_LDT(&current->active_mm->context); /* This does lldt */ + load_mm_ldt(current->active_mm); /* This does lldt */ } /** diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index e88fda867a33b..484145368a241 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -8,7 +8,7 @@ config XEN select PARAVIRT_CLOCK select XEN_HAVE_PVMMU depends on X86_64 || (X86_32 && X86_PAE) - depends on X86_TSC + depends on X86_LOCAL_APIC && X86_TSC help This is the Linux Xen port.
Enabling this will allow the kernel to boot in a paravirtualized environment under the @@ -17,7 +17,7 @@ config XEN config XEN_DOM0 def_bool y depends on XEN && PCI_XEN && SWIOTLB_XEN - depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI + depends on X86_IO_APIC && ACPI && PCI config XEN_PVHVM def_bool y diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 7322755f337af..4b6e29ac0968c 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -13,13 +13,13 @@ CFLAGS_mmu.o := $(nostackp) obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ time.o xen-asm.o xen-asm_$(BITS).o \ grant-table.o suspend.o platform-pci-unplug.o \ - p2m.o + p2m.o apic.o obj-$(CONFIG_EVENT_TRACING) += trace.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o -obj-$(CONFIG_XEN_DOM0) += apic.o vga.o +obj-$(CONFIG_XEN_DOM0) += vga.o obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 46957ead3060e..a10ed8915bf49 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,6 +33,10 @@ #include <linux/memblock.h> #include <linux/edd.h> +#ifdef CONFIG_KEXEC +#include <linux/kexec.h> +#endif + #include <xen/xen.h> #include <xen/events.h> #include <xen/interface/xen.h> @@ -483,6 +487,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte_t pte; unsigned long pfn; struct page *page; + unsigned char dummy; ptep = lookup_address((unsigned long)v, &level); BUG_ON(ptep == NULL); @@ -492,6 +497,32 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte = pfn_pte(pfn, prot); + /* + * Careful: update_va_mapping() will fail if the virtual address + * we're poking isn't populated in the page tables. We don't + * need to worry about the direct map (that's always in the page + * tables), but we need to be careful about vmap space. In + * particular, the top level page table can lazily propagate + * entries between processes, so if we've switched mms since we + * vmapped the target in the first place, we might not have the + * top-level page table entry populated. + * + * We disable preemption because we want the same mm active when + * we probe the target and when we issue the hypercall. We'll + * have the same nominal mm, but if we're a kernel thread, lazy + * mm dropping could change our pgd. + * + * Out of an abundance of caution, this uses __get_user() to fault + * in the target address just in case there's some obscure case + * in which the target address isn't readable. + */ + + preempt_disable(); + + pagefault_disable(); /* Avoid warnings due to being atomic. */ + __get_user(dummy, (unsigned char __user __force *)v); + pagefault_enable(); + if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); @@ -503,6 +534,8 @@ static void set_aliased_prot(void *v, pgprot_t prot) BUG(); } else kmap_flush_unused(); + + preempt_enable(); } static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) @@ -510,6 +543,17 @@ static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; int i; + /* + * We need to mark all the aliases of the LDT pages RO. We + * don't need to call vm_flush_aliases(), though, since that's + * only responsible for flushing aliases out of the TLBs, not the + * page tables, and Xen will flush the TLB for us if needed. + * + * To avoid confusing future readers: none of this is necessary + * to load the LDT. The hypervisor only checks this when the + * LDT is faulted in due to subsequent descriptor access.
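For scale: an LDT descriptor is 8 bytes, so each 4 KiB page holds 512 entries and the loop below the comment touches one alias page per 512 entries. A runnable sketch of that arithmetic (the example LDT size is arbitrary):

    #include <stdio.h>

    #define PAGE_SIZE      4096
    #define LDT_ENTRY_SIZE 8

    int main(void)
    {
        const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; /* 512 */
        unsigned entries = 8192; /* an example LDT size */

        for (unsigned i = 0; i < entries; i += entries_per_page)
            printf("would set the page for entries [%u, %u) read-only\n",
                   i, i + entries_per_page);
        return 0;
    }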
+ */ + for(i = 0; i < entries; i += entries_per_page) set_aliased_prot(ldt + i, PAGE_KERNEL_RO); } @@ -1758,6 +1802,21 @@ static struct notifier_block xen_hvm_cpu_notifier = { .notifier_call = xen_hvm_cpu_notify, }; +#ifdef CONFIG_KEXEC +static void xen_hvm_shutdown(void) +{ + native_machine_shutdown(); + if (kexec_in_progress) + xen_reboot(SHUTDOWN_soft_reset); +} + +static void xen_hvm_crash_shutdown(struct pt_regs *regs) +{ + native_machine_crash_shutdown(regs); + xen_reboot(SHUTDOWN_soft_reset); +} +#endif + static void __init xen_hvm_guest_init(void) { if (xen_pv_domain()) @@ -1777,6 +1836,10 @@ static void __init xen_hvm_guest_init(void) x86_init.irqs.intr_init = xen_init_IRQ; xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); +#ifdef CONFIG_KEXEC + machine_ops.shutdown = xen_hvm_shutdown; + machine_ops.crash_shutdown = xen_hvm_crash_shutdown; +#endif } #endif diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 53b4c0811f4f6..6d3415144dab4 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -32,7 +32,8 @@ static void xen_hvm_post_suspend(int suspend_cancelled) { #ifdef CONFIG_XEN_PVHVM int cpu; - xen_hvm_init_shared_info(); + if (!suspend_cancelled) + xen_hvm_init_shared_info(); xen_callback_vector(); xen_unplug_emulated_devices(); if (xen_feature(XENFEAT_hvm_safe_pvclock)) { diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9e195c683549d..bef30cbb56c47 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -101,17 +101,15 @@ struct dom0_vga_console_info; #ifdef CONFIG_XEN_DOM0 void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); -void __init xen_init_apic(void); #else static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) { } -static inline void __init xen_init_apic(void) -{ -} #endif +void __init xen_init_apic(void); + #ifdef CONFIG_XEN_EFI extern void xen_efi_init(void); #else diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h index 677bfcf4ee5dd..28f33a8b7f5f5 100644 --- a/arch/xtensa/include/asm/traps.h +++ b/arch/xtensa/include/asm/traps.h @@ -25,30 +25,39 @@ static inline void spill_registers(void) { #if XCHAL_NUM_AREGS > 16 __asm__ __volatile__ ( - " call12 1f\n" + " call8 1f\n" " _j 2f\n" " retw\n" " .align 4\n" "1:\n" +#if XCHAL_NUM_AREGS == 32 + " _entry a1, 32\n" + " addi a8, a0, 3\n" + " _entry a1, 16\n" + " mov a12, a12\n" + " retw\n" +#else " _entry a1, 48\n" - " addi a12, a0, 3\n" -#if XCHAL_NUM_AREGS > 32 - " .rept (" __stringify(XCHAL_NUM_AREGS) " - 32) / 12\n" + " call12 1f\n" + " retw\n" + " .align 4\n" + "1:\n" + " .rept (" __stringify(XCHAL_NUM_AREGS) " - 16) / 12\n" " _entry a1, 48\n" " mov a12, a0\n" " .endr\n" -#endif - " _entry a1, 48\n" + " _entry a1, 16\n" #if XCHAL_NUM_AREGS % 12 == 0 - " mov a8, a8\n" -#elif XCHAL_NUM_AREGS % 12 == 4 " mov a12, a12\n" -#elif XCHAL_NUM_AREGS % 12 == 8 +#elif XCHAL_NUM_AREGS % 12 == 4 " mov a4, a4\n" +#elif XCHAL_NUM_AREGS % 12 == 8 + " mov a8, a8\n" #endif " retw\n" +#endif "2:\n" - : : : "a12", "a13", "memory"); + : : : "a8", "a9", "memory"); #else __asm__ __volatile__ ( " mov a12, a12\n" diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 82bbfa5a05b34..a2a902140c4e5 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -568,12 +568,13 @@ user_exception_exit: * (if we have restored WSBITS-1 frames). 
*/ +2: #if XCHAL_HAVE_THREADPTR l32i a3, a1, PT_THREADPTR wur a3, threadptr #endif -2: j common_exception_exit + j common_exception_exit /* This is the kernel exception exit. * We avoided to do a MOVSP when we entered the exception, but we @@ -1820,7 +1821,7 @@ ENDPROC(system_call) mov a12, a0 .endr #endif - _entry a1, 48 + _entry a1, 16 #if XCHAL_NUM_AREGS % 12 == 0 mov a8, a8 #elif XCHAL_NUM_AREGS % 12 == 4 @@ -1844,7 +1845,7 @@ ENDPROC(system_call) ENTRY(_switch_to) - entry a1, 16 + entry a1, 48 mov a11, a3 # and 'next' (a3) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 5cbd5d9ea61dd..39ce74d10e2b2 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -51,7 +51,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, unsigned long idx = BIO_POOL_NONE; unsigned inline_vecs; - if (!bs) { + if (!bs || !bs->bio_integrity_pool) { bip = kmalloc(sizeof(struct bio_integrity_payload) + sizeof(struct bio_vec) * nr_vecs, gfp_mask); inline_vecs = nr_vecs; @@ -104,7 +104,7 @@ void bio_integrity_free(struct bio *bio) kfree(page_address(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset); - if (bs) { + if (bs && bs->bio_integrity_pool) { if (bip->bip_slab != BIO_POOL_NONE) bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab); diff --git a/block/bio.c b/block/bio.c index f66a4eae16ee4..cbce3e2208f4c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1122,9 +1122,12 @@ int bio_uncopy_user(struct bio *bio) if (!bio_flagged(bio, BIO_NULL_MAPPED)) { /* * if we're in a workqueue, the request is orphaned, so - * don't copy into a random user address space, just free. + * don't copy into a random user address space, just free + * and return -EINTR so user space doesn't expect any data. */ - if (current->mm && bio_data_dir(bio) == READ) + if (!current->mm) + ret = -EINTR; + else if (bio_data_dir(bio) == READ) ret = bio_copy_to_iter(bio, bmd->iter); if (bmd->is_our_pages) bio_free_pages(bio); @@ -1814,8 +1817,9 @@ EXPORT_SYMBOL(bio_endio_nodec); * Allocates and returns a new bio which represents @sectors from the start of * @bio, and updates @bio to represent the remaining sectors. * - * The newly allocated bio will point to @bio's bi_io_vec; it is the caller's - * responsibility to ensure that @bio is not freed before the split. + * Unless this is a discard request the newly allocated bio will point + * to @bio's bi_io_vec; it is the caller's responsibility to ensure that + * @bio is not freed before the split. */ struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs) @@ -1825,7 +1829,15 @@ struct bio *bio_split(struct bio *bio, int sectors, BUG_ON(sectors <= 0); BUG_ON(sectors >= bio_sectors(bio)); - split = bio_clone_fast(bio, gfp, bs); + /* + * Discards need a mutable bio_vec to accommodate the payload + * required by the DSM TRIM and UNMAP commands. 
+ */ + if (bio->bi_rw & REQ_DISCARD) + split = bio_clone_bioset(bio, gfp, bs); + else + split = bio_clone_fast(bio, gfp, bs); + if (!split) return NULL; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 0ac817b750dbc..6817e28960b7e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -716,8 +716,12 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, return -EINVAL; disk = get_gendisk(MKDEV(major, minor), &part); - if (!disk || part) + if (!disk) return -EINVAL; + if (part) { + put_disk(disk); + return -EINVAL; + } rcu_read_lock(); spin_lock_irq(disk->queue->queue_lock); diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index b79685e06b70e..279c5d674edf3 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -141,15 +141,26 @@ static ssize_t blk_mq_sysfs_completed_show(struct blk_mq_ctx *ctx, char *page) static ssize_t sysfs_list_show(char *page, struct list_head *list, char *msg) { - char *start_page = page; struct request *rq; + int len = snprintf(page, PAGE_SIZE - 1, "%s:\n", msg); + + list_for_each_entry(rq, list, queuelist) { + const int rq_len = 2 * sizeof(rq) + 2; + + /* if the output will be truncated */ + if (PAGE_SIZE - 1 < len + rq_len) { + /* backspacing if it can't hold '\t...\n' */ + if (PAGE_SIZE - 1 < len + 5) + len -= rq_len; + len += snprintf(page + len, PAGE_SIZE - 1 - len, + "\t...\n"); + break; + } + len += snprintf(page + len, PAGE_SIZE - 1 - len, + "\t%p\n", rq); + } - page += sprintf(page, "%s:\n", msg); - - list_for_each_entry(rq, list, queuelist) - page += sprintf(page, "\t%p\n", rq); - - return page - start_page; + return len; } static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page) diff --git a/block/blk-mq.c b/block/blk-mq.c index 594eea04266e6..2dc1fd6c5bdb5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1968,7 +1968,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, goto err_hctxs; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); - blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30000); + blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ); q->nr_queues = nr_cpu_ids; q->nr_hw_queues = set->nr_hw_queues; diff --git a/block/blk-settings.c b/block/blk-settings.c index 12600bfffca93..e0057d035200c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -241,8 +241,8 @@ EXPORT_SYMBOL(blk_queue_bounce_limit); * Description: * Enables a low level driver to set a hard upper limit, * max_hw_sectors, on the size of requests. max_hw_sectors is set by - * the device driver based upon the combined capabilities of I/O - * controller and storage device. + * the device driver based upon the capabilities of the I/O + * controller. * * max_sectors is a soft limit imposed by the block layer for * filesystem type requests. 
This value can be overridden on a diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index db201bca15819..523dd10e17512 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -698,7 +698,7 @@ struct crypto_ablkcipher *crypto_alloc_ablkcipher(const char *alg_name, err: if (err != -EAGAIN) break; - if (signal_pending(current)) { + if (fatal_signal_pending(current)) { err = -EINTR; break; } diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f22cc56fd1b38..9641b74b53ef1 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -76,6 +76,8 @@ int af_alg_register_type(const struct af_alg_type *type) goto unlock; type->ops->owner = THIS_MODULE; + if (type->ops_nokey) + type->ops_nokey->owner = THIS_MODULE; node->type = type; list_add(&node->list, &alg_types); err = 0; @@ -125,6 +127,26 @@ int af_alg_release(struct socket *sock) } EXPORT_SYMBOL_GPL(af_alg_release); +void af_alg_release_parent(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + unsigned int nokey = ask->nokey_refcnt; + bool last = nokey && !ask->refcnt; + + sk = ask->parent; + ask = alg_sk(sk); + + lock_sock(sk); + ask->nokey_refcnt -= nokey; + if (!last) + last = !--ask->refcnt; + release_sock(sk); + + if (last) + sock_put(sk); +} +EXPORT_SYMBOL_GPL(af_alg_release_parent); + static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; @@ -132,6 +154,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sockaddr_alg *sa = (void *)uaddr; const struct af_alg_type *type; void *private; + int err; if (sock->state == SS_CONNECTED) return -EINVAL; @@ -157,16 +180,22 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) return PTR_ERR(private); } + err = -EBUSY; lock_sock(sk); + if (ask->refcnt | ask->nokey_refcnt) + goto unlock; swap(ask->type, type); swap(ask->private, private); + err = 0; + +unlock: release_sock(sk); alg_do_release(type, private); - return 0; + return err; } static int alg_setkey(struct sock *sk, char __user *ukey, @@ -199,11 +228,15 @@ static int alg_setsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); const struct af_alg_type *type; - int err = -ENOPROTOOPT; + int err = -EBUSY; lock_sock(sk); + if (ask->refcnt) + goto unlock; + type = ask->type; + err = -ENOPROTOOPT; if (level != SOL_ALG || !type) goto unlock; @@ -235,6 +268,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) struct alg_sock *ask = alg_sk(sk); const struct af_alg_type *type; struct sock *sk2; + unsigned int nokey; int err; lock_sock(sk); @@ -254,20 +288,29 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) security_sk_clone(sk, sk2); err = type->accept(ask->private, sk2); - if (err) { - sk_free(sk2); + + nokey = err == -ENOKEY; + if (nokey && type->accept_nokey) + err = type->accept_nokey(ask->private, sk2); + + if (err) goto unlock; - } sk2->sk_family = PF_ALG; - sock_hold(sk); + if (nokey || !ask->refcnt++) + sock_hold(sk); + ask->nokey_refcnt += nokey; alg_sk(sk2)->parent = sk; alg_sk(sk2)->type = type; + alg_sk(sk2)->nokey_refcnt = nokey; newsock->ops = type->ops; newsock->state = SS_CONNECTED; + if (nokey) + newsock->ops = type->ops_nokey; + err = 0; unlock: diff --git a/crypto/ahash.c b/crypto/ahash.c index 8acb886032ae7..d19b52324cf52 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -451,6 +451,7 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) struct ahash_alg *alg = crypto_ahash_alg(hash); hash->setkey = 
ahash_nosetkey; + hash->has_setkey = false; hash->export = ahash_no_export; hash->import = ahash_no_import; @@ -463,8 +464,10 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) hash->finup = alg->finup ?: ahash_def_finup; hash->digest = alg->digest; - if (alg->setkey) + if (alg->setkey) { hash->setkey = alg->setkey; + hash->has_setkey = true; + } if (alg->export) hash->export = alg->export; if (alg->import) @@ -544,7 +547,8 @@ static int ahash_prepare_alg(struct ahash_alg *alg) struct crypto_alg *base = &alg->halg.base; if (alg->halg.digestsize > PAGE_SIZE / 8 || - alg->halg.statesize > PAGE_SIZE / 8) + alg->halg.statesize > PAGE_SIZE / 8 || + alg->halg.statesize == 0) return -EINVAL; base->cra_type = &crypto_ahash_type; diff --git a/crypto/algapi.c b/crypto/algapi.c index d2627a3d4ed8b..dda720c6ab08d 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -337,7 +337,7 @@ static void crypto_wait_for_test(struct crypto_larval *larval) crypto_alg_tested(larval->alg.cra_driver_name, 0); } - err = wait_for_completion_interruptible(&larval->completion); + err = wait_for_completion_killable(&larval->completion); WARN_ON(err); out: diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 1396ad0787fc6..d7a3435280d89 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -34,6 +34,11 @@ struct hash_ctx { struct ahash_request req; }; +struct algif_hash_tfm { + struct crypto_ahash *hash; + bool has_key; +}; + static int hash_sendmsg(struct socket *sock, struct msghdr *msg, size_t ignored) { @@ -49,7 +54,8 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); if (!ctx->more) { - err = crypto_ahash_init(&ctx->req); + err = af_alg_wait_for_completion(crypto_ahash_init(&ctx->req), + &ctx->completion); if (err) goto unlock; } @@ -120,6 +126,7 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page, } else { if (!ctx->more) { err = crypto_ahash_init(&ctx->req); + err = af_alg_wait_for_completion(err, &ctx->completion); if (err) goto unlock; } @@ -227,19 +234,151 @@ static struct proto_ops algif_hash_ops = { .accept = hash_accept, }; +static int hash_check_key(struct socket *sock) +{ + int err = 0; + struct sock *psk; + struct alg_sock *pask; + struct algif_hash_tfm *tfm; + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + + lock_sock(sk); + if (ask->refcnt) + goto unlock_child; + + psk = ask->parent; + pask = alg_sk(ask->parent); + tfm = pask->private; + + err = -ENOKEY; + lock_sock_nested(psk, SINGLE_DEPTH_NESTING); + if (!tfm->has_key) + goto unlock; + + if (!pask->refcnt++) + sock_hold(psk); + + ask->refcnt = 1; + sock_put(psk); + + err = 0; + +unlock: + release_sock(psk); +unlock_child: + release_sock(sk); + + return err; +} + +static int hash_sendmsg_nokey(struct socket *sock, struct msghdr *msg, + size_t size) +{ + int err; + + err = hash_check_key(sock); + if (err) + return err; + + return hash_sendmsg(sock, msg, size); +} + +static ssize_t hash_sendpage_nokey(struct socket *sock, struct page *page, + int offset, size_t size, int flags) +{ + int err; + + err = hash_check_key(sock); + if (err) + return err; + + return hash_sendpage(sock, page, offset, size, flags); +} + +static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg, + size_t ignored, int flags) +{ + int err; + + err = hash_check_key(sock); + if (err) + return err; + + return hash_recvmsg(sock, msg, ignored, flags); +} + +static int hash_accept_nokey(struct socket *sock, struct socket *newsock, + int flags) +{ + int err; + + err = 
hash_check_key(sock); + if (err) + return err; + + return hash_accept(sock, newsock, flags); +} + +static struct proto_ops algif_hash_ops_nokey = { + .family = PF_ALG, + + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .getname = sock_no_getname, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .getsockopt = sock_no_getsockopt, + .mmap = sock_no_mmap, + .bind = sock_no_bind, + .setsockopt = sock_no_setsockopt, + .poll = sock_no_poll, + + .release = af_alg_release, + .sendmsg = hash_sendmsg_nokey, + .sendpage = hash_sendpage_nokey, + .recvmsg = hash_recvmsg_nokey, + .accept = hash_accept_nokey, +}; + static void *hash_bind(const char *name, u32 type, u32 mask) { - return crypto_alloc_ahash(name, type, mask); + struct algif_hash_tfm *tfm; + struct crypto_ahash *hash; + + tfm = kzalloc(sizeof(*tfm), GFP_KERNEL); + if (!tfm) + return ERR_PTR(-ENOMEM); + + hash = crypto_alloc_ahash(name, type, mask); + if (IS_ERR(hash)) { + kfree(tfm); + return ERR_CAST(hash); + } + + tfm->hash = hash; + + return tfm; } static void hash_release(void *private) { - crypto_free_ahash(private); + struct algif_hash_tfm *tfm = private; + + crypto_free_ahash(tfm->hash); + kfree(tfm); } static int hash_setkey(void *private, const u8 *key, unsigned int keylen) { - return crypto_ahash_setkey(private, key, keylen); + struct algif_hash_tfm *tfm = private; + int err; + + err = crypto_ahash_setkey(tfm->hash, key, keylen); + tfm->has_key = !err; + + return err; } static void hash_sock_destruct(struct sock *sk) @@ -253,12 +392,14 @@ static void hash_sock_destruct(struct sock *sk) af_alg_release_parent(sk); } -static int hash_accept_parent(void *private, struct sock *sk) +static int hash_accept_parent_nokey(void *private, struct sock *sk) { struct hash_ctx *ctx; struct alg_sock *ask = alg_sk(sk); - unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(private); - unsigned ds = crypto_ahash_digestsize(private); + struct algif_hash_tfm *tfm = private; + struct crypto_ahash *hash = tfm->hash; + unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(hash); + unsigned ds = crypto_ahash_digestsize(hash); ctx = sock_kmalloc(sk, len, GFP_KERNEL); if (!ctx) @@ -278,7 +419,7 @@ static int hash_accept_parent(void *private, struct sock *sk) ask->private = ctx; - ahash_request_set_tfm(&ctx->req, private); + ahash_request_set_tfm(&ctx->req, hash); ahash_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG, af_alg_complete, &ctx->completion); @@ -287,12 +428,24 @@ static int hash_accept_parent(void *private, struct sock *sk) return 0; } +static int hash_accept_parent(void *private, struct sock *sk) +{ + struct algif_hash_tfm *tfm = private; + + if (!tfm->has_key && crypto_ahash_has_setkey(tfm->hash)) + return -ENOKEY; + + return hash_accept_parent_nokey(private, sk); +} + static const struct af_alg_type algif_type_hash = { .bind = hash_bind, .release = hash_release, .setkey = hash_setkey, .accept = hash_accept_parent, + .accept_nokey = hash_accept_parent_nokey, .ops = &algif_hash_ops, + .ops_nokey = &algif_hash_ops_nokey, .name = "hash", .owner = THIS_MODULE }; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 945075292bc95..c0f03562a1457 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -31,6 +31,11 @@ struct skcipher_sg_list { struct scatterlist sg[0]; }; +struct skcipher_tfm { + struct crypto_ablkcipher *skcipher; + bool has_key; +}; + struct skcipher_ctx { struct list_head tsgl; struct af_alg_sgl rsgl; @@ -387,7 +392,8 @@ static int 
skcipher_sendmsg(struct socket *sock, struct msghdr *msg, sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list); sg = sgl->sg; - sg_unmark_end(sg + sgl->cur); + if (sgl->cur) + sg_unmark_end(sg + sgl->cur - 1); do { i = sgl->cur; plen = min_t(int, len, PAGE_SIZE); @@ -749,19 +755,139 @@ static struct proto_ops algif_skcipher_ops = { .poll = skcipher_poll, }; +static int skcipher_check_key(struct socket *sock) +{ + int err = 0; + struct sock *psk; + struct alg_sock *pask; + struct skcipher_tfm *tfm; + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + + lock_sock(sk); + if (ask->refcnt) + goto unlock_child; + + psk = ask->parent; + pask = alg_sk(ask->parent); + tfm = pask->private; + + err = -ENOKEY; + lock_sock_nested(psk, SINGLE_DEPTH_NESTING); + if (!tfm->has_key) + goto unlock; + + if (!pask->refcnt++) + sock_hold(psk); + + ask->refcnt = 1; + sock_put(psk); + + err = 0; + +unlock: + release_sock(psk); +unlock_child: + release_sock(sk); + + return err; +} + +static int skcipher_sendmsg_nokey(struct socket *sock, struct msghdr *msg, + size_t size) +{ + int err; + + err = skcipher_check_key(sock); + if (err) + return err; + + return skcipher_sendmsg(sock, msg, size); +} + +static ssize_t skcipher_sendpage_nokey(struct socket *sock, struct page *page, + int offset, size_t size, int flags) +{ + int err; + + err = skcipher_check_key(sock); + if (err) + return err; + + return skcipher_sendpage(sock, page, offset, size, flags); +} + +static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg, + size_t ignored, int flags) +{ + int err; + + err = skcipher_check_key(sock); + if (err) + return err; + + return skcipher_recvmsg(sock, msg, ignored, flags); +} + +static struct proto_ops algif_skcipher_ops_nokey = { + .family = PF_ALG, + + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .getname = sock_no_getname, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .getsockopt = sock_no_getsockopt, + .mmap = sock_no_mmap, + .bind = sock_no_bind, + .accept = sock_no_accept, + .setsockopt = sock_no_setsockopt, + + .release = af_alg_release, + .sendmsg = skcipher_sendmsg_nokey, + .sendpage = skcipher_sendpage_nokey, + .recvmsg = skcipher_recvmsg_nokey, + .poll = skcipher_poll, +}; + static void *skcipher_bind(const char *name, u32 type, u32 mask) { - return crypto_alloc_ablkcipher(name, type, mask); + struct skcipher_tfm *tfm; + struct crypto_ablkcipher *skcipher; + + tfm = kzalloc(sizeof(*tfm), GFP_KERNEL); + if (!tfm) + return ERR_PTR(-ENOMEM); + + skcipher = crypto_alloc_ablkcipher(name, type, mask); + if (IS_ERR(skcipher)) { + kfree(tfm); + return ERR_CAST(skcipher); + } + + tfm->skcipher = skcipher; + + return tfm; } static void skcipher_release(void *private) { - crypto_free_ablkcipher(private); + struct skcipher_tfm *tfm = private; + + crypto_free_ablkcipher(tfm->skcipher); + kfree(tfm); } static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen) { - return crypto_ablkcipher_setkey(private, key, keylen); + struct skcipher_tfm *tfm = private; + int err; + + err = crypto_ablkcipher_setkey(tfm->skcipher, key, keylen); + tfm->has_key = !err; + + return err; } static void skcipher_wait(struct sock *sk) @@ -789,24 +915,26 @@ static void skcipher_sock_destruct(struct sock *sk) af_alg_release_parent(sk); } -static int skcipher_accept_parent(void *private, struct sock *sk) +static int skcipher_accept_parent_nokey(void *private, struct sock *sk) { struct skcipher_ctx *ctx; struct alg_sock *ask = 
alg_sk(sk); - unsigned int len = sizeof(*ctx) + crypto_ablkcipher_reqsize(private); + struct skcipher_tfm *tfm = private; + struct crypto_ablkcipher *skcipher = tfm->skcipher; + unsigned int len = sizeof(*ctx) + crypto_ablkcipher_reqsize(skcipher); ctx = sock_kmalloc(sk, len, GFP_KERNEL); if (!ctx) return -ENOMEM; - ctx->iv = sock_kmalloc(sk, crypto_ablkcipher_ivsize(private), + ctx->iv = sock_kmalloc(sk, crypto_ablkcipher_ivsize(skcipher), GFP_KERNEL); if (!ctx->iv) { sock_kfree_s(sk, ctx, len); return -ENOMEM; } - memset(ctx->iv, 0, crypto_ablkcipher_ivsize(private)); + memset(ctx->iv, 0, crypto_ablkcipher_ivsize(skcipher)); INIT_LIST_HEAD(&ctx->tsgl); ctx->len = len; @@ -819,7 +947,7 @@ static int skcipher_accept_parent(void *private, struct sock *sk) ask->private = ctx; - ablkcipher_request_set_tfm(&ctx->req, private); + ablkcipher_request_set_tfm(&ctx->req, skcipher); ablkcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG, af_alg_complete, &ctx->completion); @@ -828,12 +956,24 @@ static int skcipher_accept_parent(void *private, struct sock *sk) return 0; } +static int skcipher_accept_parent(void *private, struct sock *sk) +{ + struct skcipher_tfm *tfm = private; + + if (!tfm->has_key) + return -ENOKEY; + + return skcipher_accept_parent_nokey(private, sk); +} + static const struct af_alg_type algif_type_skcipher = { .bind = skcipher_bind, .release = skcipher_release, .setkey = skcipher_setkey, .accept = skcipher_accept_parent, + .accept_nokey = skcipher_accept_parent_nokey, .ops = &algif_skcipher_ops, + .ops_nokey = &algif_skcipher_ops_nokey, .name = "skcipher", .owner = THIS_MODULE }; diff --git a/crypto/api.c b/crypto/api.c index afe4610afc4b9..bbc147cb5dec8 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -172,7 +172,7 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg) struct crypto_larval *larval = (void *)alg; long timeout; - timeout = wait_for_completion_interruptible_timeout( + timeout = wait_for_completion_killable_timeout( &larval->completion, 60 * HZ); alg = larval->adult; @@ -445,7 +445,7 @@ struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask) err: if (err != -EAGAIN) break; - if (signal_pending(current)) { + if (fatal_signal_pending(current)) { err = -EINTR; break; } @@ -562,7 +562,7 @@ void *crypto_alloc_tfm(const char *alg_name, err: if (err != -EAGAIN) break; - if (signal_pending(current)) { + if (fatal_signal_pending(current)) { err = -EINTR; break; } diff --git a/crypto/asymmetric_keys/asymmetric_keys.h b/crypto/asymmetric_keys/asymmetric_keys.h index f97330886d587..3f5b537ab33ec 100644 --- a/crypto/asymmetric_keys/asymmetric_keys.h +++ b/crypto/asymmetric_keys/asymmetric_keys.h @@ -11,6 +11,9 @@ extern struct asymmetric_key_id *asymmetric_key_hex_to_key_id(const char *id); +extern int __asymmetric_key_hex_to_key_id(const char *id, + struct asymmetric_key_id *match_id, + size_t hexlen); static inline const struct asymmetric_key_ids *asymmetric_key_ids(const struct key *key) { diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index bcbbbd794e1da..b0e4ed23d6683 100644 --- a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -104,6 +104,15 @@ static bool asymmetric_match_key_ids( return false; } +/* helper function can be called directly with pre-allocated memory */ +inline int __asymmetric_key_hex_to_key_id(const char *id, + struct asymmetric_key_id *match_id, + size_t hexlen) +{ + match_id->len = hexlen; + return 
hex2bin(match_id->data, id, hexlen); +} + /** * asymmetric_key_hex_to_key_id - Convert a hex string into a key ID. * @id: The ID as a hex string. @@ -111,21 +120,20 @@ static bool asymmetric_match_key_ids( struct asymmetric_key_id *asymmetric_key_hex_to_key_id(const char *id) { struct asymmetric_key_id *match_id; - size_t hexlen; + size_t asciihexlen; int ret; if (!*id) return ERR_PTR(-EINVAL); - hexlen = strlen(id); - if (hexlen & 1) + asciihexlen = strlen(id); + if (asciihexlen & 1) return ERR_PTR(-EINVAL); - match_id = kmalloc(sizeof(struct asymmetric_key_id) + hexlen / 2, + match_id = kmalloc(sizeof(struct asymmetric_key_id) + asciihexlen / 2, GFP_KERNEL); if (!match_id) return ERR_PTR(-ENOMEM); - match_id->len = hexlen / 2; - ret = hex2bin(match_id->data, id, hexlen / 2); + ret = __asymmetric_key_hex_to_key_id(id, match_id, asciihexlen / 2); if (ret < 0) { kfree(match_id); return ERR_PTR(-EINVAL); diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index a6c42031628e9..4c850ac474e20 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -28,17 +28,30 @@ static bool use_builtin_keys; static struct asymmetric_key_id *ca_keyid; #ifndef MODULE +static struct { + struct asymmetric_key_id id; + unsigned char data[10]; +} cakey; + static int __init ca_keys_setup(char *str) { if (!str) /* default system keyring */ return 1; if (strncmp(str, "id:", 3) == 0) { - struct asymmetric_key_id *p; - p = asymmetric_key_hex_to_key_id(str + 3); - if (p == ERR_PTR(-EINVAL)) - pr_err("Unparsable hex string in ca_keys\n"); - else if (!IS_ERR(p)) + struct asymmetric_key_id *p = &cakey.id; + size_t hexlen = (strlen(str) - 3) / 2; + int ret; + + if (hexlen == 0 || hexlen > sizeof(cakey.data)) { + pr_err("Missing or invalid ca_keys id\n"); + return 1; + } + + ret = __asymmetric_key_hex_to_key_id(str + 3, p, hexlen); + if (ret < 0) + pr_err("Unparsable ca_keys id hex string\n"); + else ca_keyid = p; /* owner key 'id:xxxxxx' */ } else if (strcmp(str, "builtin") == 0) { use_builtin_keys = true; @@ -294,10 +307,6 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) srlen = cert->raw_serial_size; q = cert->raw_serial; } - if (srlen > 1 && *q == 0) { - srlen--; - q++; - } ret = -ENOMEM; desc = kmalloc(sulen + 2 + srlen * 2 + 1, GFP_KERNEL); diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c index 06f1b60f02b22..4c0a0e2718769 100644 --- a/crypto/crc32c_generic.c +++ b/crypto/crc32c_generic.c @@ -172,4 +172,3 @@ MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations wrapper for lib/crc32c"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CRYPTO("crc32c"); MODULE_ALIAS_CRYPTO("crc32c-generic"); -MODULE_SOFTDEP("pre: crc32c"); diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 41dfe762b7fba..6a050e12fcdf1 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -381,7 +381,7 @@ static struct crypto_alg *crypto_user_aead_alg(const char *name, u32 type, err = PTR_ERR(alg); if (err != -EAGAIN) break; - if (signal_pending(current)) { + if (fatal_signal_pending(current)) { err = -EINTR; break; } @@ -499,6 +499,7 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (link->dump == NULL) return -EINVAL; + down_read(&crypto_alg_sem); list_for_each_entry(alg, &crypto_alg_list, cra_list) dump_alloc += CRYPTO_REPORT_MAXSIZE; @@ -508,8 +509,11 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) .done = link->done, .min_dump_alloc = dump_alloc, }; - return 
netlink_dump_start(crypto_nlsk, skb, nlh, &c); + err = netlink_dump_start(crypto_nlsk, skb, nlh, &c); } + up_read(&crypto_alg_sem); + + return err; } err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX, diff --git a/crypto/shash.c b/crypto/shash.c index 47c713954bf30..03fbcd4a82c44 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -354,9 +354,10 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm) crt->final = shash_async_final; crt->finup = shash_async_finup; crt->digest = shash_async_digest; + crt->setkey = shash_async_setkey; + + crt->has_setkey = alg->setkey != shash_no_setkey; - if (alg->setkey) - crt->setkey = shash_async_setkey; if (alg->export) crt->export = shash_async_export; if (alg->import) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 37fb190476039..73f056a597a9d 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -352,13 +352,16 @@ static int acpi_lpss_create_device(struct acpi_device *adev, pdata->mmio_size = resource_size(rentry->res); pdata->mmio_base = ioremap(rentry->res->start, pdata->mmio_size); - if (!pdata->mmio_base) - goto err_out; break; } acpi_dev_free_resource_list(&resource_list); + if (!pdata->mmio_base) { + ret = -ENOMEM; + goto err_out; + } + pdata->dev_desc = dev_desc; if (dev_desc->setup) diff --git a/drivers/acpi/acpi_pnp.c b/drivers/acpi/acpi_pnp.c index ff6d8adc9cda6..fb765524cc3d5 100644 --- a/drivers/acpi/acpi_pnp.c +++ b/drivers/acpi/acpi_pnp.c @@ -153,6 +153,7 @@ static const struct acpi_device_id acpi_pnp_device_ids[] = { {"AEI0250"}, /* PROLiNK 1456VH ISA PnP K56flex Fax Modem */ {"AEI1240"}, /* Actiontec ISA PNP 56K X2 Fax Modem */ {"AKY1021"}, /* Rockwell 56K ACF II Fax+Data+Voice Modem */ + {"ALI5123"}, /* ALi Fast Infrared Controller */ {"AZT4001"}, /* AZT3005 PnP SOUND DEVICE */ {"BDP3336"}, /* Best Data Products Inc. Smart One 336F PnP Modem */ {"BRI0A49"}, /* Boca Complete Ofc Communicator 14.4 Data-FAX */ diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index 87b27521fcacb..7f50dd9eb1d0e 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -213,6 +213,7 @@ struct acpi_table_list { #define ACPI_TABLE_INDEX_DSDT (0) #define ACPI_TABLE_INDEX_FACS (1) +#define ACPI_TABLE_INDEX_X_FACS (2) struct acpi_find_context { char *search_for; diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c index 7d2486005e3f2..05be59c772c75 100644 --- a/drivers/acpi/acpica/tbfadt.c +++ b/drivers/acpi/acpica/tbfadt.c @@ -350,9 +350,18 @@ void acpi_tb_parse_fadt(u32 table_index) /* If Hardware Reduced flag is set, there is no FACS */ if (!acpi_gbl_reduced_hardware) { - acpi_tb_install_fixed_table((acpi_physical_address) - acpi_gbl_FADT.Xfacs, ACPI_SIG_FACS, - ACPI_TABLE_INDEX_FACS); + if (acpi_gbl_FADT.facs) { + acpi_tb_install_fixed_table((acpi_physical_address) + acpi_gbl_FADT.facs, + ACPI_SIG_FACS, + ACPI_TABLE_INDEX_FACS); + } + if (acpi_gbl_FADT.Xfacs) { + acpi_tb_install_fixed_table((acpi_physical_address) + acpi_gbl_FADT.Xfacs, + ACPI_SIG_FACS, + ACPI_TABLE_INDEX_X_FACS); + } } } @@ -491,13 +500,9 @@ static void acpi_tb_convert_fadt(void) acpi_gbl_FADT.header.length = sizeof(struct acpi_table_fadt); /* - * Expand the 32-bit FACS and DSDT addresses to 64-bit as necessary. + * Expand the 32-bit DSDT addresses to 64-bit as necessary. * Later ACPICA code will always use the X 64-bit field. 
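The 32-bit/64-bit FACS handling this change feeds into (see the tbutils.c hunk below) boils down to a two-line preference-with-fallback; a compilable distillation, with strings standing in for the mapped tables:

    #include <stdio.h>

    /* Illustrative only: keep whichever FACS exists, honoring the
     * requested address width when both are present. */
    static const char *pick_facs(const char *facs32, const char *facs64,
                                 int use_32bit_addresses)
    {
        if (use_32bit_addresses)
            return facs32 ? facs32 : facs64;
        return facs64 ? facs64 : facs32;
    }

    int main(void)
    {
        printf("%s\n", pick_facs("facs32", NULL, 0));     /* falls back: facs32 */
        printf("%s\n", pick_facs("facs32", "facs64", 0)); /* prefers: facs64 */
        return 0;
    }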
*/ - acpi_gbl_FADT.Xfacs = acpi_tb_select_address("FACS", - acpi_gbl_FADT.facs, - acpi_gbl_FADT.Xfacs); - acpi_gbl_FADT.Xdsdt = acpi_tb_select_address("DSDT", acpi_gbl_FADT.dsdt, acpi_gbl_FADT.Xdsdt); diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c index 6559a58439c5d..2fb1afaacc6d6 100644 --- a/drivers/acpi/acpica/tbutils.c +++ b/drivers/acpi/acpica/tbutils.c @@ -68,7 +68,8 @@ acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size); acpi_status acpi_tb_initialize_facs(void) { - acpi_status status; + struct acpi_table_facs *facs32; + struct acpi_table_facs *facs64; /* If Hardware Reduced flag is set, there is no FACS */ @@ -77,11 +78,22 @@ acpi_status acpi_tb_initialize_facs(void) return (AE_OK); } - status = acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS, - ACPI_CAST_INDIRECT_PTR(struct - acpi_table_header, - &acpi_gbl_FACS)); - return (status); + (void)acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS, + ACPI_CAST_INDIRECT_PTR(struct + acpi_table_header, + &facs32)); + (void)acpi_get_table_by_index(ACPI_TABLE_INDEX_X_FACS, + ACPI_CAST_INDIRECT_PTR(struct + acpi_table_header, + &facs64)); + + if (acpi_gbl_use32_bit_facs_addresses) { + acpi_gbl_FACS = facs32 ? facs32 : facs64; + } else { + acpi_gbl_FACS = facs64 ? facs64 : facs32; + } + + return (AE_OK); } #endif /* !ACPI_REDUCED_HARDWARE */ @@ -101,7 +113,7 @@ acpi_status acpi_tb_initialize_facs(void) u8 acpi_tb_tables_loaded(void) { - if (acpi_gbl_root_table_list.current_table_count >= 3) { + if (acpi_gbl_root_table_list.current_table_count >= 4) { return (TRUE); } @@ -357,11 +369,11 @@ acpi_status __init acpi_tb_parse_root_table(acpi_physical_address rsdp_address) table_entry = ACPI_ADD_PTR(u8, table, sizeof(struct acpi_table_header)); /* - * First two entries in the table array are reserved for the DSDT - * and FACS, which are not actually present in the RSDT/XSDT - they - * come from the FADT + * First three entries in the table array are reserved for the DSDT + * and 32bit/64bit FACS, which are not actually present in the + * RSDT/XSDT - they come from the FADT */ - acpi_gbl_root_table_list.current_table_count = 2; + acpi_gbl_root_table_list.current_table_count = 3; /* Initialize the root table array from the RSDT/XSDT */ diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c index aadb3002a2ddd..b63e35d6d1bf8 100644 --- a/drivers/acpi/acpica/tbxfload.c +++ b/drivers/acpi/acpica/tbxfload.c @@ -166,7 +166,8 @@ static acpi_status acpi_tb_load_namespace(void) (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); for (i = 0; i < acpi_gbl_root_table_list.current_table_count; ++i) { - if ((!ACPI_COMPARE_NAME + if (!acpi_gbl_root_table_list.tables[i].address || + (!ACPI_COMPARE_NAME (&(acpi_gbl_root_table_list.tables[i].signature), ACPI_SIG_SSDT) && diff --git a/drivers/acpi/acpica/utxfinit.c b/drivers/acpi/acpica/utxfinit.c index 083a768918892..42a32a66ef22a 100644 --- a/drivers/acpi/acpica/utxfinit.c +++ b/drivers/acpi/acpica/utxfinit.c @@ -179,10 +179,12 @@ acpi_status __init acpi_enable_subsystem(u32 flags) * Obtain a permanent mapping for the FACS. 
This is required for the * Global Lock and the Firmware Waking Vector */ - status = acpi_tb_initialize_facs(); - if (ACPI_FAILURE(status)) { - ACPI_WARNING((AE_INFO, "Could not map the FACS table")); - return_ACPI_STATUS(status); + if (!(flags & ACPI_NO_FACS_INIT)) { + status = acpi_tb_initialize_facs(); + if (ACPI_FAILURE(status)) { + ACPI_WARNING((AE_INFO, "Could not map the FACS table")); + return_ACPI_STATUS(status); + } } #endif /* !ACPI_REDUCED_HARDWARE */ diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index c412fdb28d344..513e7230e3d04 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -470,6 +470,16 @@ static int __init acpi_bus_init_irq(void) return 0; } +/** + * acpi_early_init - Initialize ACPICA and populate the ACPI namespace. + * + * The ACPI tables are accessible after this, but the handling of events has not + * been initialized and the global lock is not available yet, so AML should not + * be executed at this point. + * + * Doing this before switching the EFI runtime services to virtual mode allows + * the EfiBootServices memory to be freed slightly earlier on boot. + */ void __init acpi_early_init(void) { acpi_status status; @@ -533,26 +543,42 @@ void __init acpi_early_init(void) acpi_gbl_FADT.sci_interrupt = acpi_sci_override_gsi; } #endif + return; + + error0: + disable_acpi(); +} + +/** + * acpi_subsystem_init - Finalize the early initialization of ACPI. + * + * Switch over the platform to the ACPI mode (if possible), initialize the + * handling of ACPI events, install the interrupt and global lock handlers. + * + * Doing this too early is generally unsafe, but at the same time it needs to be + * done before all things that really depend on ACPI. The right spot appears to + * be before finalizing the EFI initialization. + */ +void __init acpi_subsystem_init(void) +{ + acpi_status status; + + if (acpi_disabled) + return; status = acpi_enable_subsystem(~ACPI_NO_ACPI_ENABLE); if (ACPI_FAILURE(status)) { printk(KERN_ERR PREFIX "Unable to enable ACPI\n"); - goto error0; + disable_acpi(); + } else { + /* + * If the system is using ACPI then we can be reasonably + * confident that any regulators are managed by the firmware + * so tell the regulator core it has everything it needs to + * know. + */ + regulator_has_full_constraints(); } - - /* - * If the system is using ACPI then we can be reasonably - * confident that any regulators are managed by the firmware - * so tell the regulator core it has everything it needs to - * know. 
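The kernel-doc comments above pin down an ordering: tables become usable in acpi_early_init(), while switching to ACPI mode, event handling, and the global lock wait for acpi_subsystem_init(). A speculative sketch of that sequence; everything except the two ACPI entry points is an assumed placeholder:

extern void acpi_early_init(void);
extern void acpi_subsystem_init(void);

static void boot_order_sketch(void)
{
        acpi_early_init();      /* tables usable; no AML execution yet */
        /* ... switch EFI runtime services to virtual mode here, so
         *     EfiBootServices memory can be freed slightly earlier ... */
        acpi_subsystem_init();  /* ACPI mode, events, global lock */
        /* ... finalize EFI initialization, then the rest of boot ... */
}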
- */ - regulator_has_full_constraints(); - - return; - - error0: - disable_acpi(); - return; } static int __init acpi_bus_init(void) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 735db11a9b001..8217e0bda60f6 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -953,6 +953,7 @@ EXPORT_SYMBOL_GPL(acpi_subsys_prepare); */ void acpi_subsys_complete(struct device *dev) { + pm_generic_complete(dev); /* * If the device had been runtime-suspended before the system went into * the sleep state it is going out of and it has never been resumed till diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 7ccba395c9ddb..98f5316aad727 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -182,7 +182,7 @@ static void __init acpi_request_region (struct acpi_generic_address *gas, request_mem_region(addr, length, desc); } -static void __init acpi_reserve_resources(void) +static int __init acpi_reserve_resources(void) { acpi_request_region(&acpi_gbl_FADT.xpm1a_event_block, acpi_gbl_FADT.pm1_event_length, "ACPI PM1a_EVT_BLK"); @@ -211,7 +211,10 @@ static void __init acpi_reserve_resources(void) if (!(acpi_gbl_FADT.gpe1_block_length & 0x1)) acpi_request_region(&acpi_gbl_FADT.xgpe1_block, acpi_gbl_FADT.gpe1_block_length, "ACPI GPE1_BLK"); + + return 0; } +fs_initcall_sync(acpi_reserve_resources); void acpi_os_printf(const char *fmt, ...) { @@ -1842,7 +1845,6 @@ acpi_status __init acpi_os_initialize(void) acpi_status __init acpi_os_initialize1(void) { - acpi_reserve_resources(); kacpid_wq = alloc_workqueue("kacpid", 0, 1); kacpi_notify_wq = alloc_workqueue("kacpi_notify", 0, 1); kacpi_hotplug_wq = alloc_ordered_workqueue("kacpi_hotplug", 0); diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index cfd7581cc19fa..b09ad554430ac 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -825,6 +825,22 @@ void acpi_penalize_isa_irq(int irq, int active) } } +/* + * Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict with + * PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be use for + * PCI IRQs. + */ +void acpi_penalize_sci_irq(int irq, int trigger, int polarity) +{ + if (irq >= 0 && irq < ARRAY_SIZE(acpi_irq_penalty)) { + if (trigger != ACPI_MADT_TRIGGER_LEVEL || + polarity != ACPI_MADT_POLARITY_ACTIVE_LOW) + acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_ALWAYS; + else + acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING; + } +} + /* * Over-ride default table to reserve additional IRQs for use by ISA * e.g. acpi_irq_isa=5 diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 8244f013f2109..f1c966e050784 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -193,6 +193,7 @@ static bool acpi_decode_space(struct resource_win *win, u8 iodec = attr->granularity == 0xfff ? ACPI_DECODE_10 : ACPI_DECODE_16; bool wp = addr->info.mem.write_protect; u64 len = attr->address_length; + u64 start, end, offset = 0; struct resource *res = &win->res; /* @@ -204,9 +205,6 @@ static bool acpi_decode_space(struct resource_win *win, pr_debug("ACPI: Invalid address space min_addr_fix %d, max_addr_fix %d, len %llx\n", addr->min_address_fixed, addr->max_address_fixed, len); - res->start = attr->minimum; - res->end = attr->maximum; - /* * For bridges that translate addresses across the bridge, * translation_offset is the offset that must be added to the @@ -214,12 +212,22 @@ static bool acpi_decode_space(struct resource_win *win, * primary side. Non-bridge devices must list 0 for all Address * Translation offset bits. 
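The osl.c hunk above stops calling acpi_reserve_resources() from acpi_os_initialize1() and instead registers it at a fixed boot stage with fs_initcall_sync(), which requires the int return type added in the same hunk. A minimal sketch of the idiom; the body here is a placeholder:

#include <linux/init.h>

/* The real function requests the FADT's fixed register blocks so that
 * no driver can claim those regions first. */
static int __init reserve_resources_sketch(void)
{
        return 0;
}
fs_initcall_sync(reserve_resources_sketch);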
*/ - if (addr->producer_consumer == ACPI_PRODUCER) { - res->start += attr->translation_offset; - res->end += attr->translation_offset; - } else if (attr->translation_offset) { + if (addr->producer_consumer == ACPI_PRODUCER) + offset = attr->translation_offset; + else if (attr->translation_offset) pr_debug("ACPI: translation_offset(%lld) is invalid for non-bridge device.\n", attr->translation_offset); + start = attr->minimum + offset; + end = attr->maximum + offset; + + win->offset = offset; + res->start = start; + res->end = end; + if (sizeof(resource_size_t) < sizeof(u64) && + (offset != win->offset || start != res->start || end != res->end)) { + pr_warn("acpi resource window ([%#llx-%#llx] ignored, not CPU addressable)\n", + attr->minimum, attr->maximum); + return false; } switch (addr->resource_type) { @@ -236,8 +244,6 @@ static bool acpi_decode_space(struct resource_win *win, return false; } - win->offset = attr->translation_offset; - if (addr->producer_consumer == ACPI_PRODUCER) res->flags |= IORESOURCE_WINDOW; diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 6607f3c6ace10..f1a26d937d984 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2074,7 +2074,7 @@ static int binder_thread_write(struct binder_proc *proc, if (get_user(cookie, (binder_uintptr_t __user *)ptr)) return -EFAULT; - ptr += sizeof(void *); + ptr += sizeof(cookie); list_for_each_entry(w, &proc->delivered_death, entry) { struct binder_ref_death *tmp_death = container_of(w, struct binder_ref_death, work); diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 65ee94454bbd2..34825d63d4835 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -262,6 +262,26 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x3b2b), board_ahci }, /* PCH RAID */ { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci }, /* PCH RAID */ { PCI_VDEVICE(INTEL, 0x3b2f), board_ahci }, /* PCH AHCI */ + { PCI_VDEVICE(INTEL, 0x19b0), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19b1), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19b2), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19b3), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19b4), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19b5), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19b6), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19b7), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19bE), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19bF), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c0), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c1), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c2), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c3), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c4), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c5), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c6), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c7), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19cE), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19cF), board_ahci }, /* DNV AHCI */ { PCI_VDEVICE(INTEL, 0x1c02), board_ahci }, /* CPT AHCI */ { PCI_VDEVICE(INTEL, 0x1c03), board_ahci }, /* CPT AHCI */ { PCI_VDEVICE(INTEL, 0x1c04), board_ahci }, /* CPT RAID */ @@ -312,6 +332,16 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x1f37), board_ahci_avn }, /* Avoton RAID */ { PCI_VDEVICE(INTEL, 0x1f3e), board_ahci_avn }, /* Avoton RAID */ { 
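acpi_decode_space() above detects windows that a narrow resource_size_t cannot represent by storing the u64 values and comparing them back against the originals. A standalone sketch of that round-trip truncation check; resource_size_sketch_t is an illustrative stand-in:

#include <stdint.h>
#include <stdio.h>

typedef uint32_t resource_size_sketch_t; /* stand-in for resource_size_t */

/* Store a u64 into a possibly narrower type, then compare: if the
 * round trip changed the value, the address is not representable. */
static int store_checked(resource_size_sketch_t *dst, uint64_t val)
{
        *dst = (resource_size_sketch_t)val;
        if (sizeof(*dst) < sizeof(val) && *dst != val) {
                fprintf(stderr, "value %#llx not addressable, ignoring\n",
                        (unsigned long long)val);
                return -1;
        }
        return 0;
}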
PCI_VDEVICE(INTEL, 0x1f3f), board_ahci_avn }, /* Avoton RAID */ + { PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/ + { PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/ + { PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Wellsburg RAID */ { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Wellsburg RAID */ { PCI_VDEVICE(INTEL, 0x8d02), board_ahci }, /* Wellsburg AHCI */ @@ -342,6 +372,22 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */ + { PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Lewisburg AHCI*/ + { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/ + { PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa1d2), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa1d6), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/ + { PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, @@ -349,6 +395,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { /* JMicron 362B and 362C have an AHCI function with IDE class code */ { PCI_VDEVICE(JMICRON, 0x2362), board_ahci_ign_iferr }, { PCI_VDEVICE(JMICRON, 0x236f), board_ahci_ign_iferr }, + /* May need to update quirk_jmicron_async_suspend() for additions */ /* ATI */ { PCI_VDEVICE(ATI, 0x4380), board_ahci_sb600 }, /* ATI SB600 */ @@ -1377,18 +1424,6 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) else if (pdev->vendor == 0x1c44 && pdev->device == 0x8000) ahci_pci_bar = AHCI_PCI_BAR_ENMOTUS; - /* - * The JMicron chip 361/363 contains one SATA controller and one - * PATA controller,for powering on these both controllers, we must - * follow the sequence one by one, otherwise one of them can not be - * powered on successfully, so here we disable the async suspend - * method for these chips. 
- */ - if (pdev->vendor == PCI_VENDOR_ID_JMICRON && - (pdev->device == PCI_DEVICE_ID_JMICRON_JMB363 || - pdev->device == PCI_DEVICE_ID_JMICRON_JMB361)) - device_disable_async_suspend(&pdev->dev); - /* acquire resources */ rc = pcim_enable_device(pdev); if (rc) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 287c4ba0219f7..49840264dd578 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -495,8 +495,8 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv) } } - /* fabricate port_map from cap.nr_ports */ - if (!port_map) { + /* fabricate port_map from cap.nr_ports for < AHCI 1.3 */ + if (!port_map && vers < 0x10300) { port_map = (1 << ahci_nr_ports(cap)) - 1; dev_warn(dev, "forcing PORTS_IMPL to 0x%x\n", port_map); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 577849c6611ac..e0064d180f041 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -694,11 +694,11 @@ static int ata_rwcmd_protocol(struct ata_taskfile *tf, struct ata_device *dev) * RETURNS: * Block address read from @tf. */ -u64 ata_tf_read_block(const struct ata_taskfile *tf, struct ata_device *dev) +u64 ata_tf_read_block(struct ata_taskfile *tf, struct ata_device *dev) { u64 block = 0; - if (!dev || tf->flags & ATA_TFLAG_LBA) { + if (tf->flags & ATA_TFLAG_LBA) { if (tf->flags & ATA_TFLAG_LBA48) { block |= (u64)tf->hob_lbah << 40; block |= (u64)tf->hob_lbam << 32; @@ -2147,24 +2147,6 @@ static int ata_dev_config_ncq(struct ata_device *dev, return 0; } -static void ata_dev_config_sense_reporting(struct ata_device *dev) -{ - unsigned int err_mask; - - if (!ata_id_has_sense_reporting(dev->id)) - return; - - if (ata_id_sense_reporting_enabled(dev->id)) - return; - - err_mask = ata_dev_set_feature(dev, SETFEATURE_SENSE_DATA, 0x1); - if (err_mask) { - ata_dev_dbg(dev, - "failed to enable Sense Data Reporting, Emask 0x%x\n", - err_mask); - } -} - /** * ata_dev_configure - Configure the specified ATA/ATAPI device * @dev: Target device to configure @@ -2387,7 +2369,7 @@ int ata_dev_configure(struct ata_device *dev) dev->devslp_timing[i] = sata_setting[j]; } } - ata_dev_config_sense_reporting(dev); + dev->cdb_len = 16; } @@ -2478,6 +2460,10 @@ int ata_dev_configure(struct ata_device *dev) dev->max_sectors = min_t(unsigned int, ATA_MAX_SECTORS_128, dev->max_sectors); + if (dev->horkage & ATA_HORKAGE_MAX_SEC_1024) + dev->max_sectors = min_t(unsigned int, ATA_MAX_SECTORS_1024, + dev->max_sectors); + if (dev->horkage & ATA_HORKAGE_MAX_SEC_LBA48) dev->max_sectors = ATA_MAX_SECTORS_LBA48; @@ -4146,6 +4132,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "Slimtype DVD A DS8A8SH", NULL, ATA_HORKAGE_MAX_SEC_LBA48 }, { "Slimtype DVD A DS8A9SH", NULL, ATA_HORKAGE_MAX_SEC_LBA48 }, + /* + * Causes silent data corruption with higher max sects. 
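ata_tf_read_block() above reassembles the block address from the taskfile's high-order-byte and low-order registers when ATA_TFLAG_LBA48 is set. A self-contained sketch of the 48-bit assembly; this struct is illustrative, not the kernel's struct ata_taskfile:

#include <stdint.h>

struct tf_sketch {
        uint8_t hob_lbah, hob_lbam, hob_lbal; /* bits 47:24 */
        uint8_t lbah, lbam, lbal;             /* bits 23:0  */
};

/* Widen each register to u64 before shifting so the high bytes are
 * not lost to integer promotion. */
static uint64_t lba48_from_tf(const struct tf_sketch *tf)
{
        return ((uint64_t)tf->hob_lbah << 40) |
               ((uint64_t)tf->hob_lbam << 32) |
               ((uint64_t)tf->hob_lbal << 24) |
               ((uint64_t)tf->lbah << 16) |
               ((uint64_t)tf->lbam << 8) |
               (uint64_t)tf->lbal;
}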
+ * http://lkml.kernel.org/g/x49wpy40ysk.fsf@segfault.boston.devel.redhat.com + */ + { "ST380013AS", "3.20", ATA_HORKAGE_MAX_SEC_1024 }, + /* Devices we expect to fail diagnostics */ /* Devices where NCQ should be avoided */ @@ -4174,9 +4166,10 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "ST3320[68]13AS", "SD1[5-9]", ATA_HORKAGE_NONCQ | ATA_HORKAGE_FIRMWARE_WARN }, - /* Seagate Momentus SpinPoint M8 seem to have FPMDA_AA issues */ + /* drives which fail FPDMA_AA activation (some may freeze afterwards) */ { "ST1000LM024 HN-M101MBB", "2AR10001", ATA_HORKAGE_BROKEN_FPDMA_AA }, { "ST1000LM024 HN-M101MBB", "2BA30001", ATA_HORKAGE_BROKEN_FPDMA_AA }, + { "VB0250EAVER", "HPG7", ATA_HORKAGE_BROKEN_FPDMA_AA }, /* Blacklist entries taken from Silicon Image 3124/3132 Windows driver .inf file - also several Linux problem reports */ @@ -4225,11 +4218,11 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER }, /* devices that don't properly handle queued TRIM commands */ - { "Micron_M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + { "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Crucial_CT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, - { "Micron_M5[15]0*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | + { "Micron_M5[15]0_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Crucial_CT*M550*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, @@ -4237,6 +4230,11 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung SSD 8*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "FCCT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, + + /* devices that don't properly handle TRIM commands */ + { "SuperSSpeed S238*", NULL, ATA_HORKAGE_NOTRIM, }, /* * As defined, the DRAT (Deterministic Read After Trim) and RZAT @@ -4501,7 +4499,8 @@ static unsigned int ata_dev_set_xfermode(struct ata_device *dev) else /* In the ancient relic department - skip all of this */ return 0; - err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); + /* On some disks, this command causes spin-up, so we need longer timeout */ + err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 15000); DPRINTK("EXIT, err_mask=%x\n", err_mask); return err_mask; diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index cf0022ec07f24..cb0508af1459a 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1507,16 +1507,21 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log, { struct ata_taskfile tf; unsigned int err_mask; + bool dma = false; DPRINTK("read log page - log 0x%x, page 0x%x\n", log, page); +retry: ata_tf_init(dev, &tf); - if (dev->dma_mode && ata_id_has_read_log_dma_ext(dev->id)) { + if (dev->dma_mode && ata_id_has_read_log_dma_ext(dev->id) && + !(dev->horkage & ATA_HORKAGE_NO_NCQ_LOG)) { tf.command = ATA_CMD_READ_LOG_DMA_EXT; tf.protocol = ATA_PROT_DMA; + dma = true; } else { tf.command = ATA_CMD_READ_LOG_EXT; tf.protocol = ATA_PROT_PIO; + dma = false; } tf.lbal = log; tf.lbam = page; @@ -1527,6 +1532,12 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log, err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE, buf, sectors * ATA_SECT_SIZE, 0); + if (err_mask && dma) { + dev->horkage |= ATA_HORKAGE_NO_NCQ_LOG; + ata_dev_warn(dev, "READ LOG DMA EXT failed, trying unqueued\n"); + goto 
retry; + } + DPRINTK("EXIT, err_mask=%x\n", err_mask); return err_mask; } @@ -1581,8 +1592,6 @@ static int ata_eh_read_log_10h(struct ata_device *dev, tf->hob_lbah = buf[10]; tf->nsect = buf[12]; tf->hob_nsect = buf[13]; - if (ata_id_has_ncq_autosense(dev->id)) - tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16]; return 0; } @@ -1618,70 +1627,6 @@ unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key) return err_mask; } -/** - * ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT - * @dev: device to perform REQUEST_SENSE_SENSE_DATA_EXT to - * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) - * @dfl_sense_key: default sense key to use - * - * Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK - * SENSE. This function is EH helper. - * - * LOCKING: - * Kernel thread context (may sleep). - * - * RETURNS: - * encoded sense data on success, 0 on failure or if sense data - * is not available. - */ -static u32 ata_eh_request_sense(struct ata_queued_cmd *qc, - struct scsi_cmnd *cmd) -{ - struct ata_device *dev = qc->dev; - struct ata_taskfile tf; - unsigned int err_mask; - - if (!cmd) - return 0; - - DPRINTK("ATA request sense\n"); - ata_dev_warn(dev, "request sense\n"); - if (!ata_id_sense_reporting_enabled(dev->id)) { - ata_dev_warn(qc->dev, "sense data reporting disabled\n"); - return 0; - } - ata_tf_init(dev, &tf); - - tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; - tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48; - tf.command = ATA_CMD_REQ_SENSE_DATA; - tf.protocol = ATA_PROT_NODATA; - - err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); - /* - * ACS-4 states: - * The device may set the SENSE DATA AVAILABLE bit to one in the - * STATUS field and clear the ERROR bit to zero in the STATUS field - * to indicate that the command returned completion without an error - * and the sense data described in table 306 is available. - * - * IOW the 'ATA_SENSE' bit might not be set even though valid - * sense data is available. - * So check for both. - */ - if ((tf.command & ATA_SENSE) || - tf.lbah != 0 || tf.lbam != 0 || tf.lbal != 0) { - ata_scsi_set_sense(cmd, tf.lbah, tf.lbam, tf.lbal); - qc->flags |= ATA_QCFLAG_SENSE_VALID; - ata_dev_warn(dev, "sense data %02x/%02x/%02x\n", - tf.lbah, tf.lbam, tf.lbal); - } else { - ata_dev_warn(dev, "request sense failed stat %02x emask %x\n", - tf.command, err_mask); - } - return err_mask; -} - /** * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE * @dev: device to perform REQUEST_SENSE to @@ -1844,19 +1789,6 @@ void ata_eh_analyze_ncq_error(struct ata_link *link) memcpy(&qc->result_tf, &tf, sizeof(tf)); qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48; qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; - if (qc->result_tf.auxiliary) { - char sense_key, asc, ascq; - - sense_key = (qc->result_tf.auxiliary >> 16) & 0xff; - asc = (qc->result_tf.auxiliary >> 8) & 0xff; - ascq = qc->result_tf.auxiliary & 0xff; - ata_dev_dbg(dev, "NCQ Autosense %02x/%02x/%02x\n", - sense_key, asc, ascq); - ata_scsi_set_sense(qc->scsicmd, sense_key, asc, ascq); - ata_scsi_set_sense_information(qc->scsicmd, &qc->result_tf); - qc->flags |= ATA_QCFLAG_SENSE_VALID; - } - ehc->i.err_mask &= ~AC_ERR_DEV; } @@ -1886,27 +1818,6 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, return ATA_EH_RESET; } - /* - * Sense data reporting does not work if the - * device fault bit is set. 
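The ata_read_log_page() change above retries a failed READ LOG DMA EXT in PIO mode and latches ATA_HORKAGE_NO_NCQ_LOG so later reads skip DMA entirely. A generic sketch of that fallback-with-sticky-quirk pattern, with illustrative names:

enum { QUIRK_NO_DMA_LOG = 1u << 0 };

struct dev_sketch {
        unsigned int quirks;
};

static int read_log_sketch(struct dev_sketch *dev,
                           int (*dma_read)(void),
                           int (*pio_read)(void))
{
        if (!(dev->quirks & QUIRK_NO_DMA_LOG)) {
                if (dma_read() == 0)
                        return 0;
                /* Latch the quirk: never try the fast path again. */
                dev->quirks |= QUIRK_NO_DMA_LOG;
        }
        return pio_read();
}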
- */ - if ((stat & ATA_SENSE) && !(stat & ATA_DF) && - !(qc->flags & ATA_QCFLAG_SENSE_VALID)) { - if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { - tmp = ata_eh_request_sense(qc, qc->scsicmd); - if (tmp) - qc->err_mask |= tmp; - else - ata_scsi_set_sense_information(qc->scsicmd, tf); - } else { - ata_dev_warn(qc->dev, "sense data available but port frozen\n"); - } - } - - /* Set by NCQ autosense or request sense above */ - if (qc->flags & ATA_QCFLAG_SENSE_VALID) - return 0; - if (stat & (ATA_ERR | ATA_DF)) qc->err_mask |= AC_ERR_DEV; else @@ -2650,15 +2561,14 @@ static void ata_eh_link_report(struct ata_link *link) #ifdef CONFIG_ATA_VERBOSE_ERROR if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | - ATA_SENSE | ATA_ERR)) { + ATA_ERR)) { if (res->command & ATA_BUSY) ata_dev_err(qc->dev, "status: { Busy }\n"); else - ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n", + ata_dev_err(qc->dev, "status: { %s%s%s%s}\n", res->command & ATA_DRDY ? "DRDY " : "", res->command & ATA_DF ? "DF " : "", res->command & ATA_DRQ ? "DRQ " : "", - res->command & ATA_SENSE ? "SENSE " : "", res->command & ATA_ERR ? "ERR " : ""); } diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c index 7ccc084bf1dfb..85aa76116a305 100644 --- a/drivers/ata/libata-pmp.c +++ b/drivers/ata/libata-pmp.c @@ -460,6 +460,13 @@ static void sata_pmp_quirks(struct ata_port *ap) ATA_LFLAG_NO_SRST | ATA_LFLAG_ASSUME_ATA; } + } else if (vendor == 0x11ab && devid == 0x4140) { + /* Marvell 4140 quirks */ + ata_for_each_link(link, ap, EDGE) { + /* port 4 is for SEMB device and it doesn't like SRST */ + if (link->pmp == 4) + link->flags |= ATA_LFLAG_DISABLED; + } } } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 3131adcc1f87e..ae7cfcb562dca 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -270,28 +270,13 @@ DEVICE_ATTR(unload_heads, S_IRUGO | S_IWUSR, ata_scsi_park_show, ata_scsi_park_store); EXPORT_SYMBOL_GPL(dev_attr_unload_heads); -void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) +static void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) { - if (!cmd) - return; - cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION; scsi_build_sense_buffer(0, cmd->sense_buffer, sk, asc, ascq); } -void ata_scsi_set_sense_information(struct scsi_cmnd *cmd, - const struct ata_taskfile *tf) -{ - u64 information; - - if (!cmd) - return; - - information = ata_tf_read_block(tf, NULL); - scsi_set_sense_information(cmd->sense_buffer, information); -} - static ssize_t ata_scsi_em_message_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -690,19 +675,18 @@ static int ata_ioc32(struct ata_port *ap) int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev, int cmd, void __user *arg) { - int val = -EINVAL, rc = -EINVAL; + unsigned long val; + int rc = -EINVAL; unsigned long flags; switch (cmd) { - case ATA_IOC_GET_IO32: + case HDIO_GET_32BIT: spin_lock_irqsave(ap->lock, flags); val = ata_ioc32(ap); spin_unlock_irqrestore(ap->lock, flags); - if (copy_to_user(arg, &val, 1)) - return -EFAULT; - return 0; + return put_user(val, (unsigned long __user *)arg); - case ATA_IOC_SET_IO32: + case HDIO_SET_32BIT: val = (unsigned long) arg; rc = 0; spin_lock_irqsave(ap->lock, flags); @@ -1792,9 +1776,7 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) ((cdb[2] & 0x20) || need_sense)) { ata_gen_passthru_sense(qc); } else { - if (qc->flags & ATA_QCFLAG_SENSE_VALID) { - cmd->result = 
SAM_STAT_CHECK_CONDITION; - } else if (!need_sense) { + if (!need_sense) { cmd->result = SAM_STAT_GOOD; } else { /* TODO: decide which descriptor format to use @@ -2568,7 +2550,8 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf) rbuf[14] = (lowest_aligned >> 8) & 0x3f; rbuf[15] = lowest_aligned; - if (ata_id_has_trim(args->id)) { + if (ata_id_has_trim(args->id) && + !(dev->horkage & ATA_HORKAGE_NOTRIM)) { rbuf[14] |= 0x80; /* LBPME */ if (ata_id_has_zero_after_trim(args->id) && diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index cdf6215a9a22b..7dbba387d12a7 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -997,12 +997,9 @@ static inline int ata_hsm_ok_in_wq(struct ata_port *ap, static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) { struct ata_port *ap = qc->ap; - unsigned long flags; if (ap->ops->error_handler) { if (in_wq) { - spin_lock_irqsave(ap->lock, flags); - /* EH might have kicked in while host lock is * released. */ @@ -1014,8 +1011,6 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) } else ata_port_freeze(ap); } - - spin_unlock_irqrestore(ap->lock, flags); } else { if (likely(!(qc->err_mask & AC_ERR_HSM))) ata_qc_complete(qc); @@ -1024,10 +1019,8 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) } } else { if (in_wq) { - spin_lock_irqsave(ap->lock, flags); ata_sff_irq_on(ap); ata_qc_complete(qc); - spin_unlock_irqrestore(ap->lock, flags); } else ata_qc_complete(qc); } @@ -1048,9 +1041,10 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, { struct ata_link *link = qc->dev->link; struct ata_eh_info *ehi = &link->eh_info; - unsigned long flags = 0; int poll_next; + lockdep_assert_held(ap->lock); + WARN_ON_ONCE((qc->flags & ATA_QCFLAG_ACTIVE) == 0); /* Make sure ata_sff_qc_issue() does not throw things @@ -1112,14 +1106,6 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, } } - /* Send the CDB (atapi) or the first data block (ata pio out). - * During the state transition, interrupt handler shouldn't - * be invoked before the data transfer is complete and - * hsm_task_state is changed. Hence, the following locking. - */ - if (in_wq) - spin_lock_irqsave(ap->lock, flags); - if (qc->tf.protocol == ATA_PROT_PIO) { /* PIO data out protocol. * send first data block. @@ -1135,9 +1121,6 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, /* send CDB */ atapi_send_cdb(ap, qc); - if (in_wq) - spin_unlock_irqrestore(ap->lock, flags); - /* if polling, ata_sff_pio_task() handles the rest. * otherwise, interrupt handler takes over from here. 
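The libata-sff rework above makes ata_sff_pio_task() run under ap->lock (enforced with lockdep_assert_held()) and drop it only around ata_msleep(), since sleeping while holding a spinlock is forbidden. A kernel-style sketch of that drop-sleep-retake shape; busy() is an illustrative stand-in for the BSY poll:

#include <linux/spinlock.h>
#include <linux/delay.h>

static void poll_busy_sketch(spinlock_t *lock, int (*busy)(void))
{
        spin_lock_irq(lock);
        if (busy()) {
                spin_unlock_irq(lock);  /* never sleep under a spinlock */
                msleep(2);
                spin_lock_irq(lock);    /* state may have changed: recheck */
        }
        /* ... drive the state machine with the lock held ... */
        spin_unlock_irq(lock);
}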
*/ @@ -1361,12 +1344,14 @@ static void ata_sff_pio_task(struct work_struct *work) u8 status; int poll_next; + spin_lock_irq(ap->lock); + BUG_ON(ap->sff_pio_task_link == NULL); /* qc can be NULL if timeout occurred */ qc = ata_qc_from_tag(ap, link->active_tag); if (!qc) { ap->sff_pio_task_link = NULL; - return; + goto out_unlock; } fsm_start: @@ -1381,11 +1366,14 @@ static void ata_sff_pio_task(struct work_struct *work) */ status = ata_sff_busy_wait(ap, ATA_BUSY, 5); if (status & ATA_BUSY) { + spin_unlock_irq(ap->lock); ata_msleep(ap, 2); + spin_lock_irq(ap->lock); + status = ata_sff_busy_wait(ap, ATA_BUSY, 10); if (status & ATA_BUSY) { ata_sff_queue_pio_task(link, ATA_SHORT_PAUSE); - return; + goto out_unlock; } } @@ -1402,6 +1390,8 @@ static void ata_sff_pio_task(struct work_struct *work) */ if (poll_next) goto fsm_start; +out_unlock: + spin_unlock_irq(ap->lock); } /** diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index 3227b7c8a05f8..e2d94972962d6 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -560,6 +560,29 @@ show_ata_dev_gscr(struct device *dev, static DEVICE_ATTR(gscr, S_IRUGO, show_ata_dev_gscr, NULL); +static ssize_t +show_ata_dev_trim(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ata_device *ata_dev = transport_class_to_dev(dev); + unsigned char *mode; + + if (!ata_id_has_trim(ata_dev->id)) + mode = "unsupported"; + else if (ata_dev->horkage & ATA_HORKAGE_NOTRIM) + mode = "forced_unsupported"; + else if (ata_dev->horkage & ATA_HORKAGE_NO_NCQ_TRIM) + mode = "forced_unqueued"; + else if (ata_fpdma_dsm_supported(ata_dev)) + mode = "queued"; + else + mode = "unqueued"; + + return snprintf(buf, 20, "%s\n", mode); +} + +static DEVICE_ATTR(trim, S_IRUGO, show_ata_dev_trim, NULL); + static DECLARE_TRANSPORT_CLASS(ata_dev_class, "ata_device", NULL, NULL, NULL); @@ -733,6 +756,7 @@ struct scsi_transport_template *ata_attach_transport(void) SETUP_DEV_ATTRIBUTE(ering); SETUP_DEV_ATTRIBUTE(id); SETUP_DEV_ATTRIBUTE(gscr); + SETUP_DEV_ATTRIBUTE(trim); BUG_ON(count > ATA_DEV_ATTRS); i->dev_attrs[count] = NULL; diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index a998a175f9f14..f840ca18a7c01 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -67,8 +67,7 @@ extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag); extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev, u64 block, u32 n_block, unsigned int tf_flags, unsigned int tag); -extern u64 ata_tf_read_block(const struct ata_taskfile *tf, - struct ata_device *dev); +extern u64 ata_tf_read_block(struct ata_taskfile *tf, struct ata_device *dev); extern unsigned ata_exec_internal(struct ata_device *dev, struct ata_taskfile *tf, const u8 *cdb, int dma_dir, void *buf, unsigned int buflen, @@ -138,9 +137,6 @@ extern int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht); extern void ata_scsi_scan_host(struct ata_port *ap, int sync); extern int ata_scsi_offline_dev(struct ata_device *dev); -extern void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq); -extern void ata_scsi_set_sense_information(struct scsi_cmnd *cmd, - const struct ata_taskfile *tf); extern void ata_scsi_media_change_notify(struct ata_device *dev); extern void ata_scsi_hotplug(struct work_struct *work); extern void ata_schedule_scsi_eh(struct Scsi_Host *shost); diff --git a/drivers/ata/pata_jmicron.c b/drivers/ata/pata_jmicron.c index 47e418b8c8baa..4d1a5d2c4287f 100644 --- 
a/drivers/ata/pata_jmicron.c +++ b/drivers/ata/pata_jmicron.c @@ -143,18 +143,6 @@ static int jmicron_init_one (struct pci_dev *pdev, const struct pci_device_id *i }; const struct ata_port_info *ppi[] = { &info, NULL }; - /* - * The JMicron chip 361/363 contains one SATA controller and one - * PATA controller,for powering on these both controllers, we must - * follow the sequence one by one, otherwise one of them can not be - * powered on successfully, so here we disable the async suspend - * method for these chips. - */ - if (pdev->vendor == PCI_VENDOR_ID_JMICRON && - (pdev->device == PCI_DEVICE_ID_JMICRON_JMB363 || - pdev->device == PCI_DEVICE_ID_JMICRON_JMB361)) - device_disable_async_suspend(&pdev->dev); - return ata_pci_bmdma_init_one(pdev, ppi, &jmicron_sht, NULL, 0); } diff --git a/drivers/auxdisplay/ks0108.c b/drivers/auxdisplay/ks0108.c index 5b93852392b8c..0d752851a1eef 100644 --- a/drivers/auxdisplay/ks0108.c +++ b/drivers/auxdisplay/ks0108.c @@ -139,6 +139,7 @@ static int __init ks0108_init(void) ks0108_pardevice = parport_register_device(ks0108_parport, KS0108_NAME, NULL, NULL, NULL, PARPORT_DEV_EXCL, NULL); + parport_put_port(ks0108_parport); if (ks0108_pardevice == NULL) { printk(KERN_ERR KS0108_NAME ": ERROR: " "parport didn't register new device\n"); diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index df0c66cb7ad37..fdba441457ecd 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -148,7 +148,11 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) if (sibling == cpu) /* skip itself */ continue; + sib_cpu_ci = get_cpu_cacheinfo(sibling); + if (!sib_cpu_ci->info_list) + continue; + sib_leaf = sib_cpu_ci->info_list + index; cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map); @@ -159,6 +163,9 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) static void free_cache_attributes(unsigned int cpu) { + if (!per_cpu_cacheinfo(cpu)) + return; + cache_shared_cpu_map_remove(cpu); kfree(per_cpu_cacheinfo(cpu)); @@ -514,8 +521,7 @@ static int cacheinfo_cpu_callback(struct notifier_block *nfb, break; case CPU_DEAD: cache_remove_dev(cpu); - if (per_cpu_cacheinfo(cpu)) - free_cache_attributes(cpu); + free_cache_attributes(cpu); break; } return notifier_from_errno(rc); diff --git a/drivers/base/devres.c b/drivers/base/devres.c index c8a53d1e019fb..8754646901174 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -297,10 +297,10 @@ void * devres_get(struct device *dev, void *new_res, if (!dr) { add_dr(dev, &new_dr->node); dr = new_dr; - new_dr = NULL; + new_res = NULL; } spin_unlock_irqrestore(&dev->devres_lock, flags); - devres_free(new_dr); + devres_free(new_res); return dr->data; } diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 171841ad10089..4d1d9de4f9bfc 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -544,10 +544,8 @@ static void fw_dev_release(struct device *dev) kfree(fw_priv); } -static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) +static int do_firmware_uevent(struct firmware_priv *fw_priv, struct kobj_uevent_env *env) { - struct firmware_priv *fw_priv = to_firmware_priv(dev); - if (add_uevent_var(env, "FIRMWARE=%s", fw_priv->buf->fw_id)) return -ENOMEM; if (add_uevent_var(env, "TIMEOUT=%i", loading_timeout)) @@ -558,6 +556,18 @@ static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } +static int firmware_uevent(struct device *dev, 
struct kobj_uevent_env *env) +{ + struct firmware_priv *fw_priv = to_firmware_priv(dev); + int err = 0; + + mutex_lock(&fw_lock); + if (fw_priv->buf) + err = do_firmware_uevent(fw_priv, env); + mutex_unlock(&fw_lock); + return err; +} + static struct class firmware_class = { .name = "firmware", .class_attrs = firmware_class_attrs, diff --git a/drivers/base/node.c b/drivers/base/node.c index a2aa65b4215d3..b10479c873576 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -388,6 +388,16 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { int page_nid; + /* + * memory block could have several absent sections from start. + * skip pfn range from absent section + */ + if (!pfn_present(pfn)) { + pfn = round_down(pfn + PAGES_PER_SECTION, + PAGES_PER_SECTION) - 1; + continue; + } + page_nid = get_nid_for_pfn(pfn); if (page_nid < 0) continue; diff --git a/drivers/base/platform.c b/drivers/base/platform.c index ebf034b972785..7403de94832c0 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -375,9 +375,7 @@ int platform_device_add(struct platform_device *pdev) while (--i >= 0) { struct resource *r = &pdev->resource[i]; - unsigned long type = resource_type(r); - - if (type == IORESOURCE_MEM || type == IORESOURCE_IO) + if (r->parent) release_resource(r); } @@ -408,9 +406,7 @@ void platform_device_del(struct platform_device *pdev) for (i = 0; i < pdev->num_resources; i++) { struct resource *r = &pdev->resource[i]; - unsigned long type = resource_type(r); - - if (type == IORESOURCE_MEM || type == IORESOURCE_IO) + if (r->parent) release_resource(r); } } diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 7fdd0172605af..ac3c07db92f10 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -37,7 +37,7 @@ struct pm_clock_entry { * @dev: The device for the given clock * @ce: PM clock entry corresponding to the clock. 
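The firmware_uevent() change above samples fw_priv->buf under fw_lock so a racing release cannot free the buffer mid-uevent. A sketch of that guarded-access pattern; the type and emit_vars() are illustrative:

#include <linux/mutex.h>

struct priv_sketch {
        struct mutex lock;
        void *buf;              /* cleared/freed by a racing release */
};

static int emit_vars(void *buf); /* illustrative */

static int uevent_sketch(struct priv_sketch *p)
{
        int err = 0;

        mutex_lock(&p->lock);
        if (p->buf)             /* only touch the buffer while locked */
                err = emit_vars(p->buf);
        mutex_unlock(&p->lock);
        return err;
}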
*/ -static inline int __pm_clk_enable(struct device *dev, struct pm_clock_entry *ce) +static inline void __pm_clk_enable(struct device *dev, struct pm_clock_entry *ce) { int ret; @@ -49,8 +49,6 @@ static inline int __pm_clk_enable(struct device *dev, struct pm_clock_entry *ce) dev_err(dev, "%s: failed to enable clk %p, error %d\n", __func__, ce->clk, ret); } - - return ret; } /** @@ -93,7 +91,7 @@ static int __pm_clk_add(struct device *dev, const char *con_id, return -ENOMEM; } } else { - if (IS_ERR(ce->clk) || !__clk_get(clk)) { + if (IS_ERR(clk) || !__clk_get(clk)) { kfree(ce); return -ENOENT; } diff --git a/drivers/base/property.c b/drivers/base/property.c index 1d0b116cae959..0a60ef1500cdc 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -26,9 +26,10 @@ */ void device_add_property_set(struct device *dev, struct property_set *pset) { - if (pset) - pset->fwnode.type = FWNODE_PDATA; + if (!pset) + return; + pset->fwnode.type = FWNODE_PDATA; set_secondary_fwnode(dev, &pset->fwnode); } EXPORT_SYMBOL_GPL(device_add_property_set); diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index 81751a49d8bf2..56486d92c4e72 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -296,11 +296,20 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, if (!blk) return -ENOMEM; - present = krealloc(rbnode->cache_present, - BITS_TO_LONGS(blklen) * sizeof(*present), GFP_KERNEL); - if (!present) { - kfree(blk); - return -ENOMEM; + if (BITS_TO_LONGS(blklen) > BITS_TO_LONGS(rbnode->blklen)) { + present = krealloc(rbnode->cache_present, + BITS_TO_LONGS(blklen) * sizeof(*present), + GFP_KERNEL); + if (!present) { + kfree(blk); + return -ENOMEM; + } + + memset(present + BITS_TO_LONGS(rbnode->blklen), 0, + (BITS_TO_LONGS(blklen) - BITS_TO_LONGS(rbnode->blklen)) + * sizeof(*present)); + } else { + present = rbnode->cache_present; } /* insert the register value in the correct place in the rbnode block */ diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index 5799a0b9e6cc4..c8941f39c9190 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -32,8 +32,7 @@ static DEFINE_MUTEX(regmap_debugfs_early_lock); /* Calculate the length of a fixed format */ static size_t regmap_calc_reg_len(int max_val, char *buf, size_t buf_size) { - snprintf(buf, buf_size, "%x", max_val); - return strlen(buf); + return snprintf(NULL, 0, "%x", max_val); } static ssize_t regmap_name_read_file(struct file *file, @@ -432,7 +431,7 @@ static ssize_t regmap_access_read_file(struct file *file, /* If we're in the region the user is trying to read */ if (p >= *ppos) { /* ...but not beyond it */ - if (buf_pos >= count - 1 - tot_len) + if (buf_pos + tot_len + 1 >= count) break; /* Format the register */ diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 6273ff072f3ea..1c76dcb502cf3 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -945,11 +945,10 @@ EXPORT_SYMBOL_GPL(devm_regmap_init); static void regmap_field_init(struct regmap_field *rm_field, struct regmap *regmap, struct reg_field reg_field) { - int field_bits = reg_field.msb - reg_field.lsb + 1; rm_field->regmap = regmap; rm_field->reg = reg_field.reg; rm_field->shift = reg_field.lsb; - rm_field->mask = ((BIT(field_bits) - 1) << reg_field.lsb); + rm_field->mask = GENMASK(reg_field.msb, reg_field.lsb); rm_field->id_size = reg_field.id_size; 
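Two idioms appear in the regmap hunks above: regmap_calc_reg_len() measures the formatted length with snprintf(NULL, 0, ...) instead of formatting into a scratch buffer, and regmap_field_init() builds the field mask with GENMASK(msb, lsb), e.g. GENMASK(7, 4) == 0xf0. A portable sketch of the measuring idiom:

#include <stdio.h>
#include <stdlib.h>

/* snprintf with a NULL buffer and size 0 returns the length the output
 * would need (C99), so the buffer can be sized exactly. */
static char *format_hex(unsigned int max_val)
{
        int len = snprintf(NULL, 0, "%x", max_val);
        char *buf = malloc(len + 1);

        if (buf)
                snprintf(buf, len + 1, "%x", max_val);
        return buf;
}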
rm_field->id_offset = reg_field.id_offset; } @@ -2318,7 +2317,7 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, &ival); if (ret != 0) return ret; - memcpy(val + (i * val_bytes), &ival, val_bytes); + map->format.format_val(val + (i * val_bytes), ival, 0); } } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d7173cb1ea76c..cef6fa83a2740 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -86,8 +86,6 @@ static DEFINE_MUTEX(loop_index_mutex); static int max_part; static int part_shift; -static struct workqueue_struct *loop_wq; - static int transfer_xor(struct loop_device *lo, int cmd, struct page *raw_page, unsigned raw_off, struct page *loop_page, unsigned loop_off, @@ -725,6 +723,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); if ((loff_t)(sector_t)size != size) goto out_putf; + error = -ENOMEM; + lo->wq = alloc_workqueue("kloopd%d", + WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16, + lo->lo_number); + if (!lo->wq) + goto out_putf; error = 0; @@ -872,6 +876,8 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; + destroy_workqueue(lo->wq); + lo->wq = NULL; mutex_unlock(&lo->lo_ctl_mutex); /* * Need not hold lo_ctl_mutex to fput backing file. @@ -1425,9 +1431,13 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + struct loop_device *lo = cmd->rq->q->queuedata; blk_mq_start_request(bd->rq); + if (lo->lo_state != Lo_bound) + return -EIO; + if (cmd->rq->cmd_flags & REQ_WRITE) { struct loop_device *lo = cmd->rq->q->queuedata; bool need_sched = true; @@ -1441,9 +1451,9 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, spin_unlock_irq(&lo->lo_lock); if (need_sched) - queue_work(loop_wq, &lo->write_work); + queue_work(lo->wq, &lo->write_work); } else { - queue_work(loop_wq, &cmd->read_work); + queue_work(lo->wq, &cmd->read_work); } return BLK_MQ_RQ_QUEUE_OK; @@ -1455,9 +1465,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd) struct loop_device *lo = cmd->rq->q->queuedata; int ret = -EIO; - if (lo->lo_state != Lo_bound) - goto failed; - if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) goto failed; @@ -1806,13 +1813,6 @@ static int __init loop_init(void) goto misc_out; } - loop_wq = alloc_workqueue("kloopd", - WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0); - if (!loop_wq) { - err = -ENOMEM; - goto misc_out; - } - blk_register_region(MKDEV(LOOP_MAJOR, 0), range, THIS_MODULE, loop_probe, NULL, NULL); @@ -1850,8 +1850,6 @@ static void __exit loop_exit(void) blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); unregister_blkdev(LOOP_MAJOR, "loop"); - destroy_workqueue(loop_wq); - misc_deregister(&loop_misc); } diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 301c27f8323ff..49564edf55816 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -54,6 +54,7 @@ struct loop_device { gfp_t old_gfp_mask; spinlock_t lo_lock; + struct workqueue_struct *wq; struct list_head write_cmd_head; struct work_struct write_work; bool write_started; diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 683dff272562b..04c0e8f3183c3 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -590,6 +590,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_iod *iod = ctx; struct request *req = iod_get_private(iod); struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); + 
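The loop changes above replace the single global kloopd workqueue with one per device, created when a backing file is attached and destroyed when it is cleared. A sketch of that lifecycle, reusing the flags from the patch; surrounding error handling is elided:

static int bind_sketch(struct loop_device *lo)
{
        lo->wq = alloc_workqueue("kloopd%d",
                                 WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND,
                                 16, lo->lo_number);
        return lo->wq ? 0 : -ENOMEM;
}

static void unbind_sketch(struct loop_device *lo)
{
        destroy_workqueue(lo->wq);  /* flushes pending work first */
        lo->wq = NULL;
}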
bool requeue = false; u16 status = le16_to_cpup(&cqe->status) >> 1; @@ -598,12 +599,13 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, && (jiffies - req->start_time) < req->timeout) { unsigned long flags; + requeue = true; blk_mq_requeue_request(req); spin_lock_irqsave(req->q->queue_lock, flags); if (!blk_queue_stopped(req->q)) blk_mq_kick_requeue_list(req->q); spin_unlock_irqrestore(req->q->queue_lock, flags); - return; + goto release_iod; } req->errors = nvme_error_status(status); } else @@ -613,7 +615,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, dev_warn(&nvmeq->dev->pci_dev->dev, "completing aborted command with status:%04x\n", status); - + release_iod: if (iod->nents) { dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, iod->nents, rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); @@ -626,7 +628,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, } nvme_free_iod(nvmeq->dev, iod); - blk_mq_complete_request(req); + if (likely(!requeue)) + blk_mq_complete_request(req); } /* length is in bytes. gfp flags indicates whether we may sleep. */ diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ec6c5c6e1ac94..09138ceba046b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -96,6 +96,8 @@ static int atomic_dec_return_safe(atomic_t *v) #define RBD_MINORS_PER_MAJOR 256 #define RBD_SINGLE_MAJOR_PART_SHIFT 4 +#define RBD_MAX_PARENT_CHAIN_LEN 16 + #define RBD_SNAP_DEV_NAME_PREFIX "snap_" #define RBD_MAX_SNAP_NAME_LEN \ (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) @@ -425,7 +427,7 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf, size_t count); static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf, size_t count); -static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping); +static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth); static void rbd_spec_put(struct rbd_spec *spec); static int rbd_dev_id_to_minor(int dev_id) @@ -522,6 +524,7 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) 
# define rbd_assert(expr) ((void) 0) #endif /* !RBD_DEBUG */ +static void rbd_osd_copyup_callback(struct rbd_obj_request *obj_request); static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request); static void rbd_img_parent_read(struct rbd_obj_request *obj_request); static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); @@ -1797,6 +1800,16 @@ static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request) obj_request_done_set(obj_request); } +static void rbd_osd_call_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p\n", __func__, obj_request); + + if (obj_request_img_data_test(obj_request)) + rbd_osd_copyup_callback(obj_request); + else + obj_request_done_set(obj_request); +} + static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, struct ceph_msg *msg) { @@ -1845,6 +1858,8 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, rbd_osd_discard_callback(obj_request); break; case CEPH_OSD_OP_CALL: + rbd_osd_call_callback(obj_request); + break; case CEPH_OSD_OP_NOTIFY_ACK: case CEPH_OSD_OP_WATCH: rbd_osd_trivial_callback(obj_request); @@ -2001,11 +2016,11 @@ static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, rbd_assert(obj_request_type_valid(type)); size = strlen(object_name) + 1; - name = kmalloc(size, GFP_KERNEL); + name = kmalloc(size, GFP_NOIO); if (!name) return NULL; - obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_KERNEL); + obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO); if (!obj_request) { kfree(name); return NULL; @@ -2509,13 +2524,15 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, } static void -rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request) +rbd_osd_copyup_callback(struct rbd_obj_request *obj_request) { struct rbd_img_request *img_request; struct rbd_device *rbd_dev; struct page **pages; u32 page_count; + dout("%s: obj %p\n", __func__, obj_request); + rbd_assert(obj_request->type == OBJ_REQUEST_BIO || obj_request->type == OBJ_REQUEST_NODATA); rbd_assert(obj_request_img_data_test(obj_request)); @@ -2542,9 +2559,7 @@ rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request) if (!obj_request->result) obj_request->xferred = obj_request->length; - /* Finish up with the normal image object callback */ - - rbd_img_obj_callback(obj_request); + obj_request_done_set(obj_request); } static void @@ -2629,7 +2644,6 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) /* All set, send it off. 
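The rbd hunks above switch object-request allocations to GFP_NOIO because they run on the I/O path: reclaim triggered by a GFP_KERNEL allocation could issue new writeback that loops back into the same device. A one-line sketch of the rule:

/* On the I/O submission path: GFP_NOIO, never GFP_KERNEL, so that
 * memory reclaim cannot recurse into this block device. */
static void *io_path_alloc_sketch(size_t size)
{
        return kmalloc(size, GFP_NOIO);
}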
*/ - orig_request->callback = rbd_img_obj_copyup_callback; osdc = &rbd_dev->rbd_client->client->osdc; img_result = rbd_obj_request_submit(osdc, orig_request); if (!img_result) @@ -3403,6 +3417,7 @@ static void rbd_queue_workfn(struct work_struct *work) goto err_rq; } img_request->rq = rq; + snapc = NULL; /* img_request consumes a ref */ if (op_type == OBJ_OP_DISCARD) result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA, @@ -3785,6 +3800,9 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) q->limits.discard_zeroes_data = 1; blk_queue_merge_bvec(q, rbd_merge_bvec); + if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) + q->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES; + disk->queue = q; q->queuedata = rbd_dev; @@ -5130,45 +5148,50 @@ static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev) return ret; } -static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) +/* + * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() -> + * rbd_dev_image_probe() recursion depth, which means it's also the + * length of the already discovered part of the parent chain. + */ +static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth) { struct rbd_device *parent = NULL; - struct rbd_spec *parent_spec; - struct rbd_client *rbdc; int ret; if (!rbd_dev->parent_spec) return 0; - /* - * We need to pass a reference to the client and the parent - * spec when creating the parent rbd_dev. Images related by - * parent/child relationships always share both. - */ - parent_spec = rbd_spec_get(rbd_dev->parent_spec); - rbdc = __rbd_get_client(rbd_dev->rbd_client); - ret = -ENOMEM; - parent = rbd_dev_create(rbdc, parent_spec); - if (!parent) + if (++depth > RBD_MAX_PARENT_CHAIN_LEN) { + pr_info("parent chain is too long (%d)\n", depth); + ret = -EINVAL; goto out_err; + } + + parent = rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec); + if (!parent) { + ret = -ENOMEM; + goto out_err; + } + + /* + * Images related by parent/child relationships always share + * rbd_client and spec/parent_spec, so bump their refcounts. + */ + __rbd_get_client(rbd_dev->rbd_client); + rbd_spec_get(rbd_dev->parent_spec); - ret = rbd_dev_image_probe(parent, false); + ret = rbd_dev_image_probe(parent, depth); if (ret < 0) goto out_err; + rbd_dev->parent = parent; atomic_set(&rbd_dev->parent_ref, 1); - return 0; + out_err: - if (parent) { - rbd_dev_unparent(rbd_dev); - kfree(rbd_dev->header_name); + rbd_dev_unparent(rbd_dev); + if (parent) rbd_dev_destroy(parent); - } else { - rbd_put_client(rbdc); - rbd_spec_put(parent_spec); - } - return ret; } @@ -5286,7 +5309,7 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev) * parent), initiate a watch on its header object before using that * object to get detailed information about the rbd image. */ -static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) +static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) { int ret; @@ -5304,7 +5327,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) if (ret) goto err_out_format; - if (mapping) { + if (!depth) { ret = rbd_dev_header_watch_sync(rbd_dev); if (ret) { if (ret == -ENOENT) @@ -5325,7 +5348,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) * Otherwise this is a parent image, identified by pool, image * and snap ids - need to fill in names for those ids. 
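rbd_dev_probe_parent() above threads a depth counter through the probe recursion and refuses chains longer than RBD_MAX_PARENT_CHAIN_LEN, which also guards against cyclic parent metadata; depth == 0 doubles as the "is this the mapped image?" test. A standalone sketch with illustrative names:

#include <errno.h>

#define MAX_CHAIN_SKETCH 16

struct node_sketch {
        struct node_sketch *parent;
};

static int probe_sketch(struct node_sketch *n, int depth)
{
        if (!n->parent)
                return 0;
        if (++depth > MAX_CHAIN_SKETCH)
                return -EINVAL;  /* refuse absurd or cyclic chains */
        return probe_sketch(n->parent, depth);
}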
*/ - if (mapping) + if (!depth) ret = rbd_spec_fill_snap_id(rbd_dev); else ret = rbd_spec_fill_names(rbd_dev); @@ -5347,12 +5370,12 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) * Need to warn users if this image is the one being * mapped and has a parent. */ - if (mapping && rbd_dev->parent_spec) + if (!depth && rbd_dev->parent_spec) rbd_warn(rbd_dev, "WARNING: kernel layering is EXPERIMENTAL!"); } - ret = rbd_dev_probe_parent(rbd_dev); + ret = rbd_dev_probe_parent(rbd_dev, depth); if (ret) goto err_out_probe; @@ -5363,7 +5386,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) err_out_probe: rbd_dev_unprobe(rbd_dev); err_out_watch: - if (mapping) + if (!depth) rbd_dev_header_unwatch_sync(rbd_dev); out_header_name: kfree(rbd_dev->header_name); @@ -5428,7 +5451,7 @@ static ssize_t do_rbd_add(struct bus_type *bus, rbdc = NULL; /* rbd_dev now owns this */ spec = NULL; /* rbd_dev now owns this */ - rc = rbd_dev_image_probe(rbd_dev, true); + rc = rbd_dev_image_probe(rbd_dev, 0); if (rc < 0) goto err_out_rbd_dev; diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 713fc9ff11492..3e9ec9523f735 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -362,8 +362,8 @@ static void purge_persistent_gnt(struct xen_blkif *blkif) return; } - if (work_pending(&blkif->persistent_purge_work)) { - pr_alert_ratelimited("Scheduled work from previous purge is still pending, cannot purge list\n"); + if (work_busy(&blkif->persistent_purge_work)) { + pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n"); return; } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2c61cf8c6f61d..42ef86c409b69 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1118,8 +1118,10 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, * Add the used indirect page back to the list of * available pages for indirect grefs. */ - indirect_page = pfn_to_page(s->indirect_grants[i]->pfn); - list_add(&indirect_page->lru, &info->indirect_pages); + if (!info->feature_persistent) { + indirect_page = pfn_to_page(s->indirect_grants[i]->pfn); + list_add(&indirect_page->lru, &info->indirect_pages); + } s->indirect_grants[i]->gref = GRANT_INVALID_REF; list_add_tail(&s->indirect_grants[i]->node, &info->grants); } @@ -1923,7 +1925,8 @@ static void blkback_changed(struct xenbus_device *dev, break; /* Missed the backend's Closing state -- fallthrough */ case XenbusStateClosing: - blkfront_closing(info); + if (info) + blkfront_closing(info); break; } } diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index f1ff39a3d1c12..6fbb10ca73b10 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -76,7 +76,7 @@ static void zcomp_strm_free(struct zcomp *comp, struct zcomp_strm *zstrm) */ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) { - struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL); + struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_NOIO); if (!zstrm) return NULL; @@ -85,7 +85,7 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) * allocate 2 pages. 
1 for compressed data, plus 1 extra for the
	 * case when compressed size is larger than the original one
	 */
-	zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
+	zstrm->buffer = (void *)__get_free_pages(GFP_NOIO | __GFP_ZERO, 1);
 	if (!zstrm->private || !zstrm->buffer) {
 		zcomp_strm_free(comp, zstrm);
 		zstrm = NULL;
@@ -325,12 +325,14 @@ void zcomp_destroy(struct zcomp *comp)
  * allocate new zcomp and initialize it. return compressing
  * backend pointer or ERR_PTR if things went bad. ERR_PTR(-EINVAL)
  * if requested algorithm is not supported, ERR_PTR(-ENOMEM) in
- * case of allocation error.
+ * case of allocation error, or any other error potentially
+ * returned by functions zcomp_strm_{multi,single}_create.
  */
 struct zcomp *zcomp_create(const char *compress, int max_strm)
 {
 	struct zcomp *comp;
 	struct zcomp_backend *backend;
+	int error;
 
 	backend = find_backend(compress);
 	if (!backend)
@@ -342,12 +344,12 @@ struct zcomp *zcomp_create(const char *compress, int max_strm)
 	comp->backend = backend;
 
 	if (max_strm > 1)
-		zcomp_strm_multi_create(comp, max_strm);
+		error = zcomp_strm_multi_create(comp, max_strm);
 	else
-		zcomp_strm_single_create(comp);
-	if (!comp->stream) {
+		error = zcomp_strm_single_create(comp);
+	if (error) {
 		kfree(comp);
-		return ERR_PTR(-ENOMEM);
+		return ERR_PTR(error);
 	}
 	return comp;
 }
diff --git a/drivers/block/zram/zcomp_lz4.c b/drivers/block/zram/zcomp_lz4.c
index f2afb7e988c37..dd6083124276f 100644
--- a/drivers/block/zram/zcomp_lz4.c
+++ b/drivers/block/zram/zcomp_lz4.c
@@ -10,17 +10,36 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/lz4.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
 
 #include "zcomp_lz4.h"
 
 static void *zcomp_lz4_create(void)
 {
-	return kzalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
+	void *ret;
+
+	/*
+	 * This function can be called in swapout/fs write path
+	 * so we can't use GFP_FS|IO. And it assumes we already
+	 * have at least one stream in zram initialization so we
+	 * don't make a best effort to allocate more streams here.
+	 * A default stream will work well without further multiple
+	 * streams. That's why we use NORETRY | NOWARN.
+	 */
+	ret = kzalloc(LZ4_MEM_COMPRESS, GFP_NOIO | __GFP_NORETRY |
+					__GFP_NOWARN);
+	if (!ret)
+		ret = __vmalloc(LZ4_MEM_COMPRESS,
+				GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN |
+				__GFP_ZERO | __GFP_HIGHMEM,
+				PAGE_KERNEL);
+	return ret;
 }
 
 static void zcomp_lz4_destroy(void *private)
 {
-	kfree(private);
+	kvfree(private);
 }
 
 static int zcomp_lz4_compress(const unsigned char *src, unsigned char *dst,
diff --git a/drivers/block/zram/zcomp_lzo.c b/drivers/block/zram/zcomp_lzo.c
index da1bc47d588e9..edc549920fa06 100644
--- a/drivers/block/zram/zcomp_lzo.c
+++ b/drivers/block/zram/zcomp_lzo.c
@@ -10,17 +10,36 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/lzo.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
 
 #include "zcomp_lzo.h"
 
 static void *lzo_create(void)
 {
-	return kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
+	void *ret;
+
+	/*
+	 * This function can be called in swapout/fs write path
+	 * so we can't use GFP_FS|IO. And it assumes we already
+	 * have at least one stream in zram initialization so we
+	 * don't make a best effort to allocate more streams here.
+	 * A default stream will work well without further multiple
+	 * streams. That's why we use NORETRY | NOWARN.
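/*
 * The allocation strategy used by zcomp_lz4_create()/lzo_create()
 * above, shown in isolation: try a cheap physically contiguous
 * allocation first, fall back to vmalloc space under memory pressure,
 * and free either kind with kvfree(). try_alloc_workmem() and
 * WORKMEM_SIZE are illustrative names only; the 3-argument __vmalloc()
 * matches the kernel generation this diff targets.
 */
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

#define WORKMEM_SIZE	4096

static void *try_alloc_workmem(void)
{
	/* opportunistic attempt: no retries, no allocation-failure splat */
	void *mem = kzalloc(WORKMEM_SIZE, GFP_NOIO | __GFP_NORETRY |
					  __GFP_NOWARN);

	/* fall back to virtually contiguous memory */
	if (!mem)
		mem = __vmalloc(WORKMEM_SIZE,
				GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN |
				__GFP_ZERO | __GFP_HIGHMEM,
				PAGE_KERNEL);
	return mem;
}

static void free_workmem(void *mem)
{
	kvfree(mem);	/* correct for both kzalloc and __vmalloc memory */
}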
+ */ + ret = kzalloc(LZO1X_MEM_COMPRESS, GFP_NOIO | __GFP_NORETRY | + __GFP_NOWARN); + if (!ret) + ret = __vmalloc(LZO1X_MEM_COMPRESS, + GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN | + __GFP_ZERO | __GFP_HIGHMEM, + PAGE_KERNEL); + return ret; } static void lzo_destroy(void *private) { - kfree(private); + kvfree(private); } static int lzo_compress(const unsigned char *src, unsigned char *dst, diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 8c81af6dbe063..fa893c3ec4087 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -80,6 +80,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0489, 0xe057) }, { USB_DEVICE(0x0489, 0xe056) }, { USB_DEVICE(0x0489, 0xe05f) }, + { USB_DEVICE(0x0489, 0xe076) }, { USB_DEVICE(0x0489, 0xe078) }, { USB_DEVICE(0x04c5, 0x1330) }, { USB_DEVICE(0x04CA, 0x3004) }, @@ -88,9 +89,11 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x04CA, 0x3007) }, { USB_DEVICE(0x04CA, 0x3008) }, { USB_DEVICE(0x04CA, 0x300b) }, + { USB_DEVICE(0x04CA, 0x300d) }, { USB_DEVICE(0x04CA, 0x300f) }, { USB_DEVICE(0x04CA, 0x3010) }, { USB_DEVICE(0x0930, 0x0219) }, + { USB_DEVICE(0x0930, 0x021c) }, { USB_DEVICE(0x0930, 0x0220) }, { USB_DEVICE(0x0930, 0x0227) }, { USB_DEVICE(0x0b05, 0x17d0) }, @@ -102,6 +105,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0CF3, 0x311F) }, { USB_DEVICE(0x0cf3, 0x3121) }, { USB_DEVICE(0x0CF3, 0x817a) }, + { USB_DEVICE(0x0CF3, 0x817b) }, { USB_DEVICE(0x0cf3, 0xe003) }, { USB_DEVICE(0x0CF3, 0xE004) }, { USB_DEVICE(0x0CF3, 0xE005) }, @@ -113,6 +117,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x13d3, 0x3408) }, { USB_DEVICE(0x13d3, 0x3423) }, { USB_DEVICE(0x13d3, 0x3432) }, + { USB_DEVICE(0x13d3, 0x3474) }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xE02C) }, @@ -137,6 +142,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe05f), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xe076), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe078), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, @@ -145,9 +151,11 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x300d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, @@ -159,6 +167,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0cf3, 0x311F), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0CF3, 0x817a), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0CF3, 0x817b), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 
0xe005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe006), .driver_info = BTUSB_ATH3012 }, @@ -170,6 +179,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x13d3, 0x3408), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3423), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3432), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3474), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU22 with sflash firmware */ { USB_DEVICE(0x0489, 0xE036), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index 4bba86677adc6..3f146c9911c17 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -378,12 +378,11 @@ int btbcm_setup_apple(struct hci_dev *hdev) /* Read Verbose Config Version Info */ skb = btbcm_read_verbose_config(hdev); - if (IS_ERR(skb)) - return PTR_ERR(skb); - - BT_INFO("%s: BCM: chip id %u build %4.4u", hdev->name, skb->data[1], - get_unaligned_le16(skb->data + 5)); - kfree_skb(skb); + if (!IS_ERR(skb)) { + BT_INFO("%s: BCM: chip id %u build %4.4u", hdev->name, skb->data[1], + get_unaligned_le16(skb->data + 5)); + kfree_skb(skb); + } set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 3c10d4dfe9a79..fdba79c3877cd 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -144,6 +144,10 @@ static const struct usb_device_id btusb_table[] = { { USB_VENDOR_AND_INTERFACE_INFO(0x13d3, 0xff, 0x01, 0x01), .driver_info = BTUSB_BCM_PATCHRAM }, + /* Toshiba Corp - Broadcom based */ + { USB_VENDOR_AND_INTERFACE_INFO(0x0930, 0xff, 0x01, 0x01), + .driver_info = BTUSB_BCM_PATCHRAM }, + /* Intel Bluetooth USB Bootloader (RAM module) */ { USB_DEVICE(0x8087, 0x0a5a), .driver_info = BTUSB_INTEL_BOOT | BTUSB_BROKEN_ISOC }, @@ -178,6 +182,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe05f), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xe076), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe078), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, @@ -186,9 +191,11 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x300d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, @@ -200,6 +207,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0cf3, 0x311f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x817a), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x817b), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe004), 
.driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 },
@@ -211,6 +219,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x13d3, 0x3408), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3423), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3432), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x13d3, 0x3474), .driver_info = BTUSB_ATH3012 },
 
 	/* Atheros AR5BBU12 with sflash firmware */
 	{ USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE },
@@ -265,7 +274,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x0e5e, 0x6622), .driver_info = BTUSB_BROKEN_ISOC },
 
 	/* Roper Class 1 Bluetooth Dongle (Silicon Wave based) */
-	{ USB_DEVICE(0x1300, 0x0001), .driver_info = BTUSB_SWAVE },
+	{ USB_DEVICE(0x1310, 0x0001), .driver_info = BTUSB_SWAVE },
 
 	/* Digianswer devices */
 	{ USB_DEVICE(0x08fd, 0x0001), .driver_info = BTUSB_DIGIANSWER },
@@ -1990,6 +1999,8 @@ static int btusb_setup_intel(struct hci_dev *hdev)
 	}
 	fw_ptr = fw->data;
 
+	kfree_skb(skb);
+
 	/* This Intel specific command enables the manufacturer mode of the
 	 * controller.
 	 *
@@ -2331,6 +2342,7 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 	struct intel_boot_params *params;
 	const struct firmware *fw;
 	const u8 *fw_ptr;
+	u32 frag_len;
 	char fwname[64];
 	ktime_t calltime, delta, rettime;
 	unsigned long long duration;
@@ -2537,24 +2549,33 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 	}
 
 	fw_ptr = fw->data + 644;
+	frag_len = 0;
 
 	while (fw_ptr - fw->data < fw->size) {
-		struct hci_command_hdr *cmd = (void *)fw_ptr;
-		u8 cmd_len;
+		struct hci_command_hdr *cmd = (void *)(fw_ptr + frag_len);
 
-		cmd_len = sizeof(*cmd) + cmd->plen;
+		frag_len += sizeof(*cmd) + cmd->plen;
 
-		/* Send each command from the firmware data buffer as
-		 * a single Data fragment.
+		/* The parameter length of the secure send command requires
+		 * a 4 byte alignment. It so happens that the firmware file
+		 * contains proper Intel_NOP commands to align the fragments
+		 * as needed.
+		 *
+		 * Send the set of commands with 4 byte alignment from the
+		 * firmware data buffer as a single Data fragment.
 		 */
-		err = btusb_intel_secure_send(hdev, 0x01, cmd_len, fw_ptr);
-		if (err < 0) {
-			BT_ERR("%s: Failed to send firmware data (%d)",
-			       hdev->name, err);
-			goto done;
-		}
+		if (!(frag_len % 4)) {
+			err = btusb_intel_secure_send(hdev, 0x01, frag_len,
+						      fw_ptr);
+			if (err < 0) {
+				BT_ERR("%s: Failed to send firmware data (%d)",
+				       hdev->name, err);
+				goto done;
+			}
 
-		fw_ptr += cmd_len;
+			fw_ptr += frag_len;
+			frag_len = 0;
+		}
 	}
 
 	set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c
index aaa0f2a871185..60397ec77ff7b 100644
--- a/drivers/bus/arm-ccn.c
+++ b/drivers/bus/arm-ccn.c
@@ -212,7 +212,7 @@ static int arm_ccn_node_to_xp_port(int node)
 static void arm_ccn_pmu_config_set(u64 *config, u32 node_xp, u32 type, u32 port)
 {
-	*config &= ~((0xff << 0) | (0xff << 8) | (0xff << 24));
+	*config &= ~((0xff << 0) | (0xff << 8) | (0x3 << 24));
 	*config |= (node_xp << 0) | (type << 8) | (port << 24);
 }
 
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 0b4188b9af7cd..c6dea3f6917bd 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -581,7 +581,7 @@ static inline int needs_ilk_vtd_wa(void)
 	/* Query intel_iommu to see if we need the workaround. Presumably that
 	 * was loaded first.
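/*
 * A compact, self-contained model of the fragment-accumulation loop in
 * btusb_setup_intel_new() above: commands are coalesced until their
 * combined length reaches a 4-byte boundary, then flushed as a single
 * fragment. send_fragment() is a stand-in for the driver's
 * btusb_intel_secure_send(); termination relies on the firmware file
 * padding the stream with NOP commands so alignment is always reached.
 */
#include <stddef.h>
#include <stdint.h>

struct cmd_hdr {
	uint16_t opcode;
	uint8_t  plen;			/* parameter length */
} __attribute__((packed));

int send_fragment(const uint8_t *frag, size_t len);	/* stand-in */

static int send_aligned(const uint8_t *data, size_t size)
{
	const uint8_t *ptr = data;
	size_t frag_len = 0;

	while ((size_t)(ptr - data) < size) {
		const struct cmd_hdr *cmd = (const void *)(ptr + frag_len);

		frag_len += sizeof(*cmd) + cmd->plen;

		if (!(frag_len % 4)) {		/* flush on 4-byte alignment */
			int err = send_fragment(ptr, frag_len);

			if (err < 0)
				return err;
			ptr += frag_len;
			frag_len = 0;
		}
	}
	return 0;
}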
*/ - if ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB || + if ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG || gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG) && intel_iommu_gfx_mapped) return 1; diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index da8faf78536a3..5643b65cee204 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -429,7 +429,7 @@ static int hwrng_fillfn(void *unused) static void start_khwrngd(void) { hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng"); - if (hwrng_fill == ERR_PTR(-ENOMEM)) { + if (IS_ERR(hwrng_fill)) { pr_err("hwrng_fill thread creation failed"); hwrng_fill = NULL; } diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c index a43048b5b05fd..3c1a123f909cc 100644 --- a/drivers/char/i8k.c +++ b/drivers/char/i8k.c @@ -900,6 +900,21 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = { MODULE_DEVICE_TABLE(dmi, i8k_dmi_table); +static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = { + { + /* + * CPU fan speed going up and down on Dell Studio XPS 8100 + * for unknown reasons. + */ + .ident = "Dell Studio XPS 8100", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8100"), + }, + }, + { } +}; + /* * Probe for the presence of a supported laptop. */ @@ -911,7 +926,8 @@ static int __init i8k_probe(void) /* * Get DMI information */ - if (!dmi_check_system(i8k_dmi_table)) { + if (!dmi_check_system(i8k_dmi_table) || + dmi_check_system(i8k_blacklist_dmi_table)) { if (!ignore_dmi && !force) return -ENODEV; diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 8a45e92ff60c7..05222706dc66c 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -404,18 +404,42 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info) return rv; } -static void start_check_enables(struct smi_info *smi_info) +static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val) +{ + smi_info->last_timeout_jiffies = jiffies; + mod_timer(&smi_info->si_timer, new_val); + smi_info->timer_running = true; +} + +/* + * Start a new message and (re)start the timer and thread. 
+ */
+static void start_new_msg(struct smi_info *smi_info, unsigned char *msg,
+			  unsigned int size)
+{
+	smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_JIFFIES);
+
+	if (smi_info->thread)
+		wake_up_process(smi_info->thread);
+
+	smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
+}
+
+static void start_check_enables(struct smi_info *smi_info, bool start_timer)
 {
 	unsigned char msg[2];
 
 	msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
 	msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
 
-	smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
+	if (start_timer)
+		start_new_msg(smi_info, msg, 2);
+	else
+		smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
 	smi_info->si_state = SI_CHECKING_ENABLES;
 }
 
-static void start_clear_flags(struct smi_info *smi_info)
+static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
 {
 	unsigned char msg[3];
 
@@ -424,7 +448,10 @@ static void start_clear_flags(struct smi_info *smi_info)
 	msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
 	msg[2] = WDT_PRE_TIMEOUT_INT;
 
-	smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
+	if (start_timer)
+		start_new_msg(smi_info, msg, 3);
+	else
+		smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
 	smi_info->si_state = SI_CLEARING_FLAGS;
 }
 
@@ -434,10 +461,8 @@ static void start_getting_msg_queue(struct smi_info *smi_info)
 	smi_info->curr_msg->data[1] = IPMI_GET_MSG_CMD;
 	smi_info->curr_msg->data_size = 2;
 
-	smi_info->handlers->start_transaction(
-		smi_info->si_sm,
-		smi_info->curr_msg->data,
-		smi_info->curr_msg->data_size);
+	start_new_msg(smi_info, smi_info->curr_msg->data,
+		      smi_info->curr_msg->data_size);
 	smi_info->si_state = SI_GETTING_MESSAGES;
 }
 
@@ -447,20 +472,11 @@ static void start_getting_events(struct smi_info *smi_info)
 	smi_info->curr_msg->data[1] = IPMI_READ_EVENT_MSG_BUFFER_CMD;
 	smi_info->curr_msg->data_size = 2;
 
-	smi_info->handlers->start_transaction(
-		smi_info->si_sm,
-		smi_info->curr_msg->data,
-		smi_info->curr_msg->data_size);
+	start_new_msg(smi_info, smi_info->curr_msg->data,
+		      smi_info->curr_msg->data_size);
 	smi_info->si_state = SI_GETTING_EVENTS;
 }
 
-static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
-{
-	smi_info->last_timeout_jiffies = jiffies;
-	mod_timer(&smi_info->si_timer, new_val);
-	smi_info->timer_running = true;
-}
-
 /*
  * When we have a situation where we run out of memory and cannot
  * allocate messages, we just leave them in the BMC and run the system
 * degraded.
 * Note that we cannot just use disable_irq(), since the interrupt may
 * be shared.
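/*
 * The ordering that start_new_msg() above enforces, reduced to its
 * skeleton: every transaction (re)arms the driver's watchdog timer and
 * wakes the worker thread before the message reaches the state
 * machine, so a transaction started outside the timer path can never
 * stall with the timer stopped. The struct below abbreviates smi_info
 * to the fields this sketch needs.
 */
#include <linux/timer.h>
#include <linux/sched.h>
#include <linux/jiffies.h>

struct smi {
	struct timer_list	timer;
	unsigned long		last_timeout_jiffies;
	bool			timer_running;
	struct task_struct	*thread;
};

static void smi_kick(struct smi *smi, unsigned long timeout)
{
	/* 1) arm the timeout before anything can block */
	smi->last_timeout_jiffies = jiffies;
	mod_timer(&smi->timer, jiffies + timeout);
	smi->timer_running = true;

	/* 2) make sure the worker notices the new message */
	if (smi->thread)
		wake_up_process(smi->thread);

	/* 3) only now hand the message to the state machine */
}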
*/ -static inline bool disable_si_irq(struct smi_info *smi_info) +static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer) { if ((smi_info->irq) && (!smi_info->interrupt_disabled)) { smi_info->interrupt_disabled = true; - start_check_enables(smi_info); + start_check_enables(smi_info, start_timer); return true; } return false; @@ -484,7 +500,7 @@ static inline bool enable_si_irq(struct smi_info *smi_info) { if ((smi_info->irq) && (smi_info->interrupt_disabled)) { smi_info->interrupt_disabled = false; - start_check_enables(smi_info); + start_check_enables(smi_info, true); return true; } return false; @@ -502,7 +518,7 @@ static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info) msg = ipmi_alloc_smi_msg(); if (!msg) { - if (!disable_si_irq(smi_info)) + if (!disable_si_irq(smi_info, true)) smi_info->si_state = SI_NORMAL; } else if (enable_si_irq(smi_info)) { ipmi_free_smi_msg(msg); @@ -518,7 +534,7 @@ static void handle_flags(struct smi_info *smi_info) /* Watchdog pre-timeout */ smi_inc_stat(smi_info, watchdog_pretimeouts); - start_clear_flags(smi_info); + start_clear_flags(smi_info, true); smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT; if (smi_info->intf) ipmi_smi_watchdog_pretimeout(smi_info->intf); @@ -870,8 +886,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, msg[0] = (IPMI_NETFN_APP_REQUEST << 2); msg[1] = IPMI_GET_MSG_FLAGS_CMD; - smi_info->handlers->start_transaction( - smi_info->si_sm, msg, 2); + start_new_msg(smi_info, msg, 2); smi_info->si_state = SI_GETTING_FLAGS; goto restart; } @@ -901,7 +916,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, * disable and messages disabled. */ if (smi_info->supports_event_msg_buff || smi_info->irq) { - start_check_enables(smi_info); + start_check_enables(smi_info, true); } else { smi_info->curr_msg = alloc_msg_handle_irq(smi_info); if (!smi_info->curr_msg) @@ -1203,14 +1218,14 @@ static int smi_start_processing(void *send_info, new_smi->intf = intf; - /* Try to claim any interrupts. */ - if (new_smi->irq_setup) - new_smi->irq_setup(new_smi); - /* Set up the timer that drives the interface. */ setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi); smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES); + /* Try to claim any interrupts. */ + if (new_smi->irq_setup) + new_smi->irq_setup(new_smi); + /* * Check if the user forcefully enabled the daemon. */ @@ -3515,7 +3530,7 @@ static int try_smi_init(struct smi_info *new_smi) * Start clearing the flags before we enable interrupts or the * timer to avoid racing with the timer. */ - start_clear_flags(new_smi); + start_clear_flags(new_smi, false); /* * IRQ is defined to be set when non-zero. 
req_events will @@ -3817,7 +3832,7 @@ static void cleanup_one_si(struct smi_info *to_clean) poll(to_clean); schedule_timeout_uninterruptible(1); } - disable_si_irq(to_clean); + disable_si_irq(to_clean, false); while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) { poll(to_clean); schedule_timeout_uninterruptible(1); diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 283f00a7f0362..1082d4bb016a9 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -129,8 +129,9 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, device_initialize(&chip->dev); - chip->cdev.owner = chip->pdev->driver->owner; cdev_init(&chip->cdev, &tpm_fops); + chip->cdev.owner = chip->pdev->driver->owner; + chip->cdev.kobj.parent = &chip->dev.kobj; return chip; } diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index f8319a0860fd7..39be5acc9c480 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -115,6 +115,13 @@ enum tpm2_startup_types { TPM2_SU_STATE = 0x0001, }; +enum tpm2_start_method { + TPM2_START_ACPI = 2, + TPM2_START_FIFO = 6, + TPM2_START_CRB = 7, + TPM2_START_CRB_WITH_ACPI = 8, +}; + struct tpm_chip; struct tpm_vendor_specific { diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index b26ceee3585e1..2b971b3e5c1cf 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -34,12 +34,6 @@ enum crb_defaults { CRB_ACPI_START_INDEX = 1, }; -enum crb_start_method { - CRB_SM_ACPI_START = 2, - CRB_SM_CRB = 7, - CRB_SM_CRB_WITH_ACPI_START = 8, -}; - struct acpi_tpm2 { struct acpi_table_header hdr; u16 platform_class; @@ -220,12 +214,6 @@ static int crb_acpi_add(struct acpi_device *device) u64 pa; int rc; - chip = tpmm_chip_alloc(dev, &tpm_crb); - if (IS_ERR(chip)) - return PTR_ERR(chip); - - chip->flags = TPM_CHIP_FLAG_TPM2; - status = acpi_get_table(ACPI_SIG_TPM2, 1, (struct acpi_table_header **) &buf); if (ACPI_FAILURE(status)) { @@ -233,6 +221,16 @@ static int crb_acpi_add(struct acpi_device *device) return -ENODEV; } + /* Should the FIFO driver handle this? */ + if (buf->start_method == TPM2_START_FIFO) + return -ENODEV; + + chip = tpmm_chip_alloc(dev, &tpm_crb); + if (IS_ERR(chip)) + return PTR_ERR(chip); + + chip->flags = TPM_CHIP_FLAG_TPM2; + if (buf->hdr.length < sizeof(struct acpi_tpm2)) { dev_err(dev, "TPM2 ACPI table has wrong size"); return -EINVAL; @@ -251,11 +249,11 @@ static int crb_acpi_add(struct acpi_device *device) * report only ACPI start but in practice seems to require both * ACPI start and CRB start. 
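/*
 * The crb_acpi_add() reordering above encodes a general probe rule:
 * run the cheap "is this device even ours?" check (here, the TPM2
 * table's start method) before allocating the chip, so the -ENODEV
 * exit allocates nothing. A userspace model of that ordering; every
 * name below is illustrative.
 */
#include <errno.h>
#include <stdlib.h>

#define START_FIFO	6		/* mirrors TPM2_START_FIFO */

struct chip { int flags; };

static int read_start_method(void)
{
	return START_FIFO;		/* pretend the ACPI table says FIFO */
}

static int probe(struct chip **out)
{
	struct chip *chip;

	if (read_start_method() == START_FIFO)
		return -ENODEV;		/* the FIFO driver's job, nothing leaked */

	chip = calloc(1, sizeof(*chip));
	if (!chip)
		return -ENOMEM;

	*out = chip;
	return 0;
}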
*/ - if (sm == CRB_SM_CRB || sm == CRB_SM_CRB_WITH_ACPI_START || + if (sm == TPM2_START_CRB || sm == TPM2_START_FIFO || !strcmp(acpi_device_hid(device), "MSFT0101")) priv->flags |= CRB_FL_CRB_START; - if (sm == CRB_SM_ACPI_START || sm == CRB_SM_CRB_WITH_ACPI_START) + if (sm == TPM2_START_ACPI || sm == TPM2_START_CRB_WITH_ACPI) priv->flags |= CRB_FL_ACPI_START; priv->cca = (struct crb_control_area __iomem *) @@ -267,7 +265,7 @@ static int crb_acpi_add(struct acpi_device *device) memcpy_fromio(&pa, &priv->cca->cmd_pa, 8); pa = le64_to_cpu(pa); - priv->cmd = devm_ioremap_nocache(dev, le64_to_cpu(pa), + priv->cmd = devm_ioremap_nocache(dev, pa, ioread32(&priv->cca->cmd_size)); if (!priv->cmd) { dev_err(dev, "ioremap of the command buffer failed\n"); @@ -276,7 +274,7 @@ static int crb_acpi_add(struct acpi_device *device) memcpy_fromio(&pa, &priv->cca->rsp_pa, 8); pa = le64_to_cpu(pa); - priv->rsp = devm_ioremap_nocache(dev, le64_to_cpu(pa), + priv->rsp = devm_ioremap_nocache(dev, pa, ioread32(&priv->cca->rsp_size)); if (!priv->rsp) { dev_err(dev, "ioremap of the response buffer failed\n"); diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 42ffa5e7a1e0f..27ebf9511cb41 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -578,6 +578,9 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, goto cleanup; } + ibmvtpm->dev = dev; + ibmvtpm->vdev = vio_dev; + crq_q = &ibmvtpm->crq_queue; crq_q->crq_addr = (struct ibmvtpm_crq *)get_zeroed_page(GFP_KERNEL); if (!crq_q->crq_addr) { @@ -622,8 +625,6 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, crq_q->index = 0; - ibmvtpm->dev = dev; - ibmvtpm->vdev = vio_dev; TPM_VPRIV(chip) = (void *)ibmvtpm; spin_lock_init(&ibmvtpm->rtce_lock); diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index f2dffa770b8e9..696ef1d56b4f5 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2005, 2006 IBM Corporation - * Copyright (C) 2014 Intel Corporation + * Copyright (C) 2014, 2015 Intel Corporation * * Authors: * Leendert van Doorn @@ -28,6 +28,7 @@ #include #include #include +#include #include "tpm.h" enum tis_access { @@ -65,6 +66,17 @@ enum tis_defaults { TIS_LONG_TIMEOUT = 2000, /* 2 sec */ }; +struct tpm_info { + unsigned long start; + unsigned long len; + unsigned int irq; +}; + +static struct tpm_info tis_default_info = { + .start = TIS_MEM_BASE, + .len = TIS_MEM_LEN, + .irq = 0, +}; /* Some timeout values are needed before it is known whether the chip is * TPM 1.0 or TPM 2.0. 
@@ -91,26 +103,54 @@ struct priv_data { }; #if defined(CONFIG_PNP) && defined(CONFIG_ACPI) -static int is_itpm(struct pnp_dev *dev) +static int has_hid(struct acpi_device *dev, const char *hid) { - struct acpi_device *acpi = pnp_acpi_device(dev); struct acpi_hardware_id *id; - if (!acpi) - return 0; - - list_for_each_entry(id, &acpi->pnp.ids, list) { - if (!strcmp("INTC0102", id->id)) + list_for_each_entry(id, &dev->pnp.ids, list) + if (!strcmp(hid, id->id)) return 1; - } return 0; } + +static inline int is_itpm(struct acpi_device *dev) +{ + return has_hid(dev, "INTC0102"); +} + +static inline int is_fifo(struct acpi_device *dev) +{ + struct acpi_table_tpm2 *tbl; + acpi_status st; + + /* TPM 1.2 FIFO */ + if (!has_hid(dev, "MSFT0101")) + return 1; + + st = acpi_get_table(ACPI_SIG_TPM2, 1, + (struct acpi_table_header **) &tbl); + if (ACPI_FAILURE(st)) { + dev_err(&dev->dev, "failed to get TPM2 ACPI table\n"); + return 0; + } + + if (le32_to_cpu(tbl->start_method) != TPM2_START_FIFO) + return 0; + + /* TPM 2.0 FIFO */ + return 1; +} #else -static inline int is_itpm(struct pnp_dev *dev) +static inline int is_itpm(struct acpi_device *dev) { return 0; } + +static inline int is_fifo(struct acpi_device *dev) +{ + return 1; +} #endif /* Before we attempt to access the TPM we must see that the valid bit is set. @@ -600,9 +640,8 @@ static void tpm_tis_remove(struct tpm_chip *chip) release_locality(chip, chip->vendor.locality, 1); } -static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle, - resource_size_t start, resource_size_t len, - unsigned int irq) +static int tpm_tis_init(struct device *dev, struct tpm_info *tpm_info, + acpi_handle acpi_dev_handle) { u32 vendor, intfcaps, intmask; int rc, i, irq_s, irq_e, probe; @@ -622,7 +661,7 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle, chip->acpi_dev_handle = acpi_dev_handle; #endif - chip->vendor.iobase = devm_ioremap(dev, start, len); + chip->vendor.iobase = devm_ioremap(dev, tpm_info->start, tpm_info->len); if (!chip->vendor.iobase) return -EIO; @@ -707,7 +746,7 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle, chip->vendor.iobase + TPM_INT_ENABLE(chip->vendor.locality)); if (interrupts) - chip->vendor.irq = irq; + chip->vendor.irq = tpm_info->irq; if (interrupts && !chip->vendor.irq) { irq_s = ioread8(chip->vendor.iobase + @@ -890,27 +929,27 @@ static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_resume); static int tpm_tis_pnp_init(struct pnp_dev *pnp_dev, const struct pnp_device_id *pnp_id) { - resource_size_t start, len; - unsigned int irq = 0; + struct tpm_info tpm_info = tis_default_info; acpi_handle acpi_dev_handle = NULL; - start = pnp_mem_start(pnp_dev, 0); - len = pnp_mem_len(pnp_dev, 0); + tpm_info.start = pnp_mem_start(pnp_dev, 0); + tpm_info.len = pnp_mem_len(pnp_dev, 0); if (pnp_irq_valid(pnp_dev, 0)) - irq = pnp_irq(pnp_dev, 0); + tpm_info.irq = pnp_irq(pnp_dev, 0); else interrupts = false; - if (is_itpm(pnp_dev)) - itpm = true; - #ifdef CONFIG_ACPI - if (pnp_acpi_device(pnp_dev)) + if (pnp_acpi_device(pnp_dev)) { + if (is_itpm(pnp_acpi_device(pnp_dev))) + itpm = true; + acpi_dev_handle = pnp_acpi_device(pnp_dev)->handle; + } #endif - return tpm_tis_init(&pnp_dev->dev, acpi_dev_handle, start, len, irq); + return tpm_tis_init(&pnp_dev->dev, &tpm_info, acpi_dev_handle); } static struct pnp_device_id tpm_pnp_tbl[] = { @@ -930,6 +969,7 @@ MODULE_DEVICE_TABLE(pnp, tpm_pnp_tbl); static void tpm_tis_pnp_remove(struct pnp_dev *dev) { struct tpm_chip *chip = 
pnp_get_drvdata(dev); + tpm_chip_unregister(chip); tpm_tis_remove(chip); } @@ -950,6 +990,79 @@ module_param_string(hid, tpm_pnp_tbl[TIS_HID_USR_IDX].id, MODULE_PARM_DESC(hid, "Set additional specific HID for this driver to probe"); #endif +#ifdef CONFIG_ACPI +static int tpm_check_resource(struct acpi_resource *ares, void *data) +{ + struct tpm_info *tpm_info = (struct tpm_info *) data; + struct resource res; + + if (acpi_dev_resource_interrupt(ares, 0, &res)) { + tpm_info->irq = res.start; + } else if (acpi_dev_resource_memory(ares, &res)) { + tpm_info->start = res.start; + tpm_info->len = resource_size(&res); + } + + return 1; +} + +static int tpm_tis_acpi_init(struct acpi_device *acpi_dev) +{ + struct list_head resources; + struct tpm_info tpm_info = tis_default_info; + int ret; + + if (!is_fifo(acpi_dev)) + return -ENODEV; + + INIT_LIST_HEAD(&resources); + ret = acpi_dev_get_resources(acpi_dev, &resources, tpm_check_resource, + &tpm_info); + if (ret < 0) + return ret; + + acpi_dev_free_resource_list(&resources); + + if (!tpm_info.irq) + interrupts = false; + + if (is_itpm(acpi_dev)) + itpm = true; + + return tpm_tis_init(&acpi_dev->dev, &tpm_info, acpi_dev->handle); +} + +static int tpm_tis_acpi_remove(struct acpi_device *dev) +{ + struct tpm_chip *chip = dev_get_drvdata(&dev->dev); + + tpm_chip_unregister(chip); + tpm_tis_remove(chip); + + return 0; +} + +static struct acpi_device_id tpm_acpi_tbl[] = { + {"MSFT0101", 0}, /* TPM 2.0 */ + /* Add new here */ + {"", 0}, /* User Specified */ + {"", 0} /* Terminator */ +}; +MODULE_DEVICE_TABLE(acpi, tpm_acpi_tbl); + +static struct acpi_driver tis_acpi_driver = { + .name = "tpm_tis", + .ids = tpm_acpi_tbl, + .ops = { + .add = tpm_tis_acpi_init, + .remove = tpm_tis_acpi_remove, + }, + .drv = { + .pm = &tpm_tis_pm, + }, +}; +#endif + static struct platform_driver tis_drv = { .driver = { .name = "tpm_tis", @@ -966,9 +1079,25 @@ static int __init init_tis(void) { int rc; #ifdef CONFIG_PNP - if (!force) - return pnp_register_driver(&tis_pnp_driver); + if (!force) { + rc = pnp_register_driver(&tis_pnp_driver); + if (rc) + return rc; + } +#endif +#ifdef CONFIG_ACPI + if (!force) { + rc = acpi_bus_register_driver(&tis_acpi_driver); + if (rc) { +#ifdef CONFIG_PNP + pnp_unregister_driver(&tis_pnp_driver); #endif + return rc; + } + } +#endif + if (!force) + return 0; rc = platform_driver_register(&tis_drv); if (rc < 0) @@ -978,7 +1107,7 @@ static int __init init_tis(void) rc = PTR_ERR(pdev); goto err_dev; } - rc = tpm_tis_init(&pdev->dev, NULL, TIS_MEM_BASE, TIS_MEM_LEN, 0); + rc = tpm_tis_init(&pdev->dev, &tis_default_info, NULL); if (rc) goto err_init; return 0; @@ -992,9 +1121,14 @@ static int __init init_tis(void) static void __exit cleanup_tis(void) { struct tpm_chip *chip; -#ifdef CONFIG_PNP +#if defined(CONFIG_PNP) || defined(CONFIG_ACPI) if (!force) { +#ifdef CONFIG_ACPI + acpi_bus_unregister_driver(&tis_acpi_driver); +#endif +#ifdef CONFIG_PNP pnp_unregister_driver(&tis_pnp_driver); +#endif return; } #endif diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 5b0f41868b425..9f9cadd00bc83 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -230,11 +230,12 @@ static void clk_dump_one(struct seq_file *s, struct clk_core *c, int level) if (!c) return; + /* This should be JSON format, i.e. 
elements separated with a comma */
 	seq_printf(s, "\"%s\": { ", c->name);
 	seq_printf(s, "\"enable_count\": %d,", c->enable_count);
 	seq_printf(s, "\"prepare_count\": %d,", c->prepare_count);
-	seq_printf(s, "\"rate\": %lu", clk_core_get_rate(c));
-	seq_printf(s, "\"accuracy\": %lu", clk_core_get_accuracy(c));
+	seq_printf(s, "\"rate\": %lu,", clk_core_get_rate(c));
+	seq_printf(s, "\"accuracy\": %lu,", clk_core_get_accuracy(c));
 	seq_printf(s, "\"phase\": %d", clk_core_get_phase(c));
 }
 
diff --git a/drivers/clk/keystone/pll.c b/drivers/clk/keystone/pll.c
index 0dd8a4b12747b..4a375ead70e9f 100644
--- a/drivers/clk/keystone/pll.c
+++ b/drivers/clk/keystone/pll.c
@@ -37,7 +37,8 @@
  * Main PLL or any other PLLs in the device such as ARM PLL, DDR PLL
  * or PA PLL available on keystone2. These PLLs are controlled by
  * this register. Main PLL is controlled by a PLL controller.
- * @pllm: PLL register map address
+ * @pllm: PLL register map address for multiplier bits
+ * @pllod: PLL register map address for post divider bits
  * @pll_ctl0: PLL controller map address
  * @pllm_lower_mask: multiplier lower mask
  * @pllm_upper_mask: multiplier upper mask
@@ -53,6 +54,7 @@ struct clk_pll_data {
 	u32 phy_pllm;
 	u32 phy_pll_ctl0;
 	void __iomem *pllm;
+	void __iomem *pllod;
 	void __iomem *pll_ctl0;
 	u32 pllm_lower_mask;
 	u32 pllm_upper_mask;
@@ -102,7 +104,11 @@ static unsigned long clk_pllclk_recalc(struct clk_hw *hw,
 		/* read post divider from od bits */
 		postdiv = ((val & pll_data->clkod_mask) >>
 				 pll_data->clkod_shift) + 1;
-	else
+	else if (pll_data->pllod) {
+		postdiv = readl(pll_data->pllod);
+		postdiv = ((postdiv & pll_data->clkod_mask) >>
+				pll_data->clkod_shift) + 1;
+	} else
 		postdiv = pll_data->postdiv;
 
 	rate /= (prediv + 1);
@@ -172,12 +178,21 @@ static void __init _of_pll_clk_init(struct device_node *node, bool pllctrl)
 		/* assume the PLL has output divider register bits */
 		pll_data->clkod_mask = CLKOD_MASK;
 		pll_data->clkod_shift = CLKOD_SHIFT;
+
+		/*
+		 * Check if there is a post-divider register. If not,
+		 * assume od bits are part of control register.
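/*
 * The three-way post-divider lookup that the hunks above assemble,
 * condensed into one helper. Field names follow struct clk_pll_data
 * from this file; od_in_ctrl stands in for the driver's actual "od
 * bits live in the control register" test, which is not reproduced
 * here.
 */
#include <linux/io.h>

static unsigned int pll_postdiv(struct clk_pll_data *pll_data, u32 ctrl_val,
				bool od_in_ctrl)
{
	u32 od;

	if (od_in_ctrl)			/* od bits in the control word */
		od = (ctrl_val & pll_data->clkod_mask) >>
			pll_data->clkod_shift;
	else if (pll_data->pllod)	/* dedicated, separately mapped reg */
		od = (readl(pll_data->pllod) & pll_data->clkod_mask) >>
			pll_data->clkod_shift;
	else
		return pll_data->postdiv;	/* static default */

	return od + 1;			/* the register stores od - 1 */
}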
+ */ + i = of_property_match_string(node, "reg-names", + "post-divider"); + pll_data->pllod = of_iomap(node, i); } i = of_property_match_string(node, "reg-names", "control"); pll_data->pll_ctl0 = of_iomap(node, i); if (!pll_data->pll_ctl0) { pr_err("%s: ioremap failed\n", __func__); + iounmap(pll_data->pllod); goto out; } @@ -193,6 +208,7 @@ static void __init _of_pll_clk_init(struct device_node *node, bool pllctrl) pll_data->pllm = of_iomap(node, i); if (!pll_data->pllm) { iounmap(pll_data->pll_ctl0); + iounmap(pll_data->pllod); goto out; } } diff --git a/drivers/clk/pistachio/clk-pistachio.c b/drivers/clk/pistachio/clk-pistachio.c index 8c0fe8828f993..c4ceb5eaf46c1 100644 --- a/drivers/clk/pistachio/clk-pistachio.c +++ b/drivers/clk/pistachio/clk-pistachio.c @@ -159,9 +159,15 @@ PNAME(mux_debug) = { "mips_pll_mux", "rpu_v_pll_mux", "wifi_pll_mux", "bt_pll_mux" }; static u32 mux_debug_idx[] = { 0x0, 0x1, 0x2, 0x4, 0x8, 0x10 }; -static unsigned int pistachio_critical_clks[] __initdata = { - CLK_MIPS, - CLK_PERIPH_SYS, +static unsigned int pistachio_critical_clks_core[] __initdata = { + CLK_MIPS +}; + +static unsigned int pistachio_critical_clks_sys[] __initdata = { + PERIPH_CLK_SYS, + PERIPH_CLK_SYS_BUS, + PERIPH_CLK_DDR, + PERIPH_CLK_ROM, }; static void __init pistachio_clk_init(struct device_node *np) @@ -193,8 +199,8 @@ static void __init pistachio_clk_init(struct device_node *np) pistachio_clk_register_provider(p); - pistachio_clk_force_enable(p, pistachio_critical_clks, - ARRAY_SIZE(pistachio_critical_clks)); + pistachio_clk_force_enable(p, pistachio_critical_clks_core, + ARRAY_SIZE(pistachio_critical_clks_core)); } CLK_OF_DECLARE(pistachio_clk, "img,pistachio-clk", pistachio_clk_init); @@ -261,6 +267,9 @@ static void __init pistachio_clk_periph_init(struct device_node *np) ARRAY_SIZE(pistachio_periph_gates)); pistachio_clk_register_provider(p); + + pistachio_clk_force_enable(p, pistachio_critical_clks_sys, + ARRAY_SIZE(pistachio_critical_clks_sys)); } CLK_OF_DECLARE(pistachio_clk_periph, "img,pistachio-clk-periph", pistachio_clk_periph_init); diff --git a/drivers/clk/pistachio/clk-pll.c b/drivers/clk/pistachio/clk-pll.c index de537560bf709..ebd0d2a3b5dad 100644 --- a/drivers/clk/pistachio/clk-pll.c +++ b/drivers/clk/pistachio/clk-pll.c @@ -115,8 +115,7 @@ static int pll_gf40lp_frac_enable(struct clk_hw *hw) u32 val; val = pll_readl(pll, PLL_CTRL3); - val &= ~(PLL_FRAC_CTRL3_PD | PLL_FRAC_CTRL3_DACPD | - PLL_FRAC_CTRL3_DSMPD | PLL_FRAC_CTRL3_FOUTPOSTDIVPD | + val &= ~(PLL_FRAC_CTRL3_PD | PLL_FRAC_CTRL3_FOUTPOSTDIVPD | PLL_FRAC_CTRL3_FOUT4PHASEPD | PLL_FRAC_CTRL3_FOUTVCOPD); pll_writel(pll, val, PLL_CTRL3); @@ -233,7 +232,7 @@ static int pll_gf40lp_laint_enable(struct clk_hw *hw) u32 val; val = pll_readl(pll, PLL_CTRL1); - val &= ~(PLL_INT_CTRL1_PD | PLL_INT_CTRL1_DSMPD | + val &= ~(PLL_INT_CTRL1_PD | PLL_INT_CTRL1_FOUTPOSTDIVPD | PLL_INT_CTRL1_FOUTVCOPD); pll_writel(pll, val, PLL_CTRL1); diff --git a/drivers/clk/pxa/clk-pxa25x.c b/drivers/clk/pxa/clk-pxa25x.c index 6cd88d963a7ff..542e45ef5087b 100644 --- a/drivers/clk/pxa/clk-pxa25x.c +++ b/drivers/clk/pxa/clk-pxa25x.c @@ -79,7 +79,7 @@ unsigned int pxa25x_get_clk_frequency_khz(int info) clks[3] / 1000000, (clks[3] % 1000000) / 10000); } - return (unsigned int)clks[0]; + return (unsigned int)clks[0] / KHz; } static unsigned long clk_pxa25x_memory_get_rate(struct clk_hw *hw, diff --git a/drivers/clk/pxa/clk-pxa27x.c b/drivers/clk/pxa/clk-pxa27x.c index 5f9b54b024b9e..267511df1e59f 100644 --- a/drivers/clk/pxa/clk-pxa27x.c +++ 
b/drivers/clk/pxa/clk-pxa27x.c @@ -80,7 +80,7 @@ unsigned int pxa27x_get_clk_frequency_khz(int info) pr_info("System bus clock: %ld.%02ldMHz\n", clks[4] / 1000000, (clks[4] % 1000000) / 10000); } - return (unsigned int)clks[0]; + return (unsigned int)clks[0] / KHz; } bool pxa27x_is_ppll_disabled(void) diff --git a/drivers/clk/pxa/clk-pxa3xx.c b/drivers/clk/pxa/clk-pxa3xx.c index 4b93a1efb36d1..4af4eed5f89f0 100644 --- a/drivers/clk/pxa/clk-pxa3xx.c +++ b/drivers/clk/pxa/clk-pxa3xx.c @@ -78,7 +78,7 @@ unsigned int pxa3xx_get_clk_frequency_khz(int info) pr_info("System bus clock: %ld.%02ldMHz\n", clks[4] / 1000000, (clks[4] % 1000000) / 10000); } - return (unsigned int)clks[0]; + return (unsigned int)clks[0] / KHz; } static unsigned long clk_pxa3xx_ac97_get_rate(struct clk_hw *hw, @@ -126,7 +126,7 @@ PARENTS(pxa3xx_ac97_bus) = { "ring_osc_60mhz", "ac97" }; PARENTS(pxa3xx_sbus) = { "ring_osc_60mhz", "system_bus" }; PARENTS(pxa3xx_smemcbus) = { "ring_osc_60mhz", "smemc" }; -#define CKEN_AB(bit) ((CKEN_ ## bit > 31) ? &CKENA : &CKENB) +#define CKEN_AB(bit) ((CKEN_ ## bit > 31) ? &CKENB : &CKENA) #define PXA3XX_CKEN(dev_id, con_id, parents, mult_lp, div_lp, mult_hp, \ div_hp, bit, is_lp, flags) \ PXA_CKEN(dev_id, con_id, bit, parents, mult_lp, div_lp, \ diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index b95d17fbb8d7e..92936f0912d2f 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -530,19 +530,16 @@ static int clk_pixel_set_rate(struct clk_hw *hw, unsigned long rate, struct clk_rcg2 *rcg = to_clk_rcg2(hw); struct freq_tbl f = *rcg->freq_tbl; const struct frac_entry *frac = frac_table_pixel; - unsigned long request, src_rate; + unsigned long request; int delta = 100000; u32 mask = BIT(rcg->hid_width) - 1; u32 hid_div; - int index = qcom_find_src_index(hw, rcg->parent_map, f.src); - struct clk *parent = clk_get_parent_by_index(hw->clk, index); for (; frac->num; frac++) { request = (rate * frac->den) / frac->num; - src_rate = __clk_round_rate(parent, request); - if ((src_rate < (request - delta)) || - (src_rate > (request + delta))) + if ((parent_rate < (request - delta)) || + (parent_rate > (request + delta))) continue; regmap_read(rcg->clkr.regmap, rcg->cmd_rcgr + CFG_REG, diff --git a/drivers/clk/qcom/gcc-apq8084.c b/drivers/clk/qcom/gcc-apq8084.c index 54a756b90a374..457c540585f92 100644 --- a/drivers/clk/qcom/gcc-apq8084.c +++ b/drivers/clk/qcom/gcc-apq8084.c @@ -2105,6 +2105,7 @@ static struct clk_branch gcc_ce1_clk = { "ce1_clk_src", }, .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, }, }, diff --git a/drivers/clk/qcom/gcc-msm8916.c b/drivers/clk/qcom/gcc-msm8916.c index c66f7bc2ae87c..5d75bffab141e 100644 --- a/drivers/clk/qcom/gcc-msm8916.c +++ b/drivers/clk/qcom/gcc-msm8916.c @@ -2278,7 +2278,7 @@ static struct clk_branch gcc_prng_ahb_clk = { .halt_check = BRANCH_HALT_VOTED, .clkr = { .enable_reg = 0x45004, - .enable_mask = BIT(0), + .enable_mask = BIT(8), .hw.init = &(struct clk_init_data){ .name = "gcc_prng_ahb_clk", .parent_names = (const char *[]){ diff --git a/drivers/clk/qcom/gcc-msm8974.c b/drivers/clk/qcom/gcc-msm8974.c index c39d09874e74d..f06a082e3e870 100644 --- a/drivers/clk/qcom/gcc-msm8974.c +++ b/drivers/clk/qcom/gcc-msm8974.c @@ -1783,6 +1783,7 @@ static struct clk_branch gcc_ce1_clk = { "ce1_clk_src", }, .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, }, }, diff --git a/drivers/clk/rockchip/clk-rk3288.c b/drivers/clk/rockchip/clk-rk3288.c index 
d17eb4528a283..37f96117fd3d0 100644 --- a/drivers/clk/rockchip/clk-rk3288.c +++ b/drivers/clk/rockchip/clk-rk3288.c @@ -578,7 +578,7 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = { COMPOSITE(0, "mac_pll_src", mux_pll_src_npll_cpll_gpll_p, 0, RK3288_CLKSEL_CON(21), 0, 2, MFLAGS, 8, 5, DFLAGS, RK3288_CLKGATE_CON(2), 5, GFLAGS), - MUX(SCLK_MAC, "mac_clk", mux_mac_p, 0, + MUX(SCLK_MAC, "mac_clk", mux_mac_p, CLK_SET_RATE_PARENT, RK3288_CLKSEL_CON(21), 4, 1, MFLAGS), GATE(SCLK_MACREF_OUT, "sclk_macref_out", "mac_clk", 0, RK3288_CLKGATE_CON(5), 3, GFLAGS), diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c index 714d6ba782c81..f7890bf652e69 100644 --- a/drivers/clk/samsung/clk-exynos4.c +++ b/drivers/clk/samsung/clk-exynos4.c @@ -85,6 +85,7 @@ #define DIV_PERIL4 0xc560 #define DIV_PERIL5 0xc564 #define E4X12_DIV_CAM1 0xc568 +#define E4X12_GATE_BUS_FSYS1 0xc744 #define GATE_SCLK_CAM 0xc820 #define GATE_IP_CAM 0xc920 #define GATE_IP_TV 0xc924 @@ -1095,6 +1096,7 @@ static struct samsung_gate_clock exynos4x12_gate_clks[] __initdata = { 0), GATE(CLK_PPMUIMAGE, "ppmuimage", "aclk200", E4X12_GATE_IP_IMAGE, 9, 0, 0), + GATE(CLK_TSADC, "tsadc", "aclk133", E4X12_GATE_BUS_FSYS1, 16, 0, 0), GATE(CLK_MIPI_HSI, "mipi_hsi", "aclk133", GATE_IP_FSYS, 10, 0, 0), GATE(CLK_CHIPID, "chipid", "aclk100", E4X12_GATE_IP_PERIR, 0, 0, 0), GATE(CLK_SYSREG, "sysreg", "aclk100", E4X12_GATE_IP_PERIR, 1, diff --git a/drivers/clk/samsung/clk-s5pv210.c b/drivers/clk/samsung/clk-s5pv210.c index e668e479a6970..bdd284249cc3c 100644 --- a/drivers/clk/samsung/clk-s5pv210.c +++ b/drivers/clk/samsung/clk-s5pv210.c @@ -828,6 +828,8 @@ static void __init __s5pv210_clk_init(struct device_node *np, s5pv210_clk_sleep_init(); + samsung_clk_of_add_provider(np, ctx); + pr_info("%s clocks: mout_apll = %ld, mout_mpll = %ld\n" "\tmout_epll = %ld, mout_vpll = %ld\n", is_s5p6442 ? 
"S5P6442" : "S5PV210", diff --git a/drivers/clk/st/clk-flexgen.c b/drivers/clk/st/clk-flexgen.c index bf12a25eb3a22..0f8db28353c56 100644 --- a/drivers/clk/st/clk-flexgen.c +++ b/drivers/clk/st/clk-flexgen.c @@ -303,6 +303,8 @@ void __init st_of_flexgen_setup(struct device_node *np) if (!rlock) goto err; + spin_lock_init(rlock); + for (i = 0; i < clk_data->clk_num; i++) { struct clk *clk; const char *clk_name; diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index a917c4c7eaa9c..6ae068ab07c85 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -340,7 +340,7 @@ static const struct clkgen_quadfs_data st_fs660c32_C_407 = { CLKGEN_FIELD(0x30c, 0xf, 20), CLKGEN_FIELD(0x310, 0xf, 20) }, .lockstatus_present = true, - .lock_status = CLKGEN_FIELD(0x2A0, 0x1, 24), + .lock_status = CLKGEN_FIELD(0x2f0, 0x1, 24), .powerup_polarity = 1, .standby_polarity = 1, .pll_ops = &st_quadfs_pll_c32_ops, diff --git a/drivers/clk/st/clkgen-mux.c b/drivers/clk/st/clkgen-mux.c index fdcff10f6d308..ef6514636bfc9 100644 --- a/drivers/clk/st/clkgen-mux.c +++ b/drivers/clk/st/clkgen-mux.c @@ -582,7 +582,7 @@ static struct clkgen_mux_data stih416_a9_mux_data = { }; static struct clkgen_mux_data stih407_a9_mux_data = { .offset = 0x1a4, - .shift = 1, + .shift = 0, .width = 2, }; diff --git a/drivers/clk/ti/clk-3xxx.c b/drivers/clk/ti/clk-3xxx.c index 757636d166cff..4ab28cfb8d2a6 100644 --- a/drivers/clk/ti/clk-3xxx.c +++ b/drivers/clk/ti/clk-3xxx.c @@ -163,7 +163,6 @@ static struct ti_dt_clk omap3xxx_clks[] = { DT_CLK(NULL, "gpio2_ick", "gpio2_ick"), DT_CLK(NULL, "wdt3_ick", "wdt3_ick"), DT_CLK(NULL, "uart3_ick", "uart3_ick"), - DT_CLK(NULL, "uart4_ick", "uart4_ick"), DT_CLK(NULL, "gpt9_ick", "gpt9_ick"), DT_CLK(NULL, "gpt8_ick", "gpt8_ick"), DT_CLK(NULL, "gpt7_ick", "gpt7_ick"), @@ -308,6 +307,7 @@ static struct ti_dt_clk am35xx_clks[] = { static struct ti_dt_clk omap36xx_clks[] = { DT_CLK(NULL, "omap_192m_alwon_fck", "omap_192m_alwon_fck"), DT_CLK(NULL, "uart4_fck", "uart4_fck"), + DT_CLK(NULL, "uart4_ick", "uart4_ick"), { .node_name = NULL }, }; diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c index d86bc46b93bdf..0a1df821860fd 100644 --- a/drivers/clk/ti/clk-dra7-atl.c +++ b/drivers/clk/ti/clk-dra7-atl.c @@ -252,6 +252,11 @@ static int of_dra7_atl_clk_probe(struct platform_device *pdev) } clk = of_clk_get_from_provider(&clkspec); + if (IS_ERR(clk)) { + pr_err("%s: failed to get atl clock %d from provider\n", + __func__, i); + return PTR_ERR(clk); + } cdesc = to_atl_desc(__clk_get_hw(clk)); cdesc->cinfo = cinfo; diff --git a/drivers/clk/versatile/clk-icst.c b/drivers/clk/versatile/clk-icst.c index bc96f103bd7ca..9064636a867f2 100644 --- a/drivers/clk/versatile/clk-icst.c +++ b/drivers/clk/versatile/clk-icst.c @@ -156,8 +156,10 @@ struct clk *icst_clk_register(struct device *dev, icst->lockreg = base + desc->lock_offset; clk = clk_register(dev, &icst->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + kfree(pclone); kfree(icst); + } return clk; } diff --git a/drivers/clk/versatile/clk-sp810.c b/drivers/clk/versatile/clk-sp810.c index c6e86a9a2aa3d..5122ef25f5952 100644 --- a/drivers/clk/versatile/clk-sp810.c +++ b/drivers/clk/versatile/clk-sp810.c @@ -128,8 +128,8 @@ static struct clk *clk_sp810_timerclken_of_get(struct of_phandle_args *clkspec, { struct clk_sp810 *sp810 = data; - if (WARN_ON(clkspec->args_count != 1 || clkspec->args[0] > - ARRAY_SIZE(sp810->timerclken))) + if (WARN_ON(clkspec->args_count != 1 || + clkspec->args[0] >= 
ARRAY_SIZE(sp810->timerclken))) return NULL; return sp810->timerclken[clkspec->args[0]].clk; diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 83564c9cfdbe3..c844616028d20 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -466,15 +466,12 @@ static int exynos4_local_timer_setup(struct clock_event_device *evt) exynos4_mct_write(TICK_BASE_CNT, mevt->base + MCT_L_TCNTB_OFFSET); if (mct_int_type == MCT_INT_SPI) { - evt->irq = mct_irqs[MCT_L0_IRQ + cpu]; - if (request_irq(evt->irq, exynos4_mct_tick_isr, - IRQF_TIMER | IRQF_NOBALANCING, - evt->name, mevt)) { - pr_err("exynos-mct: cannot register IRQ %d\n", - evt->irq); + + if (evt->irq == -1) return -EIO; - } - irq_force_affinity(mct_irqs[MCT_L0_IRQ + cpu], cpumask_of(cpu)); + + irq_force_affinity(evt->irq, cpumask_of(cpu)); + enable_irq(evt->irq); } else { enable_percpu_irq(mct_irqs[MCT_L0_IRQ], 0); } @@ -487,10 +484,12 @@ static int exynos4_local_timer_setup(struct clock_event_device *evt) static void exynos4_local_timer_stop(struct clock_event_device *evt) { evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt); - if (mct_int_type == MCT_INT_SPI) - free_irq(evt->irq, this_cpu_ptr(&percpu_mct_tick)); - else + if (mct_int_type == MCT_INT_SPI) { + if (evt->irq != -1) + disable_irq_nosync(evt->irq); + } else { disable_percpu_irq(mct_irqs[MCT_L0_IRQ]); + } } static int exynos4_mct_cpu_notify(struct notifier_block *self, @@ -522,7 +521,7 @@ static struct notifier_block exynos4_mct_cpu_nb = { static void __init exynos4_timer_resources(struct device_node *np, void __iomem *base) { - int err; + int err, cpu; struct mct_clock_event_device *mevt = this_cpu_ptr(&percpu_mct_tick); struct clk *mct_clk, *tick_clk; @@ -549,7 +548,25 @@ static void __init exynos4_timer_resources(struct device_node *np, void __iomem WARN(err, "MCT: can't request IRQ %d (%d)\n", mct_irqs[MCT_L0_IRQ], err); } else { - irq_set_affinity(mct_irqs[MCT_L0_IRQ], cpumask_of(0)); + for_each_possible_cpu(cpu) { + int mct_irq = mct_irqs[MCT_L0_IRQ + cpu]; + struct mct_clock_event_device *pcpu_mevt = + per_cpu_ptr(&percpu_mct_tick, cpu); + + pcpu_mevt->evt.irq = -1; + + irq_set_status_flags(mct_irq, IRQ_NOAUTOEN); + if (request_irq(mct_irq, + exynos4_mct_tick_isr, + IRQF_TIMER | IRQF_NOBALANCING, + pcpu_mevt->name, pcpu_mevt)) { + pr_err("exynos-mct: cannot register IRQ (cpu%d)\n", + cpu); + + continue; + } + pcpu_mevt->evt.irq = mct_irq; + } } err = register_cpu_notifier(&exynos4_mct_cpu_nb); diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c index 1098ed3b9b89f..dc45ddb36117d 100644 --- a/drivers/clocksource/vt8500_timer.c +++ b/drivers/clocksource/vt8500_timer.c @@ -50,6 +50,8 @@ #define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t) +#define MIN_OSCR_DELTA 16 + static void __iomem *regbase; static cycle_t vt8500_timer_read(struct clocksource *cs) @@ -80,7 +82,7 @@ static int vt8500_timer_set_next_event(unsigned long cycles, cpu_relax(); writel((unsigned long)alarm, regbase + TIMER_MATCH_VAL); - if ((signed)(alarm - clocksource.read(&clocksource)) <= 16) + if ((signed)(alarm - clocksource.read(&clocksource)) <= MIN_OSCR_DELTA) return -ETIME; writel(1, regbase + TIMER_IER_VAL); @@ -160,7 +162,7 @@ static void __init vt8500_timer_init(struct device_node *np) pr_err("%s: setup_irq failed for %s\n", __func__, clockevent.name); clockevents_config_and_register(&clockevent, VT8500_TIMER_HZ, - 4, 0xf0000000); + MIN_OSCR_DELTA * 2, 0xf0000000); } CLOCKSOURCE_OF_DECLARE(vt8500, "via,vt8500-timer", 
vt8500_timer_init); diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 30f522848c735..c19e7fc717c30 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -178,26 +178,21 @@ static int cn_call_callback(struct sk_buff *skb) * * It checks skb, netlink header and msg sizes, and calls callback helper. */ -static void cn_rx_skb(struct sk_buff *__skb) +static void cn_rx_skb(struct sk_buff *skb) { struct nlmsghdr *nlh; - struct sk_buff *skb; int len, err; - skb = skb_get(__skb); - if (skb->len >= NLMSG_HDRLEN) { nlh = nlmsg_hdr(skb); len = nlmsg_len(nlh); if (len < (int)sizeof(struct cn_msg) || skb->len < nlh->nlmsg_len || - len > CONNECTOR_MAX_MSG_SIZE) { - kfree_skb(skb); + len > CONNECTOR_MAX_MSG_SIZE) return; - } - err = cn_call_callback(skb); + err = cn_call_callback(skb_get(skb)); if (err < 0) kfree_skb(skb); } diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index bab67db54b7eb..663045ce6face 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -255,7 +255,8 @@ static int cpufreq_init(struct cpufreq_policy *policy) rcu_read_unlock(); tol_uV = opp_uV * priv->voltage_tolerance / 100; - if (regulator_is_supported_voltage(cpu_reg, opp_uV, + if (regulator_is_supported_voltage(cpu_reg, + opp_uV - tol_uV, opp_uV + tol_uV)) { if (opp_uV < min_uV) min_uV = opp_uV; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6414661ac1c46..1ee2ab58e37d6 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -48,9 +48,9 @@ static inline int32_t mul_fp(int32_t x, int32_t y) return ((int64_t)x * (int64_t)y) >> FRAC_BITS; } -static inline int32_t div_fp(int32_t x, int32_t y) +static inline int32_t div_fp(s64 x, s64 y) { - return div_s64((int64_t)x << FRAC_BITS, y); + return div64_s64((int64_t)x << FRAC_BITS, y); } static inline int ceiling_fp(int32_t x) @@ -535,7 +535,7 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate) val |= vid; - wrmsrl(MSR_IA32_PERF_CTL, val); + wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); } #define BYT_BCLK_FREQS 5 @@ -678,6 +678,7 @@ static struct cpu_defaults knl_params = { .get_max = core_get_max_pstate, .get_min = core_get_min_pstate, .get_turbo = knl_get_turbo_pstate, + .get_scaling = core_get_scaling, .set = core_set_pstate, }, }; @@ -760,6 +761,11 @@ static inline void intel_pstate_sample(struct cpudata *cpu) local_irq_save(flags); rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); + if (cpu->prev_mperf == mperf) { + local_irq_restore(flags); + return; + } + local_irq_restore(flags); cpu->last_sample_time = cpu->sample.time; @@ -794,7 +800,7 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) { int32_t core_busy, max_pstate, current_pstate, sample_ratio; - u32 duration_us; + s64 duration_us; u32 sample_time; /* @@ -821,8 +827,8 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) * to adjust our busyness. 
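/*
 * Why duration_us becomes s64 above, demonstrated standalone: with
 * FRAC_BITS == 8, converting a duration to fixed point in 32 bits
 * wraps once the gap between samples exceeds 2^31 / 256 us, roughly
 * 8.4 seconds, which is routine across idle periods. Doing the
 * conversion and the division in 64 bits keeps the ratio exact.
 */
#include <stdint.h>
#include <stdio.h>

#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)

static int64_t div_fp(int64_t x, int64_t y)
{
	return (x << FRAC_BITS) / y;	/* plays the role of div64_s64() */
}

int main(void)
{
	int64_t sample_time = 10000;	/* 10 ms sample rate, in us */
	int64_t duration_us = 100000;	/* this sample arrived late */

	/* a 32-bit int_tofp() would already be unsafe for long gaps */
	if (duration_us > sample_time * 3) {
		int64_t sample_ratio = div_fp(int_tofp(sample_time),
					      int_tofp(duration_us));

		printf("sample_ratio = %lld/256\n", (long long)sample_ratio);
	}
	return 0;
}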
*/ sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; - duration_us = (u32) ktime_us_delta(cpu->sample.time, - cpu->last_sample_time); + duration_us = ktime_us_delta(cpu->sample.time, + cpu->last_sample_time); if (duration_us > sample_time * 3) { sample_ratio = div_fp(int_tofp(sample_time), int_tofp(duration_us)); diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 59372077ec7c1..3442764a52938 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -60,6 +60,8 @@ static int nap_loop(struct cpuidle_device *dev, return index; } +/* Register for fastsleep only in oneshot mode of broadcast */ +#ifdef CONFIG_TICK_ONESHOT static int fastsleep_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) @@ -83,7 +85,7 @@ static int fastsleep_loop(struct cpuidle_device *dev, return index; } - +#endif /* * States for dedicated partition case. */ @@ -209,7 +211,14 @@ static int powernv_add_idle_states(void) powernv_states[nr_idle_states].flags = 0; powernv_states[nr_idle_states].target_residency = 100; powernv_states[nr_idle_states].enter = &nap_loop; - } else if (flags[i] & OPAL_PM_SLEEP_ENABLED || + } + + /* + * All cpuidle states with CPUIDLE_FLAG_TIMER_STOP set must come + * within this config dependency check. + */ +#ifdef CONFIG_TICK_ONESHOT + if (flags[i] & OPAL_PM_SLEEP_ENABLED || flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) { /* Add FASTSLEEP state */ strcpy(powernv_states[nr_idle_states].name, "FastSleep"); @@ -218,7 +227,7 @@ static int powernv_add_idle_states(void) powernv_states[nr_idle_states].target_residency = 300000; powernv_states[nr_idle_states].enter = &fastsleep_loop; } - +#endif powernv_states[nr_idle_states].exit_latency = ((unsigned int)latency_ns[i]) / 1000; diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 0f9a9dc06a830..fb16d812c8f55 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -260,7 +260,11 @@ static struct atmel_aes_dev *atmel_aes_find_dev(struct atmel_aes_ctx *ctx) static int atmel_aes_hw_init(struct atmel_aes_dev *dd) { - clk_prepare_enable(dd->iclk); + int err; + + err = clk_prepare_enable(dd->iclk); + if (err) + return err; if (!(dd->flags & AES_FLAGS_INIT)) { atmel_aes_write(dd, AES_CR, AES_CR_SWRST); @@ -1320,7 +1324,6 @@ static int atmel_aes_probe(struct platform_device *pdev) struct crypto_platform_data *pdata; struct device *dev = &pdev->dev; struct resource *aes_res; - unsigned long aes_phys_size; int err; pdata = pdev->dev.platform_data; @@ -1337,7 +1340,7 @@ static int atmel_aes_probe(struct platform_device *pdev) goto aes_dd_err; } - aes_dd = kzalloc(sizeof(struct atmel_aes_dev), GFP_KERNEL); + aes_dd = devm_kzalloc(&pdev->dev, sizeof(*aes_dd), GFP_KERNEL); if (aes_dd == NULL) { dev_err(dev, "unable to alloc data struct.\n"); err = -ENOMEM; @@ -1368,36 +1371,35 @@ static int atmel_aes_probe(struct platform_device *pdev) goto res_err; } aes_dd->phys_base = aes_res->start; - aes_phys_size = resource_size(aes_res); /* Get the IRQ */ aes_dd->irq = platform_get_irq(pdev, 0); if (aes_dd->irq < 0) { dev_err(dev, "no IRQ resource info\n"); err = aes_dd->irq; - goto aes_irq_err; + goto res_err; } - err = request_irq(aes_dd->irq, atmel_aes_irq, IRQF_SHARED, "atmel-aes", - aes_dd); + err = devm_request_irq(&pdev->dev, aes_dd->irq, atmel_aes_irq, + IRQF_SHARED, "atmel-aes", aes_dd); if (err) { dev_err(dev, "unable to request aes irq.\n"); - goto aes_irq_err; + goto res_err; } /* Initializing the clock */ - aes_dd->iclk = 
clk_get(&pdev->dev, "aes_clk"); + aes_dd->iclk = devm_clk_get(&pdev->dev, "aes_clk"); if (IS_ERR(aes_dd->iclk)) { dev_err(dev, "clock initialization failed.\n"); err = PTR_ERR(aes_dd->iclk); - goto clk_err; + goto res_err; } - aes_dd->io_base = ioremap(aes_dd->phys_base, aes_phys_size); - if (!aes_dd->io_base) { + aes_dd->io_base = devm_ioremap_resource(&pdev->dev, aes_res); + if (IS_ERR(aes_dd->io_base)) { dev_err(dev, "can't ioremap\n"); - err = -ENOMEM; - goto aes_io_err; + err = PTR_ERR(aes_dd->io_base); + goto res_err; } atmel_aes_hw_version_init(aes_dd); @@ -1434,17 +1436,9 @@ static int atmel_aes_probe(struct platform_device *pdev) err_aes_dma: atmel_aes_buff_cleanup(aes_dd); err_aes_buff: - iounmap(aes_dd->io_base); -aes_io_err: - clk_put(aes_dd->iclk); -clk_err: - free_irq(aes_dd->irq, aes_dd); -aes_irq_err: res_err: tasklet_kill(&aes_dd->done_task); tasklet_kill(&aes_dd->queue_task); - kfree(aes_dd); - aes_dd = NULL; aes_dd_err: dev_err(dev, "initialization failed.\n"); @@ -1469,16 +1463,6 @@ static int atmel_aes_remove(struct platform_device *pdev) atmel_aes_dma_cleanup(aes_dd); - iounmap(aes_dd->io_base); - - clk_put(aes_dd->iclk); - - if (aes_dd->irq > 0) - free_irq(aes_dd->irq, aes_dd); - - kfree(aes_dd); - aes_dd = NULL; - return 0; } diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 5b35433c5399b..a71c97c03c393 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -783,7 +783,7 @@ static void atmel_sha_finish_req(struct ahash_request *req, int err) dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU | SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY); - clk_disable_unprepare(dd->iclk); + clk_disable(dd->iclk); if (req->base.complete) req->base.complete(&req->base, err); @@ -794,7 +794,11 @@ static void atmel_sha_finish_req(struct ahash_request *req, int err) static int atmel_sha_hw_init(struct atmel_sha_dev *dd) { - clk_prepare_enable(dd->iclk); + int err; + + err = clk_enable(dd->iclk); + if (err) + return err; if (!(SHA_FLAGS_INIT & dd->flags)) { atmel_sha_write(dd, SHA_CR, SHA_CR_SWRST); @@ -819,7 +823,7 @@ static void atmel_sha_hw_version_init(struct atmel_sha_dev *dd) dev_info(dd->dev, "version: 0x%x\n", dd->hw_version); - clk_disable_unprepare(dd->iclk); + clk_disable(dd->iclk); } static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, @@ -1345,11 +1349,9 @@ static int atmel_sha_probe(struct platform_device *pdev) struct crypto_platform_data *pdata; struct device *dev = &pdev->dev; struct resource *sha_res; - unsigned long sha_phys_size; int err; - sha_dd = devm_kzalloc(&pdev->dev, sizeof(struct atmel_sha_dev), - GFP_KERNEL); + sha_dd = devm_kzalloc(&pdev->dev, sizeof(*sha_dd), GFP_KERNEL); if (sha_dd == NULL) { dev_err(dev, "unable to alloc data struct.\n"); err = -ENOMEM; @@ -1378,7 +1380,6 @@ static int atmel_sha_probe(struct platform_device *pdev) goto res_err; } sha_dd->phys_base = sha_res->start; - sha_phys_size = resource_size(sha_res); /* Get the IRQ */ sha_dd->irq = platform_get_irq(pdev, 0); @@ -1388,28 +1389,32 @@ static int atmel_sha_probe(struct platform_device *pdev) goto res_err; } - err = request_irq(sha_dd->irq, atmel_sha_irq, IRQF_SHARED, "atmel-sha", - sha_dd); + err = devm_request_irq(&pdev->dev, sha_dd->irq, atmel_sha_irq, + IRQF_SHARED, "atmel-sha", sha_dd); if (err) { dev_err(dev, "unable to request sha irq.\n"); goto res_err; } /* Initializing the clock */ - sha_dd->iclk = clk_get(&pdev->dev, "sha_clk"); + sha_dd->iclk = devm_clk_get(&pdev->dev, "sha_clk"); if (IS_ERR(sha_dd->iclk)) { dev_err(dev, "clock initialization failed.\n"); err = 
PTR_ERR(sha_dd->iclk); - goto clk_err; + goto res_err; } - sha_dd->io_base = ioremap(sha_dd->phys_base, sha_phys_size); - if (!sha_dd->io_base) { + sha_dd->io_base = devm_ioremap_resource(&pdev->dev, sha_res); + if (IS_ERR(sha_dd->io_base)) { dev_err(dev, "can't ioremap\n"); - err = -ENOMEM; - goto sha_io_err; + err = PTR_ERR(sha_dd->io_base); + goto res_err; } + err = clk_prepare(sha_dd->iclk); + if (err) + goto res_err; + atmel_sha_hw_version_init(sha_dd); atmel_sha_get_cap(sha_dd); @@ -1421,12 +1426,12 @@ static int atmel_sha_probe(struct platform_device *pdev) if (IS_ERR(pdata)) { dev_err(&pdev->dev, "platform data not available\n"); err = PTR_ERR(pdata); - goto err_pdata; + goto iclk_unprepare; } } if (!pdata->dma_slave) { err = -ENXIO; - goto err_pdata; + goto iclk_unprepare; } err = atmel_sha_dma_init(sha_dd, pdata); if (err) @@ -1457,12 +1462,8 @@ static int atmel_sha_probe(struct platform_device *pdev) if (sha_dd->caps.has_dma) atmel_sha_dma_cleanup(sha_dd); err_sha_dma: -err_pdata: - iounmap(sha_dd->io_base); -sha_io_err: - clk_put(sha_dd->iclk); -clk_err: - free_irq(sha_dd->irq, sha_dd); +iclk_unprepare: + clk_unprepare(sha_dd->iclk); res_err: tasklet_kill(&sha_dd->done_task); sha_dd_err: @@ -1489,6 +1490,8 @@ static int atmel_sha_remove(struct platform_device *pdev) if (sha_dd->caps.has_dma) atmel_sha_dma_cleanup(sha_dd); + clk_unprepare(sha_dd->iclk); + iounmap(sha_dd->io_base); clk_put(sha_dd->iclk); diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index ca2999709eb4c..2c7a628d0375f 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -218,7 +218,11 @@ static struct atmel_tdes_dev *atmel_tdes_find_dev(struct atmel_tdes_ctx *ctx) static int atmel_tdes_hw_init(struct atmel_tdes_dev *dd) { - clk_prepare_enable(dd->iclk); + int err; + + err = clk_prepare_enable(dd->iclk); + if (err) + return err; if (!(dd->flags & TDES_FLAGS_INIT)) { atmel_tdes_write(dd, TDES_CR, TDES_CR_SWRST); @@ -1355,7 +1359,6 @@ static int atmel_tdes_probe(struct platform_device *pdev) struct crypto_platform_data *pdata; struct device *dev = &pdev->dev; struct resource *tdes_res; - unsigned long tdes_phys_size; int err; tdes_dd = devm_kmalloc(&pdev->dev, sizeof(*tdes_dd), GFP_KERNEL); @@ -1389,7 +1392,6 @@ static int atmel_tdes_probe(struct platform_device *pdev) goto res_err; } tdes_dd->phys_base = tdes_res->start; - tdes_phys_size = resource_size(tdes_res); /* Get the IRQ */ tdes_dd->irq = platform_get_irq(pdev, 0); @@ -1399,26 +1401,26 @@ static int atmel_tdes_probe(struct platform_device *pdev) goto res_err; } - err = request_irq(tdes_dd->irq, atmel_tdes_irq, IRQF_SHARED, - "atmel-tdes", tdes_dd); + err = devm_request_irq(&pdev->dev, tdes_dd->irq, atmel_tdes_irq, + IRQF_SHARED, "atmel-tdes", tdes_dd); if (err) { dev_err(dev, "unable to request tdes irq.\n"); - goto tdes_irq_err; + goto res_err; } /* Initializing the clock */ - tdes_dd->iclk = clk_get(&pdev->dev, "tdes_clk"); + tdes_dd->iclk = devm_clk_get(&pdev->dev, "tdes_clk"); if (IS_ERR(tdes_dd->iclk)) { dev_err(dev, "clock initialization failed.\n"); err = PTR_ERR(tdes_dd->iclk); - goto clk_err; + goto res_err; } - tdes_dd->io_base = ioremap(tdes_dd->phys_base, tdes_phys_size); - if (!tdes_dd->io_base) { + tdes_dd->io_base = devm_ioremap_resource(&pdev->dev, tdes_res); + if (IS_ERR(tdes_dd->io_base)) { dev_err(dev, "can't ioremap\n"); - err = -ENOMEM; - goto tdes_io_err; + err = PTR_ERR(tdes_dd->io_base); + goto res_err; } atmel_tdes_hw_version_init(tdes_dd); @@ -1474,12 +1476,6 @@ static int atmel_tdes_probe(struct platform_device *pdev) err_pdata: atmel_tdes_buff_cleanup(tdes_dd); err_tdes_buff: - iounmap(tdes_dd->io_base); -tdes_io_err: - 
clk_put(tdes_dd->iclk); -clk_err: - free_irq(tdes_dd->irq, tdes_dd); -tdes_irq_err: res_err: tasklet_kill(&tdes_dd->done_task); tasklet_kill(&tdes_dd->queue_task); @@ -1510,13 +1506,6 @@ static int atmel_tdes_remove(struct platform_device *pdev) atmel_tdes_buff_cleanup(tdes_dd); - iounmap(tdes_dd->io_base); - - clk_put(tdes_dd->iclk); - - if (tdes_dd->irq >= 0) - free_irq(tdes_dd->irq, tdes_dd); - return 0; } diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 332c8ef8dae2c..0436997e054b3 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -909,13 +909,14 @@ static int ahash_final_ctx(struct ahash_request *req) state->buflen_1; u32 *sh_desc = ctx->sh_desc_fin, *desc; dma_addr_t ptr = ctx->sh_desc_fin_dma; - int sec4_sg_bytes; + int sec4_sg_bytes, sec4_sg_src_index; int digestsize = crypto_ahash_digestsize(ahash); struct ahash_edesc *edesc; int ret = 0; int sh_len; - sec4_sg_bytes = (1 + (buflen ? 1 : 0)) * sizeof(struct sec4_sg_entry); + sec4_sg_src_index = 1 + (buflen ? 1 : 0); + sec4_sg_bytes = sec4_sg_src_index * sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + @@ -942,7 +943,7 @@ static int ahash_final_ctx(struct ahash_request *req) state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, buf, state->buf_dma, buflen, last_buflen); - (edesc->sec4_sg + sec4_sg_bytes - 1)->len |= SEC4_SG_LEN_FIN; + (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= SEC4_SG_LEN_FIN; edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, sec4_sg_bytes, DMA_TO_DEVICE); diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 48f453555f1fe..ede9e9e3c419c 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -904,7 +904,6 @@ static int ablk_perform(struct ablkcipher_request *req, int encrypt) crypt->mode |= NPE_OP_NOT_IN_PLACE; /* This was never tested by Intel * for more than one dst buffer, I think. 
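* (Note: req->dst->length describes only the first scatterlist entry, so it may legitimately be smaller than the total nbytes when the destination is a chained buffer.)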
*/ - BUG_ON(req->dst->length < nbytes); req_ctx->dst = NULL; if (!chainup_buffers(dev, req->dst, nbytes, &dst_hook, flags, DMA_FROM_DEVICE)) diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c index 67f80813a06f9..e4311ce0cd78c 100644 --- a/drivers/crypto/nx/nx-aes-ccm.c +++ b/drivers/crypto/nx/nx-aes-ccm.c @@ -494,8 +494,9 @@ static int ccm_nx_encrypt(struct aead_request *req, static int ccm4309_aes_nx_encrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct blkcipher_desc desc; - u8 *iv = nx_ctx->priv.ccm.iv; + u8 *iv = rctx->iv; iv[0] = 3; memcpy(iv + 1, nx_ctx->priv.ccm.nonce, 3); @@ -525,8 +526,9 @@ static int ccm_aes_nx_encrypt(struct aead_request *req) static int ccm4309_aes_nx_decrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct blkcipher_desc desc; - u8 *iv = nx_ctx->priv.ccm.iv; + u8 *iv = rctx->iv; iv[0] = 3; memcpy(iv + 1, nx_ctx->priv.ccm.nonce, 3); diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c index 2617cd4d54dd2..dd7e9f3f5b6b2 100644 --- a/drivers/crypto/nx/nx-aes-ctr.c +++ b/drivers/crypto/nx/nx-aes-ctr.c @@ -72,7 +72,7 @@ static int ctr3686_aes_nx_set_key(struct crypto_tfm *tfm, if (key_len < CTR_RFC3686_NONCE_SIZE) return -EINVAL; - memcpy(nx_ctx->priv.ctr.iv, + memcpy(nx_ctx->priv.ctr.nonce, in_key + key_len - CTR_RFC3686_NONCE_SIZE, CTR_RFC3686_NONCE_SIZE); @@ -131,14 +131,15 @@ static int ctr3686_aes_nx_crypt(struct blkcipher_desc *desc, unsigned int nbytes) { struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *iv = nx_ctx->priv.ctr.iv; + u8 iv[16]; + memcpy(iv, nx_ctx->priv.ctr.nonce, CTR_RFC3686_IV_SIZE); memcpy(iv + CTR_RFC3686_NONCE_SIZE, desc->info, CTR_RFC3686_IV_SIZE); iv[12] = iv[13] = iv[14] = 0; iv[15] = 1; - desc->info = nx_ctx->priv.ctr.iv; + desc->info = iv; return ctr_aes_nx_crypt(desc, dst, src, nbytes); } diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c index 88c562434bc0b..c6ebeb644db4c 100644 --- a/drivers/crypto/nx/nx-aes-gcm.c +++ b/drivers/crypto/nx/nx-aes-gcm.c @@ -330,6 +330,7 @@ static int gcm_empty(struct aead_request *req, struct blkcipher_desc *desc, static int gcm_aes_nx_crypt(struct aead_request *req, int enc) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; struct blkcipher_desc desc; unsigned int nbytes = req->cryptlen; @@ -339,7 +340,7 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc) spin_lock_irqsave(&nx_ctx->lock, irq_flags); - desc.info = nx_ctx->priv.gcm.iv; + desc.info = rctx->iv; /* initialize the counter */ *(u32 *)(desc.info + NX_GCM_CTR_OFFSET) = 1; @@ -434,8 +435,8 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc) static int gcm_aes_nx_encrypt(struct aead_request *req) { - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; memcpy(iv, req->iv, 12); @@ -444,8 +445,8 @@ static int gcm_aes_nx_encrypt(struct aead_request *req) static int gcm_aes_nx_decrypt(struct aead_request *req) { - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; memcpy(iv, req->iv, 
12); @@ -455,7 +456,8 @@ static int gcm_aes_nx_decrypt(struct aead_request *req) static int gcm4106_aes_nx_encrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; char *nonce = nx_ctx->priv.gcm.nonce; memcpy(iv, nonce, NX_GCM4106_NONCE_LEN); @@ -467,7 +469,8 @@ static int gcm4106_aes_nx_encrypt(struct aead_request *req) static int gcm4106_aes_nx_decrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; char *nonce = nx_ctx->priv.gcm.nonce; memcpy(iv, nonce, NX_GCM4106_NONCE_LEN); diff --git a/drivers/crypto/nx/nx-aes-xcbc.c b/drivers/crypto/nx/nx-aes-xcbc.c index 8c2faffab4a35..c2f7d4befb559 100644 --- a/drivers/crypto/nx/nx-aes-xcbc.c +++ b/drivers/crypto/nx/nx-aes-xcbc.c @@ -42,6 +42,7 @@ static int nx_xcbc_set_key(struct crypto_shash *desc, unsigned int key_len) { struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(desc); + struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; switch (key_len) { case AES_KEYSIZE_128: @@ -51,7 +52,7 @@ static int nx_xcbc_set_key(struct crypto_shash *desc, return -EINVAL; } - memcpy(nx_ctx->priv.xcbc.key, in_key, key_len); + memcpy(csbcpb->cpb.aes_xcbc.key, in_key, key_len); return 0; } @@ -148,32 +149,29 @@ static int nx_xcbc_empty(struct shash_desc *desc, u8 *out) return rc; } -static int nx_xcbc_init(struct shash_desc *desc) +static int nx_crypto_ctx_aes_xcbc_init2(struct crypto_tfm *tfm) { - struct xcbc_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; - struct nx_sg *out_sg; - int len; + int err; - nx_ctx_init(nx_ctx, HCOP_FC_AES); + err = nx_crypto_ctx_aes_xcbc_init(tfm); + if (err) + return err; - memset(sctx, 0, sizeof *sctx); + nx_ctx_init(nx_ctx, HCOP_FC_AES); NX_CPB_SET_KEY_SIZE(csbcpb, NX_KS_AES_128); csbcpb->cpb.hdr.mode = NX_MODE_AES_XCBC_MAC; - memcpy(csbcpb->cpb.aes_xcbc.key, nx_ctx->priv.xcbc.key, AES_BLOCK_SIZE); - memset(nx_ctx->priv.xcbc.key, 0, sizeof *nx_ctx->priv.xcbc.key); - - len = AES_BLOCK_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, nx_ctx->ap->sglen); + return 0; +} - if (len != AES_BLOCK_SIZE) - return -EINVAL; +static int nx_xcbc_init(struct shash_desc *desc) +{ + struct xcbc_state *sctx = shash_desc_ctx(desc); - nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + memset(sctx, 0, sizeof *sctx); return 0; } @@ -186,6 +184,7 @@ static int nx_xcbc_update(struct shash_desc *desc, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u32 to_process = 0, leftover, total; unsigned int max_sg_len; unsigned long irq_flags; @@ -213,6 +212,17 @@ static int nx_xcbc_update(struct shash_desc *desc, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = AES_BLOCK_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &data_len, nx_ctx->ap->sglen); + + if (data_len != AES_BLOCK_SIZE) { + rc = -EINVAL; + goto out; + } + + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + do { to_process = total - to_process; to_process = to_process & ~(AES_BLOCK_SIZE - 1); @@ -235,8 +245,10 @@ static int 
nx_xcbc_update(struct shash_desc *desc, (u8 *) sctx->buffer, &data_len, max_sg_len); - if (data_len != sctx->count) - return -EINVAL; + if (data_len != sctx->count) { + rc = -EINVAL; + goto out; + } } data_len = to_process - sctx->count; @@ -245,8 +257,10 @@ static int nx_xcbc_update(struct shash_desc *desc, &data_len, max_sg_len); - if (data_len != to_process - sctx->count) - return -EINVAL; + if (data_len != to_process - sctx->count) { + rc = -EINVAL; + goto out; + } nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); @@ -325,15 +339,19 @@ static int nx_xcbc_final(struct shash_desc *desc, u8 *out) in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)sctx->buffer, &len, nx_ctx->ap->sglen); - if (len != sctx->count) - return -EINVAL; + if (len != sctx->count) { + rc = -EINVAL; + goto out; + } len = AES_BLOCK_SIZE; out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len, nx_ctx->ap->sglen); - if (len != AES_BLOCK_SIZE) - return -EINVAL; + if (len != AES_BLOCK_SIZE) { + rc = -EINVAL; + goto out; + } nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); @@ -372,7 +390,7 @@ struct shash_alg nx_shash_aes_xcbc_alg = { .cra_blocksize = AES_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_aes_xcbc_init, + .cra_init = nx_crypto_ctx_aes_xcbc_init2, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c index 23621da624c35..becb738c897b1 100644 --- a/drivers/crypto/nx/nx-sha256.c +++ b/drivers/crypto/nx/nx-sha256.c @@ -29,30 +29,28 @@ #include "nx.h" -static int nx_sha256_init(struct shash_desc *desc) +static int nx_crypto_ctx_sha256_init(struct crypto_tfm *tfm) { - struct sha256_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); - int len; - int rc; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); + int err; - nx_ctx_init(nx_ctx, HCOP_FC_SHA); + err = nx_crypto_ctx_sha_init(tfm); + if (err) + return err; - memset(sctx, 0, sizeof *sctx); + nx_ctx_init(nx_ctx, HCOP_FC_SHA); nx_ctx->ap = &nx_ctx->props[NX_PROPS_SHA256]; NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA256); - len = SHA256_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - &nx_ctx->op.outlen, - &len, - (u8 *) sctx->state, - NX_DS_SHA256); + return 0; +} - if (rc) - goto out; +static int nx_sha256_init(struct shash_desc *desc) { + struct sha256_state *sctx = shash_desc_ctx(desc); + + memset(sctx, 0, sizeof *sctx); sctx->state[0] = __cpu_to_be32(SHA256_H0); sctx->state[1] = __cpu_to_be32(SHA256_H1); @@ -64,7 +62,6 @@ static int nx_sha256_init(struct shash_desc *desc) sctx->state[7] = __cpu_to_be32(SHA256_H7); sctx->count = 0; -out: return 0; } @@ -74,10 +71,12 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, struct sha256_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *out_sg; u64 to_process = 0, leftover, total; unsigned long irq_flags; int rc = 0; int data_len; + u32 max_sg_len; u64 buf_len = (sctx->count % SHA256_BLOCK_SIZE); spin_lock_irqsave(&nx_ctx->lock, irq_flags); @@ -97,38 +96,57 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + 
nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + + data_len = SHA256_DIGEST_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &data_len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + + if (data_len != SHA256_DIGEST_SIZE) { + rc = -EINVAL; + goto out; + } + do { - /* - * to_process: the SHA256_BLOCK_SIZE data chunk to process in - * this update. This value is also restricted by the sg list - * limits. - */ - to_process = total - to_process; - to_process = to_process & ~(SHA256_BLOCK_SIZE - 1); + int used_sgs = 0; + struct nx_sg *in_sg = nx_ctx->in_sg; if (buf_len) { data_len = buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &data_len, - (u8 *) sctx->buf, - NX_DS_SHA256); + in_sg = nx_build_sg_list(in_sg, + (u8 *) sctx->buf, + &data_len, + max_sg_len); - if (rc || data_len != buf_len) + if (data_len != buf_len) { + rc = -EINVAL; goto out; + } + used_sgs = in_sg - nx_ctx->in_sg; } + /* to_process: SHA256_BLOCK_SIZE aligned chunk to be + * processed in this iteration. This value is restricted + * by sg list limits and number of sgs we already used + * for leftover data. (see above) + * In ideal case, we could allow NX_PAGE_SIZE * max_sg_len, + * but because data may not be aligned, we need to account + * for that too. */ + to_process = min_t(u64, total, + (max_sg_len - 1 - used_sgs) * NX_PAGE_SIZE); + to_process = to_process & ~(SHA256_BLOCK_SIZE - 1); + data_len = to_process - buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &data_len, - (u8 *) data, - NX_DS_SHA256); + in_sg = nx_build_sg_list(in_sg, (u8 *) data, + &data_len, max_sg_len); - if (rc) - goto out; + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); - to_process = (data_len + buf_len); + to_process = data_len + buf_len; leftover = total - to_process; /* @@ -173,12 +191,19 @@ static int nx_sha256_final(struct shash_desc *desc, u8 *out) struct sha256_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *in_sg, *out_sg; unsigned long irq_flags; - int rc; + u32 max_sg_len; + int rc = 0; int len; spin_lock_irqsave(&nx_ctx->lock, irq_flags); + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + /* final is represented by continuing the operation and indicating that * this is not an intermediate operation */ if (sctx->count >= SHA256_BLOCK_SIZE) { @@ -195,25 +220,24 @@ static int nx_sha256_final(struct shash_desc *desc, u8 *out) csbcpb->cpb.sha256.message_bit_length = (u64) (sctx->count * 8); len = sctx->count & (SHA256_BLOCK_SIZE - 1); - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &len, - (u8 *) sctx->buf, - NX_DS_SHA256); + in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *) sctx->buf, + &len, max_sg_len); - if (rc || len != (sctx->count & (SHA256_BLOCK_SIZE - 1))) + if (len != (sctx->count & (SHA256_BLOCK_SIZE - 1))) { + rc = -EINVAL; goto out; + } len = SHA256_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - &nx_ctx->op.outlen, - &len, - out, - NX_DS_SHA256); + out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len, max_sg_len); - if (rc || len != SHA256_DIGEST_SIZE) + if (len != SHA256_DIGEST_SIZE) { + rc = -EINVAL; goto out; + } 
+ nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); if (!nx_ctx->op.outlen) { rc = -EINVAL; goto out; @@ -268,7 +292,7 @@ struct shash_alg nx_shash_sha256_alg = { .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_sha_init, + .cra_init = nx_crypto_ctx_sha256_init, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c index b3adf10226733..b6e183d58d73d 100644 --- a/drivers/crypto/nx/nx-sha512.c +++ b/drivers/crypto/nx/nx-sha512.c @@ -28,30 +28,29 @@ #include "nx.h" -static int nx_sha512_init(struct shash_desc *desc) +static int nx_crypto_ctx_sha512_init(struct crypto_tfm *tfm) { - struct sha512_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); - int len; - int rc; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); + int err; - nx_ctx_init(nx_ctx, HCOP_FC_SHA); + err = nx_crypto_ctx_sha_init(tfm); + if (err) + return err; - memset(sctx, 0, sizeof *sctx); + nx_ctx_init(nx_ctx, HCOP_FC_SHA); nx_ctx->ap = &nx_ctx->props[NX_PROPS_SHA512]; NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA512); - len = SHA512_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - &nx_ctx->op.outlen, - &len, - (u8 *)sctx->state, - NX_DS_SHA512); + return 0; +} - if (rc || len != SHA512_DIGEST_SIZE) - goto out; +static int nx_sha512_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + memset(sctx, 0, sizeof *sctx); sctx->state[0] = __cpu_to_be64(SHA512_H0); sctx->state[1] = __cpu_to_be64(SHA512_H1); @@ -63,7 +62,6 @@ static int nx_sha512_init(struct shash_desc *desc) sctx->state[7] = __cpu_to_be64(SHA512_H7); sctx->count[0] = 0; -out: return 0; } @@ -73,10 +71,12 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, struct sha512_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *out_sg; u64 to_process, leftover = 0, total; unsigned long irq_flags; int rc = 0; int data_len; + u32 max_sg_len; u64 buf_len = (sctx->count[0] % SHA512_BLOCK_SIZE); spin_lock_irqsave(&nx_ctx->lock, irq_flags); @@ -96,39 +96,61 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + + data_len = SHA512_DIGEST_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &data_len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + + if (data_len != SHA512_DIGEST_SIZE) { + rc = -EINVAL; + goto out; + } + do { - /* - * to_process: the SHA512_BLOCK_SIZE data chunk to process in - * this update. This value is also restricted by the sg list - * limits. 
- */ - to_process = total - leftover; - to_process = to_process & ~(SHA512_BLOCK_SIZE - 1); - leftover = total - to_process; + int used_sgs = 0; + struct nx_sg *in_sg = nx_ctx->in_sg; if (buf_len) { data_len = buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &data_len, - (u8 *) sctx->buf, - NX_DS_SHA512); + in_sg = nx_build_sg_list(in_sg, + (u8 *) sctx->buf, + &data_len, max_sg_len); - if (rc || data_len != buf_len) + if (data_len != buf_len) { + rc = -EINVAL; goto out; + } + used_sgs = in_sg - nx_ctx->in_sg; } + /* to_process: SHA512_BLOCK_SIZE aligned chunk to be + * processed in this iteration. This value is restricted + * by sg list limits and number of sgs we already used + * for leftover data. (see above) + * In ideal case, we could allow NX_PAGE_SIZE * max_sg_len, + * but because data may not be aligned, we need to account + * for that too. */ + to_process = min_t(u64, total, + (max_sg_len - 1 - used_sgs) * NX_PAGE_SIZE); + to_process = to_process & ~(SHA512_BLOCK_SIZE - 1); + data_len = to_process - buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &data_len, - (u8 *) data, - NX_DS_SHA512); + in_sg = nx_build_sg_list(in_sg, (u8 *) data, + &data_len, max_sg_len); - if (rc || data_len != (to_process - buf_len)) + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); + + if (data_len != (to_process - buf_len)) { + rc = -EINVAL; goto out; + } - to_process = (data_len + buf_len); + to_process = data_len + buf_len; leftover = total - to_process; /* @@ -172,13 +194,20 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out) struct sha512_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *in_sg, *out_sg; + u32 max_sg_len; u64 count0; unsigned long irq_flags; - int rc; + int rc = 0; int len; spin_lock_irqsave(&nx_ctx->lock, irq_flags); + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + /* final is represented by continuing the operation and indicating that * this is not an intermediate operation */ if (sctx->count[0] >= SHA512_BLOCK_SIZE) { @@ -200,24 +229,20 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out) csbcpb->cpb.sha512.message_bit_length_lo = count0; len = sctx->count[0] & (SHA512_BLOCK_SIZE - 1); - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &len, - (u8 *)sctx->buf, - NX_DS_SHA512); + in_sg = nx_build_sg_list(nx_ctx->in_sg, sctx->buf, &len, + max_sg_len); - if (rc || len != (sctx->count[0] & (SHA512_BLOCK_SIZE - 1))) + if (len != (sctx->count[0] & (SHA512_BLOCK_SIZE - 1))) { + rc = -EINVAL; goto out; + } len = SHA512_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - &nx_ctx->op.outlen, - &len, - out, - NX_DS_SHA512); + out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len, + max_sg_len); - if (rc) - goto out; + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); if (!nx_ctx->op.outlen) { rc = -EINVAL; @@ -273,7 +298,7 @@ struct shash_alg nx_shash_sha512_alg = { .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_sha_init, + .cra_init = nx_crypto_ctx_sha512_init, .cra_exit = nx_crypto_ctx_exit, } }; diff --git 
a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 1da6dc59d0dd1..737d33dc50b8e 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -215,8 +215,15 @@ struct nx_sg *nx_walk_and_build(struct nx_sg *nx_dst, * @delta: is the amount we need to crop in order to bound the list. * */ -static long int trim_sg_list(struct nx_sg *sg, struct nx_sg *end, unsigned int delta) +static long int trim_sg_list(struct nx_sg *sg, + struct nx_sg *end, + unsigned int delta, + unsigned int *nbytes) { + long int oplen; + long int data_back; + unsigned int is_delta = delta; + while (delta && end > sg) { struct nx_sg *last = end - 1; @@ -228,54 +235,20 @@ static long int trim_sg_list(struct nx_sg *sg, struct nx_sg *end, unsigned int d delta -= last->len; } } - return (sg - end) * sizeof(struct nx_sg); -} - -/** - * nx_sha_build_sg_list - walk and build sg list to sha modes - * using right bounds and limits. - * @nx_ctx: NX crypto context for the lists we're building - * @nx_sg: current sg list in or out list - * @op_len: current op_len to be used in order to build a sg list - * @nbytes: number or bytes to be processed - * @offset: buf offset - * @mode: SHA256 or SHA512 - */ -int nx_sha_build_sg_list(struct nx_crypto_ctx *nx_ctx, - struct nx_sg *nx_in_outsg, - s64 *op_len, - unsigned int *nbytes, - u8 *offset, - u32 mode) -{ - unsigned int delta = 0; - unsigned int total = *nbytes; - struct nx_sg *nx_insg = nx_in_outsg; - unsigned int max_sg_len; - max_sg_len = min_t(u64, nx_ctx->ap->sglen, - nx_driver.of.max_sg_len/sizeof(struct nx_sg)); - max_sg_len = min_t(u64, max_sg_len, - nx_ctx->ap->databytelen/NX_PAGE_SIZE); - - *nbytes = min_t(u64, *nbytes, nx_ctx->ap->databytelen); - nx_insg = nx_build_sg_list(nx_insg, offset, nbytes, max_sg_len); - - switch (mode) { - case NX_DS_SHA256: - if (*nbytes < total) - delta = *nbytes - (*nbytes & ~(SHA256_BLOCK_SIZE - 1)); - break; - case NX_DS_SHA512: - if (*nbytes < total) - delta = *nbytes - (*nbytes & ~(SHA512_BLOCK_SIZE - 1)); - break; - default: - return -EINVAL; + /* There are cases where we need to crop list in order to make it + * a block size multiple, but we also need to align data. 
In order to do + that we need to calculate how much we need to put back to be + processed */ + oplen = (sg - end) * sizeof(struct nx_sg); + if (is_delta) { + data_back = (abs(oplen) / AES_BLOCK_SIZE) * sg->len; + data_back = *nbytes - (data_back & ~(AES_BLOCK_SIZE - 1)); + *nbytes -= data_back; } - *op_len = trim_sg_list(nx_in_outsg, nx_insg, delta); - return 0; + return oplen; } /** @@ -330,8 +303,8 @@ int nx_build_sg_lists(struct nx_crypto_ctx *nx_ctx, /* these lengths should be negative, which will indicate to phyp that * the input and output parameters are scatterlists, not linear * buffers */ - nx_ctx->op.inlen = trim_sg_list(nx_ctx->in_sg, nx_insg, delta); - nx_ctx->op.outlen = trim_sg_list(nx_ctx->out_sg, nx_outsg, delta); + nx_ctx->op.inlen = trim_sg_list(nx_ctx->in_sg, nx_insg, delta, nbytes); + nx_ctx->op.outlen = trim_sg_list(nx_ctx->out_sg, nx_outsg, delta, nbytes); return 0; } @@ -662,12 +635,14 @@ static int nx_crypto_ctx_init(struct nx_crypto_ctx *nx_ctx, u32 fc, u32 mode) /* entry points from the crypto tfm initializers */ int nx_crypto_ctx_aes_ccm_init(struct crypto_tfm *tfm) { + tfm->crt_aead.reqsize = sizeof(struct nx_ccm_rctx); return nx_crypto_ctx_init(crypto_tfm_ctx(tfm), NX_FC_AES, NX_MODE_AES_CCM); } int nx_crypto_ctx_aes_gcm_init(struct crypto_tfm *tfm) { + tfm->crt_aead.reqsize = sizeof(struct nx_gcm_rctx); return nx_crypto_ctx_init(crypto_tfm_ctx(tfm), NX_FC_AES, NX_MODE_AES_GCM); } diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index 6c9ecaaead52f..c3ed83764fef1 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -2,6 +2,8 @@ #ifndef __NX_H__ #define __NX_H__ +#include <crypto/ctr.h> + #define NX_NAME "nx-crypto" #define NX_STRING "IBM Power7+ Nest Accelerator Crypto Driver" #define NX_VERSION "1.0" @@ -91,8 +93,11 @@ struct nx_crypto_driver { #define NX_GCM4106_NONCE_LEN (4) #define NX_GCM_CTR_OFFSET (12) -struct nx_gcm_priv { +struct nx_gcm_rctx { u8 iv[16]; +}; + +struct nx_gcm_priv { u8 iauth_tag[16]; u8 nonce[NX_GCM4106_NONCE_LEN]; }; @@ -100,8 +105,11 @@ struct nx_gcm_priv { #define NX_CCM_AES_KEY_LEN (16) #define NX_CCM4309_AES_KEY_LEN (19) #define NX_CCM4309_NONCE_LEN (3) -struct nx_ccm_priv { +struct nx_ccm_rctx { u8 iv[16]; +}; + +struct nx_ccm_priv { u8 b0[16]; u8 iauth_tag[16]; u8 oauth_tag[16]; @@ -113,7 +121,7 @@ struct nx_xcbc_priv { }; struct nx_ctr_priv { - u8 iv[16]; + u8 nonce[CTR_RFC3686_NONCE_SIZE]; }; struct nx_crypto_ctx { @@ -153,8 +161,6 @@ void nx_crypto_ctx_exit(struct crypto_tfm *tfm); void nx_ctx_init(struct nx_crypto_ctx *nx_ctx, unsigned int function); int nx_hcall_sync(struct nx_crypto_ctx *ctx, struct vio_pfo_op *op, u32 may_sleep); -int nx_sha_build_sg_list(struct nx_crypto_ctx *, struct nx_sg *, - s64 *, unsigned int *, u8 *, u32); struct nx_sg *nx_build_sg_list(struct nx_sg *, u8 *, unsigned int *, u32); int nx_build_sg_lists(struct nx_crypto_ctx *, struct blkcipher_desc *, struct scatterlist *, struct scatterlist *, unsigned int *, diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index 46307098f8bab..0a70e46d54165 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -536,9 +536,6 @@ static int omap_des_crypt_dma_stop(struct omap_des_dev *dd) dmaengine_terminate_all(dd->dma_lch_in); dmaengine_terminate_all(dd->dma_lch_out); - dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE); - dma_unmap_sg(dd->dev, dd->out_sg, dd->out_sg_len, DMA_FROM_DEVICE); - return err; } diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c 
index 1dc5b0a17cf72..34139a8894a01 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -73,7 +73,8 @@ ICP_QAT_HW_CIPHER_KEY_CONVERT, \ ICP_QAT_HW_CIPHER_DECRYPT) -static atomic_t active_dev; +static DEFINE_MUTEX(algs_lock); +static unsigned int active_devs; struct qat_alg_buf { uint32_t len; @@ -1271,7 +1272,10 @@ static struct crypto_alg qat_algs[] = { { int qat_algs_register(void) { - if (atomic_add_return(1, &active_dev) == 1) { + int ret = 0; + + mutex_lock(&algs_lock); + if (++active_devs == 1) { int i; for (i = 0; i < ARRAY_SIZE(qat_algs); i++) @@ -1280,21 +1284,25 @@ int qat_algs_register(void) CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC : CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC; - return crypto_register_algs(qat_algs, ARRAY_SIZE(qat_algs)); + ret = crypto_register_algs(qat_algs, ARRAY_SIZE(qat_algs)); } - return 0; + mutex_unlock(&algs_lock); + return ret; } int qat_algs_unregister(void) { - if (atomic_sub_return(1, &active_dev) == 0) - return crypto_unregister_algs(qat_algs, ARRAY_SIZE(qat_algs)); - return 0; + int ret = 0; + + mutex_lock(&algs_lock); + if (--active_devs == 0) + ret = crypto_unregister_algs(qat_algs, ARRAY_SIZE(qat_algs)); + mutex_unlock(&algs_lock); + return ret; } int qat_algs_init(void) { - atomic_set(&active_dev, 0); crypto_get_default_rng(); return 0; } diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 857414afa29a8..f062158d4dc9e 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -925,7 +925,8 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count, sg_count--; link_tbl_ptr--; } - be16_add_cpu(&link_tbl_ptr->len, cryptlen); + link_tbl_ptr->len = cpu_to_be16(be16_to_cpu(link_tbl_ptr->len) + + cryptlen); /* tag end of link table */ link_tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; @@ -2561,6 +2562,7 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, break; default: dev_err(dev, "unknown algorithm type %d\n", t_alg->algt.type); + kfree(t_alg); return ERR_PTR(-EINVAL); } diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c index ab300ea19434e..41f93334cc441 100644 --- a/drivers/crypto/vmx/aes.c +++ b/drivers/crypto/vmx/aes.c @@ -80,6 +80,7 @@ static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key, pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); pagefault_enable(); @@ -97,6 +98,7 @@ static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) } else { pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); aes_p8_encrypt(src, dst, &ctx->enc_key); pagefault_enable(); } @@ -111,6 +113,7 @@ static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) } else { pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); aes_p8_decrypt(src, dst, &ctx->dec_key); pagefault_enable(); } diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 1a559b7dddb5f..c8e7f653e5d36 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -81,6 +81,7 @@ static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key, pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); pagefault_enable(); @@ -108,6 +109,7 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc 
*desc, } else { pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt(desc, &walk); @@ -143,6 +145,7 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, } else { pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt(desc, &walk); diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index 96dbee4bf4a6d..266e708d63df9 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -79,6 +79,7 @@ static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key, pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); pagefault_enable(); @@ -97,6 +98,7 @@ static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx, pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key); pagefault_enable(); @@ -127,6 +129,7 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc, while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr, walk.dst.virt.addr, (nbytes & AES_BLOCK_MASK)/AES_BLOCK_SIZE, &ctx->enc_key, walk.iv); pagefault_enable(); diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c index d0ffe277af5ca..917b3f09e724e 100644 --- a/drivers/crypto/vmx/ghash.c +++ b/drivers/crypto/vmx/ghash.c @@ -116,6 +116,7 @@ static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key, pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); enable_kernel_fp(); gcm_init_p8(ctx->htable, (const u64 *) key); pagefault_enable(); @@ -142,6 +143,7 @@ static int p8_ghash_update(struct shash_desc *desc, GHASH_DIGEST_SIZE - dctx->bytes); pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); enable_kernel_fp(); gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, GHASH_DIGEST_SIZE); @@ -154,6 +156,7 @@ static int p8_ghash_update(struct shash_desc *desc, if (len) { pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); enable_kernel_fp(); gcm_ghash_p8(dctx->shash, ctx->htable, src, len); pagefault_enable(); @@ -182,6 +185,7 @@ static int p8_ghash_final(struct shash_desc *desc, u8 *out) dctx->buffer[i] = 0; pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); enable_kernel_fp(); gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, GHASH_DIGEST_SIZE); diff --git a/drivers/crypto/vmx/ghashp8-ppc.pl b/drivers/crypto/vmx/ghashp8-ppc.pl index 0a6f899839ddb..d8429cb71f027 100644 --- a/drivers/crypto/vmx/ghashp8-ppc.pl +++ b/drivers/crypto/vmx/ghashp8-ppc.pl @@ -61,6 +61,12 @@ mtspr 256,r0 li r10,0x30 lvx_u $H,0,r4 # load H + le?xor r7,r7,r7 + le?addi r7,r7,0x8 # need a vperm start with 08 + le?lvsr 5,0,r7 + le?vspltisb 6,0x0f + le?vxor 5,5,6 # set a b-endian mask + le?vperm $H,$H,$H,5 vspltisb $xC2,-16 # 0xf0 vspltisb $t0,1 # one diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 7992164ea9ec2..8d8c35623f2a8 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -648,16 +648,17 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, desc->lld.mbr_sa = mem; desc->lld.mbr_da = atchan->sconfig.dst_addr; } - desc->lld.mbr_cfg = atchan->cfg; - dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg); + dwidth = at_xdmac_get_dwidth(atchan->cfg); fixed_dwidth = 
IS_ALIGNED(len, 1 << dwidth) - ? at_xdmac_get_dwidth(desc->lld.mbr_cfg) + ? dwidth : AT_XDMAC_CC_DWIDTH_BYTE; desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 /* next descriptor view */ | AT_XDMAC_MBR_UBC_NDEN /* next descriptor dst parameter update */ | AT_XDMAC_MBR_UBC_NSEN /* next descriptor src parameter update */ | (i == sg_len - 1 ? 0 : AT_XDMAC_MBR_UBC_NDE) /* descriptor fetch */ | (len >> fixed_dwidth); /* microblock length */ + desc->lld.mbr_cfg = (atchan->cfg & ~AT_XDMAC_CC_DWIDTH_MASK) | + AT_XDMAC_CC_DWIDTH(fixed_dwidth); dev_dbg(chan2dev(chan), "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n", __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); @@ -1229,6 +1230,7 @@ static int at_xdmac_device_terminate_all(struct dma_chan *chan) list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) at_xdmac_remove_xfer(atchan, desc); + clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status); clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status); spin_unlock_irqrestore(&atchan->lock, flags); @@ -1361,6 +1363,8 @@ static int atmel_xdmac_resume(struct device *dev) atchan = to_at_xdmac_chan(chan); at_xdmac_chan_write(atchan, AT_XDMAC_CC, atchan->save_cc); if (at_xdmac_chan_is_cyclic(atchan)) { + if (at_xdmac_chan_is_paused(atchan)) + at_xdmac_device_resume(chan); at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda); at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc); at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim); diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 1022c2e1a2b0a..ebffc744cb1b1 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -536,16 +536,17 @@ EXPORT_SYMBOL(dw_dma_get_dst_addr); /* Called with dwc->lock held and all DMAC interrupts disabled */ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, - u32 status_err, u32 status_xfer) + u32 status_block, u32 status_err, u32 status_xfer) { unsigned long flags; - if (dwc->mask) { + if (status_block & dwc->mask) { void (*callback)(void *param); void *callback_param; dev_vdbg(chan2dev(&dwc->chan), "new cyclic period llp 0x%08x\n", channel_readl(dwc, LLP)); + dma_writel(dw, CLEAR.BLOCK, dwc->mask); callback = dwc->cdesc->period_callback; callback_param = dwc->cdesc->period_callback_param; @@ -577,6 +578,7 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, channel_writel(dwc, CTL_LO, 0); channel_writel(dwc, CTL_HI, 0); + dma_writel(dw, CLEAR.BLOCK, dwc->mask); dma_writel(dw, CLEAR.ERROR, dwc->mask); dma_writel(dw, CLEAR.XFER, dwc->mask); @@ -585,6 +587,9 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, spin_unlock_irqrestore(&dwc->lock, flags); } + + /* Re-enable interrupts */ + channel_set_bit(dw, MASK.BLOCK, dwc->mask); } /* ------------------------------------------------------------------------- */ @@ -593,10 +598,12 @@ static void dw_dma_tasklet(unsigned long data) { struct dw_dma *dw = (struct dw_dma *)data; struct dw_dma_chan *dwc; + u32 status_block; u32 status_xfer; u32 status_err; int i; + status_block = dma_readl(dw, RAW.BLOCK); status_xfer = dma_readl(dw, RAW.XFER); status_err = dma_readl(dw, RAW.ERROR); @@ -605,16 +612,15 @@ static void dw_dma_tasklet(unsigned long data) for (i = 0; i < dw->dma.chancnt; i++) { dwc = &dw->chan[i]; if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) - dwc_handle_cyclic(dw, dwc, status_err, status_xfer); + dwc_handle_cyclic(dw, dwc, status_block, status_err, + status_xfer); else if (status_err & (1 << i)) dwc_handle_error(dw, dwc); else if 
(status_xfer & (1 << i)) dwc_scan_descriptors(dw, dwc); } - /* - * Re-enable interrupts. - */ + /* Re-enable interrupts */ channel_set_bit(dw, MASK.XFER, dw->all_chan_mask); channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask); } @@ -635,6 +641,7 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) * softirq handler. */ channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); status = dma_readl(dw, STATUS_INT); @@ -645,6 +652,7 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) /* Try to recover */ channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1); + channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1); channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1); channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1); channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1); @@ -1111,6 +1119,7 @@ static void dw_dma_off(struct dw_dma *dw) dma_writel(dw, CFG, 0); channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask); channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask); channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); @@ -1216,6 +1225,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) /* Disable interrupts */ channel_clear_bit(dw, MASK.XFER, dwc->mask); + channel_clear_bit(dw, MASK.BLOCK, dwc->mask); channel_clear_bit(dw, MASK.ERROR, dwc->mask); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1245,7 +1255,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) int dw_dma_cyclic_start(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct dw_dma *dw = to_dw_dma(dwc->chan.device); + struct dw_dma *dw = to_dw_dma(chan->device); unsigned long flags; if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) { @@ -1255,25 +1265,10 @@ int dw_dma_cyclic_start(struct dma_chan *chan) spin_lock_irqsave(&dwc->lock, flags); - /* Assert channel is idle */ - if (dma_readl(dw, CH_EN) & dwc->mask) { - dev_err(chan2dev(&dwc->chan), - "%s: BUG: Attempted to start non-idle channel\n", - __func__); - dwc_dump_chan_regs(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); - return -EBUSY; - } - - dma_writel(dw, CLEAR.ERROR, dwc->mask); - dma_writel(dw, CLEAR.XFER, dwc->mask); + /* Enable interrupts to perform cyclic transfer */ + channel_set_bit(dw, MASK.BLOCK, dwc->mask); - /* Setup DMAC channel registers */ - channel_writel(dwc, LLP, dwc->cdesc->desc[0]->txd.phys); - channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); - channel_writel(dwc, CTL_HI, 0); - - channel_set_bit(dw, CH_EN, dwc->mask); + dwc_dostart(dwc, dwc->cdesc->desc[0]); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1479,6 +1474,7 @@ void dw_dma_cyclic_free(struct dma_chan *chan) dwc_chan_disable(dw, dwc); + dma_writel(dw, CLEAR.BLOCK, dwc->mask); dma_writel(dw, CLEAR.ERROR, dwc->mask); dma_writel(dw, CLEAR.XFER, dwc->mask); @@ -1569,9 +1565,6 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* Force dma off, just in case */ dw_dma_off(dw); - /* Disable BLOCK interrupts as well */ - channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); - /* Create a pool of consistent memory blocks for hardware descriptors */ dw->desc_pool = dmam_pool_create("dw_dmac_desc_pool", chip->dev, sizeof(struct dw_desc), 4, 0); @@ -1591,7 +1584,6 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) INIT_LIST_HEAD(&dw->dma.channels); for (i = 0; i < 
nr_channels; i++) { struct dw_dma_chan *dwc = &dw->chan[i]; - int r = nr_channels - i - 1; dwc->chan.device = &dw->dma; dma_cookie_init(&dwc->chan); @@ -1603,7 +1595,7 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* 7 is highest priority & 0 is lowest. */ if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING) - dwc->priority = r; + dwc->priority = nr_channels - i - 1; else dwc->priority = i; @@ -1622,6 +1614,7 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* Hardware configuration */ if (autocfg) { unsigned int dwc_params; + unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1; void __iomem *addr = chip->regs + r * sizeof(u32); dwc_params = dma_read_byaddr(addr, DWC_PARAMS); diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 1c56001df676c..50f1b422dee3b 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -273,7 +273,8 @@ static void mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) dma_cookie_t cookie = 0; int busy = mv_chan_is_busy(mv_chan); u32 current_desc = mv_chan_get_current_desc(mv_chan); - int seen_current = 0; + int current_cleaned = 0; + struct mv_xor_desc *hw_desc; dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__); dev_dbg(mv_chan_to_devp(mv_chan), "current_desc %x\n", current_desc); @@ -285,38 +286,57 @@ static void mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) list_for_each_entry_safe(iter, _iter, &mv_chan->chain, chain_node) { - prefetch(_iter); - prefetch(&_iter->async_tx); - /* do not advance past the current descriptor loaded into the - * hardware channel, subsequent descriptors are either in - * process or have not been submitted - */ - if (seen_current) - break; + /* clean finished descriptors */ + hw_desc = iter->hw_desc; + if (hw_desc->status & XOR_DESC_SUCCESS) { + cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, + cookie); - /* stop the search if we reach the current descriptor and the - * channel is busy - */ - if (iter->async_tx.phys == current_desc) { - seen_current = 1; - if (busy) + /* done processing desc, clean slot */ + mv_xor_clean_slot(iter, mv_chan); + + /* break if we cleaned the current descriptor */ + if (iter->async_tx.phys == current_desc) { + current_cleaned = 1; + break; + } + } else { + if (iter->async_tx.phys == current_desc) { + current_cleaned = 0; break; + } } - - cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie); - - if (mv_xor_clean_slot(iter, mv_chan)) - break; } if ((busy == 0) && !list_empty(&mv_chan->chain)) { - struct mv_xor_desc_slot *chain_head; - chain_head = list_entry(mv_chan->chain.next, - struct mv_xor_desc_slot, - chain_node); - - mv_xor_start_new_chain(mv_chan, chain_head); + if (current_cleaned) { + /* + * current descriptor cleaned and removed, run + * from list head + */ + iter = list_entry(mv_chan->chain.next, + struct mv_xor_desc_slot, + chain_node); + mv_xor_start_new_chain(mv_chan, iter); + } else { + if (!list_is_last(&iter->chain_node, &mv_chan->chain)) { + /* + * descriptors are still waiting after + * current, trigger them + */ + iter = list_entry(iter->chain_node.next, + struct mv_xor_desc_slot, + chain_node); + mv_xor_start_new_chain(mv_chan, iter); + } else { + /* + * some descriptors are still waiting + * to be cleaned + */ + tasklet_schedule(&mv_chan->irq_tasklet); + } + } } if (cookie > 0) diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index 91958dba39a21..0e302b3a33ade 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -31,6 +31,7 @@ #define XOR_OPERATION_MODE_XOR 0 
#define XOR_OPERATION_MODE_MEMCPY 2 #define XOR_DESCRIPTOR_SWAP BIT(14) +#define XOR_DESC_SUCCESS 0x40000000 #define XOR_DESC_DMA_OWNED BIT(31) #define XOR_DESC_EOD_INT_EN BIT(31) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 340f9e607cd8b..3dabc52b96154 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2328,7 +2328,7 @@ static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx) desc->txd.callback = last->txd.callback; desc->txd.callback_param = last->txd.callback_param; } - last->last = false; + desc->last = false; dma_cookie_assign(&desc->txd); @@ -2621,6 +2621,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, desc->rqcfg.brst_len = 1; desc->rqcfg.brst_len = get_burst_len(desc, len); + desc->bytes_requested = len; desc->txd.flags = flags; diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 592af5f0cf391..53587377e6726 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -435,16 +435,13 @@ void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, */ void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) { - int status; - if (!edac_dev->edac_check) return; - status = cancel_delayed_work(&edac_dev->work); - if (status == 0) { - /* workq instance might be running, wait for it */ - flush_workqueue(edac_workqueue); - } + edac_dev->op_state = OP_OFFLINE; + + cancel_delayed_work_sync(&edac_dev->work); + flush_workqueue(edac_workqueue); } /* diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index af3be1914dbb8..63ceb2d985654 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -581,18 +581,10 @@ static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec, */ static void edac_mc_workq_teardown(struct mem_ctl_info *mci) { - int status; - - if (mci->op_state != OP_RUNNING_POLL) - return; - - status = cancel_delayed_work(&mci->work); - if (status == 0) { - edac_dbg(0, "not canceled, flush the queue\n"); + mci->op_state = OP_OFFLINE; - /* workq instance might be running, wait for it */ - flush_workqueue(edac_workqueue); - } + cancel_delayed_work_sync(&mci->work); + flush_workqueue(edac_workqueue); } /* diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 112d63ad11547..67dc90365389a 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -977,21 +977,26 @@ static int edac_create_debug_nodes(struct mem_ctl_info *mci) int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, const struct attribute_group **groups) { + char *name; int i, err; /* * The memory controller needs its own bus, in order to avoid * namespace conflicts at /sys/bus/edac. 
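* For example, the first controller's bus is registered as "mc0" and its devices appear under /sys/bus/mc0/, so two controllers never contend for one bus name.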
*/ - mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); - if (!mci->bus->name) + name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); + if (!name) return -ENOMEM; + mci->bus->name = name; + edac_dbg(0, "creating bus %s\n", mci->bus->name); err = bus_register(mci->bus); - if (err < 0) - goto fail_free_name; + if (err < 0) { + kfree(name); + return err; + } /* get the /sys/devices/system/edac subsys reference */ mci->dev.type = &mci_attr_type; @@ -1060,8 +1065,8 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, device_unregister(&mci->dev); fail_unregister_bus: bus_unregister(mci->bus); -fail_free_name: - kfree(mci->bus->name); + kfree(name); + return err; } @@ -1092,10 +1097,12 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) void edac_unregister_sysfs(struct mem_ctl_info *mci) { + const char *name = mci->bus->name; + edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); bus_unregister(mci->bus); - kfree(mci->bus->name); + kfree(name); } static void mc_attr_release(struct device *dev) diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index 2cf44b4db80c8..b4b38603b804e 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -274,13 +274,12 @@ static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci, */ static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci) { - int status; - edac_dbg(0, "\n"); - status = cancel_delayed_work(&pci->work); - if (status == 0) - flush_workqueue(edac_workqueue); + pci->op_state = OP_OFFLINE; + + cancel_delayed_work_sync(&pci->work); + flush_workqueue(edac_workqueue); } /* diff --git a/drivers/edac/octeon_edac-l2c.c b/drivers/edac/octeon_edac-l2c.c index 7e98084d36451..afea7fc625ccb 100644 --- a/drivers/edac/octeon_edac-l2c.c +++ b/drivers/edac/octeon_edac-l2c.c @@ -151,7 +151,7 @@ static int octeon_l2c_probe(struct platform_device *pdev) l2c->ctl_name = "octeon_l2c_err"; - if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + if (OCTEON_IS_OCTEON1PLUS()) { union cvmx_l2t_err l2t_err; union cvmx_l2d_err l2d_err; diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c index bb19e0732681c..cda6dab5067a5 100644 --- a/drivers/edac/octeon_edac-lmc.c +++ b/drivers/edac/octeon_edac-lmc.c @@ -234,7 +234,7 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) layers[0].size = 1; layers[0].is_virt_csrow = false; - if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + if (OCTEON_IS_OCTEON1PLUS()) { union cvmx_lmcx_mem_cfg0 cfg0; cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(0)); diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c index 0f83c33a7d1fc..2ab6cf24c9598 100644 --- a/drivers/edac/octeon_edac-pc.c +++ b/drivers/edac/octeon_edac-pc.c @@ -73,7 +73,7 @@ static int co_cache_error_event(struct notifier_block *this, edac_device_handle_ce(p->ed, cpu, 0, "dcache"); /* Clear the error indication */ - if (OCTEON_IS_MODEL(OCTEON_FAM_2)) + if (OCTEON_IS_OCTEON2()) write_octeon_c0_dcacheerr(1); else write_octeon_c0_dcacheerr(0); diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index 3515b381c1312..711d8ad74f116 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -920,7 +920,7 @@ static int ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1) */ for (row = 0; row < mci->nr_csrows; row++) { - struct csrow_info *csi = &mci->csrows[row]; + struct csrow_info *csi = mci->csrows[row]; /* * Get the configuration settings for this diff --git a/drivers/edac/sb_edac.c 
b/drivers/edac/sb_edac.c index 1acf57ba4c86b..cd6b9c72c8ac5 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -1608,6 +1608,7 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci, { struct sbridge_pvt *pvt = mci->pvt_info; struct pci_dev *pdev; + u8 saw_chan_mask = 0; int i; for (i = 0; i < sbridge_dev->n_devs; i++) { @@ -1641,6 +1642,7 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci, { int id = pdev->device - PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0; pvt->pci_tad[id] = pdev; + saw_chan_mask |= 1 << id; } break; case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO: @@ -1661,10 +1663,8 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci, !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta) goto enodev; - for (i = 0; i < NUM_CHANNELS; i++) { - if (!pvt->pci_tad[i]) - goto enodev; - } + if (saw_chan_mask != 0x0f) + goto enodev; return 0; enodev: diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index f51d376d10ba6..c2f5117fd8cb0 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -3675,6 +3675,11 @@ static int pci_probe(struct pci_dev *dev, reg_write(ohci, OHCI1394_IsoXmitIntMaskSet, ~0); ohci->it_context_support = reg_read(ohci, OHCI1394_IsoXmitIntMaskSet); + /* JMicron JMB38x often shows 0 at first read, just ignore it */ + if (!ohci->it_context_support) { + ohci_notice(ohci, "overriding IsoXmitIntMask\n"); + ohci->it_context_support = 0xf; + } reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, ~0); ohci->it_context_mask = ohci->it_context_support; ohci->n_it = hweight32(ohci->it_context_mask); diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 97b1616aa3918..bba843c2b0ace 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -89,9 +89,9 @@ static void dmi_table(u8 *buf, /* * Stop when we have seen all the items the table claimed to have - * (SMBIOS < 3.0 only) OR we reach an end-of-table marker OR we run - * off the end of the table (should never happen but sometimes does - * on bogus implementations.) + * (SMBIOS < 3.0 only) OR we reach an end-of-table marker (SMBIOS + * >= 3.0 only) OR we run off the end of the table (should never + * happen but sometimes does on bogus implementations.) */ while ((!dmi_num || i < dmi_num) && (data - buf + sizeof(struct dmi_header)) <= dmi_len) { @@ -110,8 +110,13 @@ static void dmi_table(u8 *buf, /* * 7.45 End-of-Table (Type 127) [SMBIOS reference spec v3.0.0] + * For tables behind a 64-bit entry point, we have no item + * count and no exact table length, so stop on end-of-table + * marker. For tables behind a 32-bit entry point, we have + * seen OEM structures behind the end-of-table marker on + * some systems, so don't trust it. 
*/ - if (dm->type == DMI_ENTRY_END_OF_TABLE) + if (!dmi_num && dm->type == DMI_ENTRY_END_OF_TABLE) break; data += 2; diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 4fd9961d552e8..d425374254384 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -305,10 +305,17 @@ const char *cper_mem_err_unpack(struct trace_seq *p, return ret; } -static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) +static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem, + int len) { struct cper_mem_err_compact cmem; + /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */ + if (len == sizeof(struct cper_sec_mem_err_old) && + (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) { + pr_err(FW_WARN "valid bits set for fields beyond structure\n"); + return; + } if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); if (mem->validation_bits & CPER_MEM_VALID_PA) @@ -405,8 +412,10 @@ static void cper_estatus_print_section( } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { struct cper_sec_mem_err *mem_err = (void *)(gdata + 1); printk("%s""section_type: memory error\n", newpfx); - if (gdata->error_data_length >= sizeof(*mem_err)) - cper_print_mem(newpfx, mem_err); + if (gdata->error_data_length >= + sizeof(struct cper_sec_mem_err_old)) + cper_print_mem(newpfx, mem_err, + gdata->error_data_length); else goto err_section_too_small; } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) { diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 3061bb8629dc3..63226e9036a15 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -57,6 +57,11 @@ bool efi_runtime_disabled(void) static int __init parse_efi_cmdline(char *str) { + if (!str) { + pr_warn("need at least one option\n"); + return -EINVAL; + } + if (parse_option_str(str, "noruntime")) disable_runtime = true; @@ -65,7 +70,6 @@ static int __init parse_efi_cmdline(char *str) early_param("efi", parse_efi_cmdline); static struct kobject *efi_kobj; -static struct kobject *efivars_kobj; /* * Let's not leave out systab information that snuck into @@ -212,10 +216,9 @@ static int __init efisubsys_init(void) goto err_remove_group; /* and the standard mountpoint for efivarfs */ - efivars_kobj = kobject_create_and_add("efivars", efi_kobj); - if (!efivars_kobj) { + error = sysfs_create_mount_point(efi_kobj, "efivars"); + if (error) { pr_err("efivars: Subsystem registration failed.\n"); - error = -ENOMEM; goto err_remove_group; } diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index e29560e6b40b0..950c87f5d2793 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -13,6 +13,7 @@ */ #include +#include #include #include "efistub.h" @@ -305,6 +306,44 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, */ #define EFI_RT_VIRTUAL_BASE 0x40000000 +static int cmp_mem_desc(const void *l, const void *r) +{ + const efi_memory_desc_t *left = l, *right = r; + + return (left->phys_addr > right->phys_addr) ? 1 : -1; +} + +/* + * Returns whether region @left ends exactly where region @right starts, + * or false if either argument is NULL. 
+ */ +static bool regions_are_adjacent(efi_memory_desc_t *left, + efi_memory_desc_t *right) +{ + u64 left_end; + + if (left == NULL || right == NULL) + return false; + + left_end = left->phys_addr + left->num_pages * EFI_PAGE_SIZE; + + return left_end == right->phys_addr; +} + +/* + * Returns whether region @left and region @right have compatible memory type + * mapping attributes, and are both EFI_MEMORY_RUNTIME regions. + */ +static bool regions_have_compatible_memory_type_attrs(efi_memory_desc_t *left, + efi_memory_desc_t *right) +{ + static const u64 mem_type_mask = EFI_MEMORY_WB | EFI_MEMORY_WT | + EFI_MEMORY_WC | EFI_MEMORY_UC | + EFI_MEMORY_RUNTIME; + + return ((left->attribute ^ right->attribute) & mem_type_mask) == 0; +} + /* * efi_get_virtmap() - create a virtual mapping for the EFI memory map * @@ -317,33 +356,52 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, int *count) { u64 efi_virt_base = EFI_RT_VIRTUAL_BASE; - efi_memory_desc_t *out = runtime_map; + efi_memory_desc_t *in, *prev = NULL, *out = runtime_map; int l; - for (l = 0; l < map_size; l += desc_size) { - efi_memory_desc_t *in = (void *)memory_map + l; + /* + * To work around potential issues with the Properties Table feature + * introduced in UEFI 2.5, which may split PE/COFF executable images + * in memory into several RuntimeServicesCode and RuntimeServicesData + * regions, we need to preserve the relative offsets between adjacent + * EFI_MEMORY_RUNTIME regions with the same memory type attributes. + * The easiest way to find adjacent regions is to sort the memory map + * before traversing it. + */ + sort(memory_map, map_size / desc_size, desc_size, cmp_mem_desc, NULL); + + for (l = 0; l < map_size; l += desc_size, prev = in) { u64 paddr, size; + in = (void *)memory_map + l; if (!(in->attribute & EFI_MEMORY_RUNTIME)) continue; + paddr = in->phys_addr; + size = in->num_pages * EFI_PAGE_SIZE; + /* * Make the mapping compatible with 64k pages: this allows * a 4k page size kernel to kexec a 64k page size kernel and * vice versa. */ - paddr = round_down(in->phys_addr, SZ_64K); - size = round_up(in->num_pages * EFI_PAGE_SIZE + - in->phys_addr - paddr, SZ_64K); - - /* - * Avoid wasting memory on PTEs by choosing a virtual base that - * is compatible with section mappings if this region has the - * appropriate size and physical alignment. (Sections are 2 MB - * on 4k granule kernels) - */ - if (IS_ALIGNED(in->phys_addr, SZ_2M) && size >= SZ_2M) - efi_virt_base = round_up(efi_virt_base, SZ_2M); + if (!regions_are_adjacent(prev, in) || + !regions_have_compatible_memory_type_attrs(prev, in)) { + + paddr = round_down(in->phys_addr, SZ_64K); + size += in->phys_addr - paddr; + + /* + * Avoid wasting memory on PTEs by choosing a virtual + * base that is compatible with section mappings if this + * region has the appropriate size and physical + * alignment. 
(Sections are 2 MB on 4k granule kernels) + */ + if (IS_ALIGNED(in->phys_addr, SZ_2M) && size >= SZ_2M) + efi_virt_base = round_up(efi_virt_base, SZ_2M); + else + efi_virt_base = round_up(efi_virt_base, SZ_64K); + } in->virt_addr = efi_virt_base + in->phys_addr - paddr; efi_virt_base += size; diff --git a/drivers/gpio/gpio-crystalcove.c b/drivers/gpio/gpio-crystalcove.c index 91a7ffe831350..ab457fc00e755 100644 --- a/drivers/gpio/gpio-crystalcove.c +++ b/drivers/gpio/gpio-crystalcove.c @@ -255,6 +255,7 @@ static struct irq_chip crystalcove_irqchip = { .irq_set_type = crystalcove_irq_type, .irq_bus_lock = crystalcove_bus_lock, .irq_bus_sync_unlock = crystalcove_bus_sync_unlock, + .flags = IRQCHIP_SKIP_SET_WAKE, }; static irqreturn_t crystalcove_gpio_irq_handler(int irq, void *data) diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index fd39774659484..1e14a6c74ed13 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -177,8 +177,17 @@ static int gpio_rcar_irq_set_wake(struct irq_data *d, unsigned int on) struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct gpio_rcar_priv *p = container_of(gc, struct gpio_rcar_priv, gpio_chip); - - irq_set_irq_wake(p->irq_parent, on); + int error; + + if (p->irq_parent) { + error = irq_set_irq_wake(p->irq_parent, on); + if (error) { + dev_dbg(&p->pdev->dev, + "irq %u doesn't support irq_set_wake\n", + p->irq_parent); + p->irq_parent = 0; + } + } if (!p->clk) return 0; diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 035dacc93382f..fd5c5f3370f6b 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -227,7 +227,7 @@ static int ast_get_dram_info(struct drm_device *dev) } while (ast_read32(ast, 0x10000) != 0x01); data = ast_read32(ast, 0x10004); - if (data & 0x400) + if (data & 0x40) ast->dram_bus_width = 16; else ast->dram_bus_width = 32; diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c index 60b0c13d7ff5c..aebc4595afa02 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c @@ -559,7 +559,7 @@ static int atmel_hlcdc_dc_drm_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int atmel_hlcdc_dc_drm_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); diff --git a/drivers/gpu/drm/bridge/ptn3460.c b/drivers/gpu/drm/bridge/ptn3460.c index 9d2f053382e18..63a09e4079f35 100644 --- a/drivers/gpu/drm/bridge/ptn3460.c +++ b/drivers/gpu/drm/bridge/ptn3460.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 3007b44e6bf44..800a025dd0629 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -2749,8 +2749,11 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; - /* For some reason crtc x/y offsets are signed internally. */ - if (crtc_req->x > INT_MAX || crtc_req->y > INT_MAX) + /* + * Universal plane src offsets are only 16.16, prevent havoc for + * drivers using universal plane code internally. 
+ */ + if (crtc_req->x & 0xffff0000 || crtc_req->y & 0xffff0000) return -ERANGE; drm_modeset_lock_all(dev); @@ -5048,12 +5051,9 @@ void drm_mode_config_reset(struct drm_device *dev) if (encoder->funcs->reset) encoder->funcs->reset(encoder); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - connector->status = connector_status_unknown; - + list_for_each_entry(connector, &dev->mode_config.connector_list, head) if (connector->funcs->reset) connector->funcs->reset(connector); - } } EXPORT_SYMBOL(drm_mode_config_reset); diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 132581ca4ad84..0ec9ad50ba7c3 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -804,8 +804,6 @@ static void drm_dp_destroy_mst_branch_device(struct kref *kref) struct drm_dp_mst_port *port, *tmp; bool wake_tx = false; - cancel_work_sync(&mstb->mgr->work); - /* * destroy all ports - don't need lock * as there are no more references to the mst branch @@ -863,20 +861,33 @@ static void drm_dp_destroy_port(struct kref *kref) { struct drm_dp_mst_port *port = container_of(kref, struct drm_dp_mst_port, kref); struct drm_dp_mst_topology_mgr *mgr = port->mgr; + if (!port->input) { port->vcpi.num_slots = 0; kfree(port->cached_edid); - if (port->connector) - (*port->mgr->cbs->destroy_connector)(mgr, port->connector); - drm_dp_port_teardown_pdt(port, port->pdt); - if (!port->input && port->vcpi.vcpi > 0) - drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi); + /* + * The only time we don't have a connector + * on an output port is if the connector init + * fails. + */ + if (port->connector) { + /* we can't destroy the connector here, as + * we might be holding the mode_config.mutex + * from an EDID retrieval */ + + mutex_lock(&mgr->destroy_connector_lock); + list_add(&port->next, &mgr->destroy_connector_list); + mutex_unlock(&mgr->destroy_connector_lock); + schedule_work(&mgr->destroy_connector_work); + return; + } + /* no need to clean up vcpi + * as if we have no connector we never setup a vcpi */ + drm_dp_port_teardown_pdt(port, port->pdt); } kfree(port); - - (*mgr->cbs->hotplug)(mgr); } static void drm_dp_put_port(struct drm_dp_mst_port *port) @@ -962,17 +973,17 @@ static struct drm_dp_mst_port *drm_dp_get_port(struct drm_dp_mst_branch *mstb, u static u8 drm_dp_calculate_rad(struct drm_dp_mst_port *port, u8 *rad) { - int lct = port->parent->lct; + int parent_lct = port->parent->lct; int shift = 4; - int idx = lct / 2; - if (lct > 1) { - memcpy(rad, port->parent->rad, idx); - shift = (lct % 2) ? 4 : 0; + int idx = (parent_lct - 1) / 2; + if (parent_lct > 1) { + memcpy(rad, port->parent->rad, idx + 1); + shift = (parent_lct % 2) ? 4 : 0; } else rad[0] = 0; rad[idx] |= port->port_num << shift; - return lct + 1; + return parent_lct + 1; } /* @@ -1028,7 +1039,7 @@ static void build_mst_prop_path(struct drm_dp_mst_port *port, snprintf(proppath, proppath_size, "mst:%d", mstb->mgr->conn_base_id); for (i = 0; i < (mstb->lct - 1); i++) { int shift = (i % 2) ? 
0 : 4; - int port_num = mstb->rad[i / 2] >> shift; + int port_num = (mstb->rad[i / 2] >> shift) & 0xf; snprintf(temp, sizeof(temp), "-%d", port_num); strlcat(proppath, temp, proppath_size); } @@ -1106,12 +1117,21 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb, char proppath[255]; build_mst_prop_path(port, mstb, proppath, sizeof(proppath)); port->connector = (*mstb->mgr->cbs->add_connector)(mstb->mgr, port, proppath); - + if (!port->connector) { + /* remove it from the port list */ + mutex_lock(&mstb->mgr->lock); + list_del(&port->next); + mutex_unlock(&mstb->mgr->lock); + /* drop port list reference */ + drm_dp_put_port(port); + goto out; + } if (port->port_num >= 8) { port->cached_edid = drm_get_edid(port->connector, &port->aux.ddc); } } +out: /* put reference to this port */ drm_dp_put_port(port); } @@ -1163,25 +1183,73 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_ struct drm_dp_mst_port *port; int i; /* find the port by iterating down */ + + mutex_lock(&mgr->lock); mstb = mgr->mst_primary; for (i = 0; i < lct - 1; i++) { int shift = (i % 2) ? 0 : 4; - int port_num = rad[i / 2] >> shift; + int port_num = (rad[i / 2] >> shift) & 0xf; list_for_each_entry(port, &mstb->ports, next) { if (port->port_num == port_num) { - if (!port->mstb) { + mstb = port->mstb; + if (!mstb) { DRM_ERROR("failed to lookup MSTB with lct %d, rad %02x\n", lct, rad[0]); - return NULL; + goto out; } - mstb = port->mstb; break; } } } kref_get(&mstb->kref); +out: + mutex_unlock(&mgr->lock); + return mstb; +} + +static struct drm_dp_mst_branch *get_mst_branch_device_by_guid_helper( + struct drm_dp_mst_branch *mstb, + uint8_t *guid) +{ + struct drm_dp_mst_branch *found_mstb; + struct drm_dp_mst_port *port; + + list_for_each_entry(port, &mstb->ports, next) { + if (!port->mstb) + continue; + + if (port->guid_valid && memcmp(port->guid, guid, 16) == 0) + return port->mstb; + + found_mstb = get_mst_branch_device_by_guid_helper(port->mstb, guid); + + if (found_mstb) + return found_mstb; + } + + return NULL; +} + +static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device_by_guid( + struct drm_dp_mst_topology_mgr *mgr, + uint8_t *guid) +{ + struct drm_dp_mst_branch *mstb; + + /* find the port by iterating down */ + mutex_lock(&mgr->lock); + + if (mgr->guid_valid && memcmp(mgr->guid, guid, 16) == 0) + mstb = mgr->mst_primary; + else + mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid); + + if (mstb) + kref_get(&mstb->kref); + + mutex_unlock(&mgr->lock); return mstb; } @@ -1189,7 +1257,7 @@ static void drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *m struct drm_dp_mst_branch *mstb) { struct drm_dp_mst_port *port; - + struct drm_dp_mst_branch *mstb_child; if (!mstb->link_address_sent) { drm_dp_send_link_address(mgr, mstb); mstb->link_address_sent = true; @@ -1204,17 +1272,31 @@ static void drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *m if (!port->available_pbn) drm_dp_send_enum_path_resources(mgr, mstb, port); - if (port->mstb) - drm_dp_check_and_send_link_address(mgr, port->mstb); + if (port->mstb) { + mstb_child = drm_dp_get_validated_mstb_ref(mgr, port->mstb); + if (mstb_child) { + drm_dp_check_and_send_link_address(mgr, mstb_child); + drm_dp_put_mst_branch_device(mstb_child); + } + } } } static void drm_dp_mst_link_probe_work(struct work_struct *work) { struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, work); + struct drm_dp_mst_branch *mstb; - 
drm_dp_check_and_send_link_address(mgr, mgr->mst_primary); - + mutex_lock(&mgr->lock); + mstb = mgr->mst_primary; + if (mstb) { + kref_get(&mstb->kref); + } + mutex_unlock(&mgr->lock); + if (mstb) { + drm_dp_check_and_send_link_address(mgr, mstb); + drm_dp_put_mst_branch_device(mstb); + } } static bool drm_dp_validate_guid(struct drm_dp_mst_topology_mgr *mgr, @@ -1269,7 +1351,6 @@ static int drm_dp_send_sideband_msg(struct drm_dp_mst_topology_mgr *mgr, goto retry; } DRM_DEBUG_KMS("failed to dpcd write %d %d\n", tosend, ret); - WARN(1, "fail\n"); return -EIO; } @@ -1283,6 +1364,7 @@ static int set_hdr_from_dst_qlock(struct drm_dp_sideband_msg_hdr *hdr, struct drm_dp_sideband_msg_tx *txmsg) { struct drm_dp_mst_branch *mstb = txmsg->dst; + u8 req_type; /* both msg slots are full */ if (txmsg->seqno == -1) { @@ -1299,7 +1381,13 @@ static int set_hdr_from_dst_qlock(struct drm_dp_sideband_msg_hdr *hdr, txmsg->seqno = 1; mstb->tx_slots[txmsg->seqno] = txmsg; } - hdr->broadcast = 0; + + req_type = txmsg->msg[0] & 0x7f; + if (req_type == DP_CONNECTION_STATUS_NOTIFY || + req_type == DP_RESOURCE_STATUS_NOTIFY) + hdr->broadcast = 1; + else + hdr->broadcast = 0; hdr->path_msg = txmsg->path_msg; hdr->lct = mstb->lct; hdr->lcr = mstb->lct - 1; @@ -1401,26 +1489,18 @@ static void process_single_down_tx_qlock(struct drm_dp_mst_topology_mgr *mgr) } /* called holding qlock */ -static void process_single_up_tx_qlock(struct drm_dp_mst_topology_mgr *mgr) +static void process_single_up_tx_qlock(struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_sideband_msg_tx *txmsg) { - struct drm_dp_sideband_msg_tx *txmsg; int ret; /* construct a chunk from the first msg in the tx_msg queue */ - if (list_empty(&mgr->tx_msg_upq)) { - mgr->tx_up_in_progress = false; - return; - } - - txmsg = list_first_entry(&mgr->tx_msg_upq, struct drm_dp_sideband_msg_tx, next); ret = process_single_tx_qlock(mgr, txmsg, true); - if (ret == 1) { - /* up txmsgs aren't put in slots - so free after we send it */ - list_del(&txmsg->next); - kfree(txmsg); - } else if (ret) + + if (ret != 1) DRM_DEBUG_KMS("failed to send msg in q %d\n", ret); - mgr->tx_up_in_progress = true; + + txmsg->dst->tx_slots[txmsg->seqno] = NULL; } static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr, @@ -1805,11 +1885,12 @@ static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr, drm_dp_encode_up_ack_reply(txmsg, req_type); mutex_lock(&mgr->qlock); - list_add_tail(&txmsg->next, &mgr->tx_msg_upq); - if (!mgr->tx_up_in_progress) { - process_single_up_tx_qlock(mgr); - } + + process_single_up_tx_qlock(mgr, txmsg); + mutex_unlock(&mgr->qlock); + + kfree(txmsg); return 0; } @@ -1953,6 +2034,8 @@ void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr *mgr) drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, DP_MST_EN | DP_UPSTREAM_IS_SRC); mutex_unlock(&mgr->lock); + flush_work(&mgr->work); + flush_work(&mgr->destroy_connector_work); } EXPORT_SYMBOL(drm_dp_mst_topology_mgr_suspend); @@ -2104,28 +2187,50 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) if (mgr->up_req_recv.have_eomt) { struct drm_dp_sideband_msg_req_body msg; - struct drm_dp_mst_branch *mstb; + struct drm_dp_mst_branch *mstb = NULL; bool seqno; - mstb = drm_dp_get_mst_branch_device(mgr, - mgr->up_req_recv.initial_hdr.lct, - mgr->up_req_recv.initial_hdr.rad); - if (!mstb) { - DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->up_req_recv.initial_hdr.lct); - memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); - return 0; + + if 
(!mgr->up_req_recv.initial_hdr.broadcast) { + mstb = drm_dp_get_mst_branch_device(mgr, + mgr->up_req_recv.initial_hdr.lct, + mgr->up_req_recv.initial_hdr.rad); + if (!mstb) { + DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->up_req_recv.initial_hdr.lct); + memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); + return 0; + } } seqno = mgr->up_req_recv.initial_hdr.seqno; drm_dp_sideband_parse_req(&mgr->up_req_recv, &msg); if (msg.req_type == DP_CONNECTION_STATUS_NOTIFY) { - drm_dp_send_up_ack_reply(mgr, mstb, msg.req_type, seqno, false); + drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, msg.req_type, seqno, false); + + if (!mstb) + mstb = drm_dp_get_mst_branch_device_by_guid(mgr, msg.u.conn_stat.guid); + + if (!mstb) { + DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->up_req_recv.initial_hdr.lct); + memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); + return 0; + } + drm_dp_update_port(mstb, &msg.u.conn_stat); DRM_DEBUG_KMS("Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", msg.u.conn_stat.port_number, msg.u.conn_stat.legacy_device_plug_status, msg.u.conn_stat.displayport_device_plug_status, msg.u.conn_stat.message_capability_status, msg.u.conn_stat.input_port, msg.u.conn_stat.peer_device_type); (*mgr->cbs->hotplug)(mgr); } else if (msg.req_type == DP_RESOURCE_STATUS_NOTIFY) { - drm_dp_send_up_ack_reply(mgr, mstb, msg.req_type, seqno, false); + drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, msg.req_type, seqno, false); + if (!mstb) + mstb = drm_dp_get_mst_branch_device_by_guid(mgr, msg.u.resource_stat.guid); + + if (!mstb) { + DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->up_req_recv.initial_hdr.lct); + memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); + return 0; + } + DRM_DEBUG_KMS("Got RSN: pn: %d avail_pbn %d\n", msg.u.resource_stat.port_number, msg.u.resource_stat.available_pbn); } @@ -2305,6 +2410,7 @@ bool drm_dp_mst_allocate_vcpi(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp DRM_DEBUG_KMS("payload: vcpi %d already allocated for pbn %d - requested pbn %d\n", port->vcpi.vcpi, port->vcpi.pbn, pbn); if (pbn == port->vcpi.pbn) { *slots = port->vcpi.num_slots; + drm_dp_put_port(port); return true; } } @@ -2464,32 +2570,31 @@ EXPORT_SYMBOL(drm_dp_check_act_status); */ int drm_dp_calc_pbn_mode(int clock, int bpp) { - fixed20_12 pix_bw; - fixed20_12 fbpp; - fixed20_12 result; - fixed20_12 margin, tmp; - u32 res; - - pix_bw.full = dfixed_const(clock); - fbpp.full = dfixed_const(bpp); - tmp.full = dfixed_const(8); - fbpp.full = dfixed_div(fbpp, tmp); - - result.full = dfixed_mul(pix_bw, fbpp); - margin.full = dfixed_const(54); - tmp.full = dfixed_const(64); - margin.full = dfixed_div(margin, tmp); - result.full = dfixed_div(result, margin); - - margin.full = dfixed_const(1006); - tmp.full = dfixed_const(1000); - margin.full = dfixed_div(margin, tmp); - result.full = dfixed_mul(result, margin); - - result.full = dfixed_div(result, tmp); - result.full = dfixed_ceil(result); - res = dfixed_trunc(result); - return res; + u64 kbps; + s64 peak_kbps; + u32 numerator; + u32 denominator; + + kbps = clock * bpp; + + /* + * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006 + * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on + * common multiplier to render an integer PBN for all link rate/lane + * counts combinations + * calculate + * peak_kbps *= (1006/1000) + * peak_kbps *= (64/54) + * peak_kbps *= 8 convert to bytes + */ + + numerator = 64 * 1006; + denominator = 54 * 
8 * 1000 * 1000; + + kbps *= numerator; + peak_kbps = drm_fixp_from_fraction(kbps, denominator); + + return drm_fixp2int_ceil(peak_kbps); } EXPORT_SYMBOL(drm_dp_calc_pbn_mode); @@ -2497,11 +2602,23 @@ static int test_calc_pbn_mode(void) { int ret; ret = drm_dp_calc_pbn_mode(154000, 30); - if (ret != 689) + if (ret != 689) { + DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n", + 154000, 30, 689, ret); return -EINVAL; + } ret = drm_dp_calc_pbn_mode(234000, 30); - if (ret != 1047) + if (ret != 1047) { + DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n", + 234000, 30, 1047, ret); + return -EINVAL; + } + ret = drm_dp_calc_pbn_mode(297000, 24); + if (ret != 1063) { + DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n", + 297000, 24, 1063, ret); return -EINVAL; + } return 0; } @@ -2632,6 +2749,39 @@ static void drm_dp_tx_work(struct work_struct *work) mutex_unlock(&mgr->qlock); } +static void drm_dp_destroy_connector_work(struct work_struct *work) +{ + struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, destroy_connector_work); + struct drm_dp_mst_port *port; + bool send_hotplug = false; + /* + * Not a regular list traverse as we have to drop the destroy + * connector lock before destroying the connector, to avoid AB->BA + * ordering between this lock and the config mutex. + */ + for (;;) { + mutex_lock(&mgr->destroy_connector_lock); + port = list_first_entry_or_null(&mgr->destroy_connector_list, struct drm_dp_mst_port, next); + if (!port) { + mutex_unlock(&mgr->destroy_connector_lock); + break; + } + list_del(&port->next); + mutex_unlock(&mgr->destroy_connector_lock); + + mgr->cbs->destroy_connector(mgr, port->connector); + + drm_dp_port_teardown_pdt(port, port->pdt); + + if (!port->input && port->vcpi.vcpi > 0) + drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi); + kfree(port); + send_hotplug = true; + } + if (send_hotplug) + (*mgr->cbs->hotplug)(mgr); +} + /** * drm_dp_mst_topology_mgr_init - initialise a topology manager * @mgr: manager struct to initialise @@ -2651,10 +2801,12 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr, mutex_init(&mgr->lock); mutex_init(&mgr->qlock); mutex_init(&mgr->payload_lock); - INIT_LIST_HEAD(&mgr->tx_msg_upq); + mutex_init(&mgr->destroy_connector_lock); INIT_LIST_HEAD(&mgr->tx_msg_downq); + INIT_LIST_HEAD(&mgr->destroy_connector_list); INIT_WORK(&mgr->work, drm_dp_mst_link_probe_work); INIT_WORK(&mgr->tx_work, drm_dp_tx_work); + INIT_WORK(&mgr->destroy_connector_work, drm_dp_destroy_connector_work); init_waitqueue_head(&mgr->tx_waitq); mgr->dev = dev; mgr->aux = aux; @@ -2679,6 +2831,8 @@ EXPORT_SYMBOL(drm_dp_mst_topology_mgr_init); */ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr) { + flush_work(&mgr->work); + flush_work(&mgr->destroy_connector_work); mutex_lock(&mgr->payload_lock); kfree(mgr->payloads); mgr->payloads = NULL; @@ -2713,12 +2867,13 @@ static int drm_dp_mst_i2c_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs if (msgs[num - 1].flags & I2C_M_RD) reading = true; - if (!reading) { + if (!reading || (num - 1 > DP_REMOTE_I2C_READ_MAX_TRANSACTIONS)) { DRM_DEBUG_KMS("Unsupported I2C transaction for MST device\n"); ret = -EIO; goto out; } + memset(&msg, 0, sizeof(msg)); msg.req_type = DP_REMOTE_I2C_READ; msg.u.i2c_read.num_transactions = num - 1; msg.u.i2c_read.port_number = port->port_num; diff --git a/drivers/gpu/drm/drm_ioc32.c 
b/drivers/gpu/drm/drm_ioc32.c index aa8bbb460c571..9cfcd0aef0dfa 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -70,6 +70,8 @@ #define DRM_IOCTL_WAIT_VBLANK32 DRM_IOWR(0x3a, drm_wait_vblank32_t) +#define DRM_IOCTL_MODE_ADDFB232 DRM_IOWR(0xb8, drm_mode_fb_cmd232_t) + typedef struct drm_version_32 { int version_major; /**< Major version */ int version_minor; /**< Minor version */ @@ -1016,6 +1018,63 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, return 0; } +typedef struct drm_mode_fb_cmd232 { + u32 fb_id; + u32 width; + u32 height; + u32 pixel_format; + u32 flags; + u32 handles[4]; + u32 pitches[4]; + u32 offsets[4]; + u64 modifier[4]; +} __attribute__((packed)) drm_mode_fb_cmd232_t; + +static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct drm_mode_fb_cmd232 __user *argp = (void __user *)arg; + struct drm_mode_fb_cmd232 req32; + struct drm_mode_fb_cmd2 __user *req64; + int i; + int err; + + if (copy_from_user(&req32, argp, sizeof(req32))) + return -EFAULT; + + req64 = compat_alloc_user_space(sizeof(*req64)); + + if (!access_ok(VERIFY_WRITE, req64, sizeof(*req64)) + || __put_user(req32.width, &req64->width) + || __put_user(req32.height, &req64->height) + || __put_user(req32.pixel_format, &req64->pixel_format) + || __put_user(req32.flags, &req64->flags)) + return -EFAULT; + + for (i = 0; i < 4; i++) { + if (__put_user(req32.handles[i], &req64->handles[i])) + return -EFAULT; + if (__put_user(req32.pitches[i], &req64->pitches[i])) + return -EFAULT; + if (__put_user(req32.offsets[i], &req64->offsets[i])) + return -EFAULT; + if (__put_user(req32.modifier[i], &req64->modifier[i])) + return -EFAULT; + } + + err = drm_ioctl(file, DRM_IOCTL_MODE_ADDFB2, (unsigned long)req64); + if (err) + return err; + + if (__get_user(req32.fb_id, &req64->fb_id)) + return -EFAULT; + + if (copy_to_user(argp, &req32, sizeof(req32))) + return -EFAULT; + + return 0; +} + static drm_ioctl_compat_t *drm_compat_ioctls[] = { [DRM_IOCTL_NR(DRM_IOCTL_VERSION32)] = compat_drm_version, [DRM_IOCTL_NR(DRM_IOCTL_GET_UNIQUE32)] = compat_drm_getunique, @@ -1048,6 +1107,7 @@ static drm_ioctl_compat_t *drm_compat_ioctls[] = { [DRM_IOCTL_NR(DRM_IOCTL_UPDATE_DRAW32)] = compat_drm_update_draw, #endif [DRM_IOCTL_NR(DRM_IOCTL_WAIT_VBLANK32)] = compat_drm_wait_vblank, + [DRM_IOCTL_NR(DRM_IOCTL_MODE_ADDFB232)] = compat_drm_mode_addfb2, }; /** diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index f861361a635e0..4924d381b6642 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -61,6 +61,9 @@ int drm_legacy_lock(struct drm_device *dev, void *data, struct drm_master *master = file_priv->master; int ret = 0; + if (drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + ++file_priv->lock_count; if (lock->context == DRM_KERNEL_CONTEXT) { @@ -153,6 +156,9 @@ int drm_legacy_unlock(struct drm_device *dev, void *data, struct drm_file *file_ struct drm_lock *lock = data; struct drm_master *master = file_priv->master; + if (drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + if (lock->context == DRM_KERNEL_CONTEXT) { DRM_ERROR("Process %d using kernel context %d\n", task_pid_nr(current), lock->context); diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 63503879a676c..0d75e75b1da34 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -195,7 +195,8 @@ static int 
drm_helper_probe_single_connector_modes_merge_bits(struct drm_connect mode_flags |= DRM_MODE_FLAG_3D_MASK; list_for_each_entry(mode, &connector->modes, head) { - mode->status = drm_mode_validate_basic(mode); + if (mode->status == MODE_OK) + mode->status = drm_mode_validate_basic(mode); if (mode->status == MODE_OK) mode->status = drm_mode_validate_size(mode, maxX, maxY); diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index eb7e61078a5b6..92586b0af3ab5 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -235,18 +235,12 @@ static ssize_t dpms_show(struct device *device, char *buf) { struct drm_connector *connector = to_drm_connector(device); - struct drm_device *dev = connector->dev; - uint64_t dpms_status; - int ret; + int dpms; - ret = drm_object_property_get_value(&connector->base, - dev->mode_config.dpms_property, - &dpms_status); - if (ret) - return 0; + dpms = READ_ONCE(connector->dpms); return snprintf(buf, PAGE_SIZE, "%s\n", - drm_get_dpms_name((int)dpms_status)); + drm_get_dpms_name(dpms)); } static ssize_t enabled_show(struct device *device, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a19d2c71e2050..fb91df1631d9e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -647,15 +647,18 @@ static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation) pci_disable_device(drm_dev->pdev); /* - * During hibernation on some GEN4 platforms the BIOS may try to access + * During hibernation on some platforms the BIOS may try to access * the device even though it's already in D3 and hang the machine. So * leave the device in D0 on those platforms and hope the BIOS will - * power down the device properly. Platforms where this was seen: - * Lenovo Thinkpad X301, X61s + * power down the device properly. The issue was seen on multiple old + * GENs with different BIOS vendors, so having an explicit blacklist + * is impractical; apply the workaround on everything pre GEN6.
The + * platforms where the issue was seen: + * Lenovo Thinkpad X301, X61s, X60, T60, X41 + * Fujitsu FSC S7110 + * Acer Aspire 1830T */ - if (!(hibernation && - drm_dev->pdev->subsystem_vendor == PCI_VENDOR_ID_LENOVO && - INTEL_INFO(dev_priv)->gen == 4)) + if (!(hibernation && INTEL_INFO(dev_priv)->gen < 6)) pci_set_power_state(drm_dev->pdev, PCI_D3hot); return 0; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8ae6f7f06b3a0..7d53d7e154556 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3190,15 +3190,14 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); #define I915_READ64(reg) dev_priv->uncore.funcs.mmio_readq(dev_priv, (reg), true) #define I915_READ64_2x32(lower_reg, upper_reg) ({ \ - u32 upper = I915_READ(upper_reg); \ - u32 lower = I915_READ(lower_reg); \ - u32 tmp = I915_READ(upper_reg); \ - if (upper != tmp) { \ - upper = tmp; \ - lower = I915_READ(lower_reg); \ - WARN_ON(I915_READ(upper_reg) != upper); \ - } \ - (u64)upper << 32 | lower; }) + u32 upper, lower, old_upper, loop = 0; \ + upper = I915_READ(upper_reg); \ + do { \ + old_upper = upper; \ + lower = I915_READ(lower_reg); \ + upper = I915_READ(upper_reg); \ + } while (upper != old_upper && loop++ < 2); \ + (u64)upper << 32 | lower; }) #define POSTING_READ(reg) (void)I915_READ_NOTRACE(reg) #define POSTING_READ16(reg) (void)I915_READ16_NOTRACE(reg) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2d0995e7afc37..596bce56e3792 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2401,6 +2401,7 @@ int __i915_add_request(struct intel_engine_cs *ring, } request->emitted_jiffies = jiffies; + ring->last_submitted_seqno = request->seqno; list_add_tail(&request->list, &ring->request_list); request->file_priv = NULL; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index f3e84c44d0091..4decf518d1060 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -317,6 +317,10 @@ void i915_gem_context_reset(struct drm_device *dev) i915_gem_context_unreference(lctx); ring->last_context = NULL; } + + /* Force the GPU state to be reinitialised on enabling */ + if (ring->default_context) + ring->default_context->legacy_hw_ctx.initialized = false; } } @@ -704,7 +708,7 @@ static int do_switch(struct intel_engine_cs *ring, goto unpin_out; } - if (!to->legacy_hw_ctx.initialized) { + if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) { hw_flags |= MI_RESTORE_INHIBIT; /* NB: If we inhibit the restore, the context is not allowed to * die because future work may end up depending on valid address diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a3190e793ed43..479024a4caadd 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1025,6 +1025,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, u32 old_read = obj->base.read_domains; u32 old_write = obj->base.write_domain; + obj->dirty = 1; /* be paranoid */ obj->base.write_domain = obj->base.pending_write_domain; if (obj->base.write_domain == 0) obj->base.pending_read_domains |= obj->base.read_domains; @@ -1032,7 +1033,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, i915_vma_move_to_active(vma, ring); if (obj->base.write_domain) { - obj->dirty = 1; i915_gem_request_assign(&obj->last_write_req, req); 
intel_fb_obj_invalidate(obj, ring, ORIGIN_CS); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0239fbff7bf72..ad90fa3045e51 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -502,17 +502,17 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, struct page *page_table; if (WARN_ON(!ppgtt->pdp.page_directory[pdpe])) - continue; + break; pd = ppgtt->pdp.page_directory[pdpe]; if (WARN_ON(!pd->page_table[pde])) - continue; + break; pt = pd->page_table[pde]; if (WARN_ON(!pt->page)) - continue; + break; page_table = pt->page; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 6377b22269ad1..7ee23d1d1e744 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -464,7 +464,10 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, } /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */ - args->phys_swizzle_mode = args->swizzle_mode; + if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) + args->phys_swizzle_mode = I915_BIT_6_SWIZZLE_UNKNOWN; + else + args->phys_swizzle_mode = args->swizzle_mode; if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9; if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 1719078c763ac..ce175d05260bc 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -776,7 +776,10 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { * Also note, that the object created here is not currently a "first class" * object, in that several ioctls are banned. These are the CPU access * ioctls: mmap(), pwrite and pread. In practice, you are expected to use - * direct access via your pointer rather than use those ioctls. + * direct access via your pointer rather than use those ioctls. Another + * restriction is that we do not allow userptr surfaces to be pinned to the + * hardware and so we reject any attempt to create a framebuffer out of a + * userptr. * * If you think this is a good interface to use to pass GPU memory between * drivers, please use dma-buf instead. 
In fact, wherever possible use diff --git a/drivers/gpu/drm/i915/i915_ioc32.c b/drivers/gpu/drm/i915/i915_ioc32.c index 176de6322e4d0..23aa04cded6b0 100644 --- a/drivers/gpu/drm/i915/i915_ioc32.c +++ b/drivers/gpu/drm/i915/i915_ioc32.c @@ -204,7 +204,7 @@ long i915_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) drm_ioctl_compat_t *fn = NULL; int ret; - if (nr < DRM_COMMAND_BASE) + if (nr < DRM_COMMAND_BASE || nr >= DRM_COMMAND_END) return drm_compat_ioctl(filp, cmd, arg); if (nr < DRM_COMMAND_BASE + ARRAY_SIZE(i915_compat_ioctls)) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6d494432b19f6..b0df8d10482ae 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2650,18 +2650,11 @@ static void gen8_disable_vblank(struct drm_device *dev, int pipe) spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } -static struct drm_i915_gem_request * -ring_last_request(struct intel_engine_cs *ring) -{ - return list_entry(ring->request_list.prev, - struct drm_i915_gem_request, list); -} - static bool -ring_idle(struct intel_engine_cs *ring) +ring_idle(struct intel_engine_cs *ring, u32 seqno) { return (list_empty(&ring->request_list) || - i915_gem_request_completed(ring_last_request(ring), false)); + i915_seqno_passed(seqno, ring->last_submitted_seqno)); } static bool @@ -2883,7 +2876,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) acthd = intel_ring_get_active_head(ring); if (ring->hangcheck.seqno == seqno) { - if (ring_idle(ring)) { + if (ring_idle(ring, seqno)) { ring->hangcheck.action = HANGCHECK_IDLE; if (waitqueue_active(&ring->irq_queue)) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 773d1d24e604c..a30db4b4050e4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3209,6 +3209,7 @@ enum skl_disp_power_wells { #define BLM_POLARITY_PNV (1 << 0) /* pnv only */ #define BLC_HIST_CTL (dev_priv->info.display_mmio_offset + 0x61260) +#define BLM_HISTOGRAM_ENABLE (1 << 31) /* New registers for PCH-split platforms. Safe where new bits show up, the * register layout matches with gen4 BLC_PWM_CTL[12]. */ diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index c684085cb56ac..fadf9865709e5 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -41,7 +41,7 @@ find_section(struct bdb_header *bdb, int section_id) { u8 *base = (u8 *)bdb; int index = 0; - u16 total, current_size; + u32 total, current_size; u8 current_id; /* skip to first section */ @@ -56,6 +56,10 @@ find_section(struct bdb_header *bdb, int section_id) current_size = *((u16 *)(base + index)); index += 2; + /* The MIPI Sequence Block v3+ has a separate size field. */ + if (current_id == BDB_MIPI_SEQUENCE && *(base + index) >= 3) + current_size = *((const u32 *)(base + index + 1)); + if (index + current_size > total) return NULL; @@ -845,6 +849,12 @@ parse_mipi(struct drm_i915_private *dev_priv, struct bdb_header *bdb) return; } + /* Fail gracefully for forward-incompatible sequence block.
*/ + if (sequence->version >= 3) { + DRM_ERROR("Unable to parse MIPI Sequence Block v3+\n"); + return; + } + DRM_DEBUG_DRIVER("Found MIPI sequence block\n"); block_size = get_blocksize(sequence); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d0f3cbc87474c..b103773df2a34 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1699,6 +1699,8 @@ static void i9xx_enable_pll(struct intel_crtc *crtc) I915_READ(DPLL(!crtc->pipe)) | DPLL_DVO_2X_MODE); } + I915_WRITE(reg, dpll); + /* Wait for the clocks to stabilize. */ POSTING_READ(reg); udelay(150); @@ -10389,11 +10391,21 @@ connected_sink_compute_bpp(struct intel_connector *connector, pipe_config->pipe_bpp = connector->base.display_info.bpc*3; } - /* Clamp bpp to 8 on screens without EDID 1.4 */ - if (connector->base.display_info.bpc == 0 && bpp > 24) { - DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of 24\n", - bpp); - pipe_config->pipe_bpp = 24; + /* Clamp bpp to default limit on screens without EDID 1.4 */ + if (connector->base.display_info.bpc == 0) { + int type = connector->base.connector_type; + int clamp_bpp = 24; + + /* Fall back to 18 bpp when DP sink capability is unknown. */ + if (type == DRM_MODE_CONNECTOR_DisplayPort || + type == DRM_MODE_CONNECTOR_eDP) + clamp_bpp = 18; + + if (bpp > clamp_bpp) { + DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of %d\n", + bpp, clamp_bpp); + pipe_config->pipe_bpp = clamp_bpp; + } } } @@ -12499,6 +12511,16 @@ intel_check_primary_plane(struct drm_plane *plane, intel_crtc->atomic.wait_vblank = true; } + /* + * FIXME: Actually, if we still have any other plane enabled + * on the pipe we could leave IPS enabled, but for + * now let's assume that when we make the primary plane invisible + * by setting DSPCNTR to 0 in update_primary_plane(), + * IPS needs to be disabled. + */ + if (!state->visible || !fb) + intel_crtc->atomic.disable_ips = true; + intel_crtc->atomic.fb_bits |= INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe); @@ -12590,6 +12612,9 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc) if (intel_crtc->atomic.disable_fbc) intel_fbc_disable(dev); + if (intel_crtc->atomic.disable_ips) + hsw_disable_ips(intel_crtc); + if (intel_crtc->atomic.pre_disable_primary) intel_pre_disable_primary(crtc); @@ -13199,6 +13224,11 @@ static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb, struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); struct drm_i915_gem_object *obj = intel_fb->obj; + if (obj->userptr.mm) { + DRM_DEBUG("attempting to use a userptr for a framebuffer, denied\n"); + return -EINVAL; + } + return drm_gem_handle_create(file, &obj->base, handle); } @@ -13768,6 +13798,24 @@ void intel_modeset_init(struct drm_device *dev) if (INTEL_INFO(dev)->num_pipes == 0) return; + /* + * There may be no VBT; and if the BIOS enabled SSC we can + * just keep using it to avoid unnecessary flicker. Whereas if the + * BIOS isn't using it, don't assume it will work even if the VBT + * indicates as much. + */ + if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) { + bool bios_lvds_use_ssc = !!(I915_READ(PCH_DREF_CONTROL) & + DREF_SSC1_ENABLE); + + if (dev_priv->vbt.lvds_use_ssc != bios_lvds_use_ssc) { + DRM_DEBUG_KMS("SSC %sabled by BIOS, overriding VBT which says %sabled\n", + bios_lvds_use_ssc ? "en" : "dis", + dev_priv->vbt.lvds_use_ssc ?
"en" : "dis"); + dev_priv->vbt.lvds_use_ssc = bios_lvds_use_ssc; + } + } + intel_init_display(dev); intel_init_audio(dev); @@ -14253,7 +14301,6 @@ void intel_modeset_setup_hw_state(struct drm_device *dev, void intel_modeset_gem_init(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; struct drm_crtc *c; struct drm_i915_gem_object *obj; int ret; @@ -14262,16 +14309,6 @@ void intel_modeset_gem_init(struct drm_device *dev) intel_init_gt_powersave(dev); mutex_unlock(&dev->struct_mutex); - /* - * There may be no VBT; and if the BIOS enabled SSC we can - * just keep using it to avoid unnecessary flicker. Whereas if the - * BIOS isn't using it, don't assume it will work even if the VBT - * indicates as much. - */ - if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) - dev_priv->vbt.lvds_use_ssc = !!(I915_READ(PCH_DREF_CONTROL) & - DREF_SSC1_ENABLE); - intel_modeset_init_hw(dev); intel_setup_overlay(dev); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d714a4b5711e4..fb2983f77141b 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1150,6 +1150,19 @@ intel_dp_sink_rates(struct intel_dp *intel_dp, const int **sink_rates) return (intel_dp_max_link_bw(intel_dp) >> 3) + 1; } +static bool intel_dp_source_supports_hbr2(struct drm_device *dev) +{ + /* WaDisableHBR2:skl */ + if (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) + return false; + + if ((IS_HASWELL(dev) && !IS_HSW_ULX(dev)) || IS_BROADWELL(dev) || + (INTEL_INFO(dev)->gen >= 9)) + return true; + else + return false; +} + static int intel_dp_source_rates(struct drm_device *dev, const int **source_rates) { @@ -1163,11 +1176,8 @@ intel_dp_source_rates(struct drm_device *dev, const int **source_rates) *source_rates = default_rates; - if (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) - /* WaDisableHBR2:skl */ - return (DP_LINK_BW_2_7 >> 3) + 1; - else if (INTEL_INFO(dev)->gen >= 8 || - (IS_HASWELL(dev) && !IS_HSW_ULX(dev))) + /* This depends on the fact that 5.4 is the last value in the array */ + if (intel_dp_source_supports_hbr2(dev)) return (DP_LINK_BW_5_4 >> 3) + 1; else return (DP_LINK_BW_2_7 >> 3) + 1; @@ -3783,10 +3793,15 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) } } - /* Training Pattern 3 support, both source and sink */ + /* Training Pattern 3 support. Intel platforms that support HBR2 also + * support TP3, hence that check is used along with the dpcd check + * to ensure TP3 can be enabled. + * SKL < B0 is the only exception, due to its WaDisableHBR2: TP3 is + * supported but still not enabled.
+ */ if (intel_dp->dpcd[DP_DPCD_REV] >= 0x12 && intel_dp->dpcd[DP_MAX_LANE_COUNT] & DP_TPS3_SUPPORTED && - (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8)) { + intel_dp_source_supports_hbr2(dev)) { intel_dp->use_tps3 = true; DRM_DEBUG_KMS("Displayport TPS3 supported\n"); } else @@ -4676,9 +4691,12 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) intel_dp_probe_oui(intel_dp); - if (!intel_dp_probe_mst(intel_dp)) + if (!intel_dp_probe_mst(intel_dp)) { + drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); + intel_dp_check_link_status(intel_dp); + drm_modeset_unlock(&dev->mode_config.connection_mutex); goto mst_fail; - + } } else { if (intel_dp->is_mst) { if (intel_dp_check_mst_status(intel_dp) == -EINVAL) @@ -4686,10 +4704,6 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) } if (!intel_dp->is_mst) { - /* - * we'll check the link status via the normal hot plug path later - - * but for short hpds we should check it now - */ drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); intel_dp_check_link_status(intel_dp); drm_modeset_unlock(&dev->mode_config.connection_mutex); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 5cb47482d29fb..88c557551b897 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -439,9 +439,9 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo drm_mode_connector_set_path_property(connector, pathprop); drm_reinit_primary_mode_group(dev); - mutex_lock(&dev->mode_config.mutex); + drm_modeset_lock_all(dev); intel_connector_add_to_fbdev(intel_connector); - mutex_unlock(&dev->mode_config.mutex); + drm_modeset_unlock_all(dev); drm_connector_register(&intel_connector->base); return connector; } @@ -452,16 +452,16 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_device *dev = connector->dev; /* need to nuke the connector */ - mutex_lock(&dev->mode_config.mutex); + drm_modeset_lock_all(dev); intel_connector_dpms(connector, DRM_MODE_DPMS_OFF); - mutex_unlock(&dev->mode_config.mutex); + drm_modeset_unlock_all(dev); intel_connector->unregister(intel_connector); - mutex_lock(&dev->mode_config.mutex); + drm_modeset_lock_all(dev); intel_connector_remove_from_fbdev(intel_connector); drm_connector_cleanup(connector); - mutex_unlock(&dev->mode_config.mutex); + drm_modeset_unlock_all(dev); drm_reinit_primary_mode_group(dev); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 897f17db08af4..68d1f74a74038 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -424,6 +424,7 @@ struct intel_crtc_atomic_commit { /* Sleepable operations to perform before commit */ bool wait_for_flips; bool disable_fbc; + bool disable_ips; bool pre_disable_primary; bool update_wm; unsigned disabled_planes; diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 51966426addfb..c7a0b8d8fac9d 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -1036,11 +1036,7 @@ void intel_dsi_init(struct drm_device *dev) intel_connector->unregister = intel_connector_unregister; /* Pipe A maps to MIPI DSI port A, pipe B maps to MIPI DSI port C */ - if (dev_priv->vbt.dsi.config->dual_link) { - /* XXX: does dual link work on either pipe? 
*/ - intel_encoder->crtc_mask = (1 << PIPE_A); - intel_dsi->ports = ((1 << PORT_A) | (1 << PORT_C)); - } else if (dev_priv->vbt.dsi.port == DVO_PORT_MIPIA) { + if (dev_priv->vbt.dsi.port == DVO_PORT_MIPIA) { intel_encoder->crtc_mask = (1 << PIPE_A); intel_dsi->ports = (1 << PORT_A); } else if (dev_priv->vbt.dsi.port == DVO_PORT_MIPIC) { @@ -1048,6 +1044,9 @@ void intel_dsi_init(struct drm_device *dev) intel_dsi->ports = (1 << PORT_C); } + if (dev_priv->vbt.dsi.config->dual_link) + intel_dsi->ports = ((1 << PORT_A) | (1 << PORT_C)); + /* Create a DSI host (and a device) for each port. */ for_each_dsi_port(port, intel_dsi->ports) { struct intel_dsi_host *host; diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index d2cd8d5b27a16..82f8e20cca740 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -207,7 +207,12 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) gpio = *data++; /* pull up/down */ - action = *data++; + action = *data++ & 1; + + if (gpio >= ARRAY_SIZE(gtable)) { + DRM_DEBUG_KMS("unknown gpio %u\n", gpio); + goto out; + } function = gtable[gpio].function_reg; pad = gtable[gpio].pad_reg; @@ -226,6 +231,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) vlv_gpio_nc_write(dev_priv, pad, val); mutex_unlock(&dev_priv->dpio_lock); +out: return data; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 424e621977871..72f1bb8b0499b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -848,6 +848,8 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring, ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf); if (ret) goto unpin_ctx_obj; + + ctx_obj->dirty = true; } return ret; @@ -1296,6 +1298,7 @@ static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf, if (flush_domains) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_FLUSH_ENABLE; } if (invalidate_domains) { diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 08532d4ffe0ac..2bf92cba4a554 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -879,6 +879,14 @@ static void i9xx_enable_backlight(struct intel_connector *connector) /* XXX: combine this into above write? */ intel_panel_actually_set_backlight(connector, panel->backlight.level); + + /* + * Needed to enable backlight on some 855gm models. BLC_HIST_CTL is + * 855gm only, but checking for gen2 is safe, as 855gm is the only gen2 + * that has backlight. 
+ */ + if (IS_GEN2(dev)) + I915_WRITE(BLC_HIST_CTL, BLM_HISTOGRAM_ENABLE); } static void i965_enable_backlight(struct intel_connector *connector) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 005b5e04de4d7..b7e20dee64c47 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -342,6 +342,7 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, if (flush_domains) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_FLUSH_ENABLE; } if (invalidate_domains) { flags |= PIPE_CONTROL_TLB_INVALIDATE; @@ -412,6 +413,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, if (flush_domains) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_FLUSH_ENABLE; } if (invalidate_domains) { flags |= PIPE_CONTROL_TLB_INVALIDATE; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c761fe05ad6fd..94514d364d25f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -266,6 +266,13 @@ struct intel_engine_cs { * Do we have some not yet emitted requests outstanding? */ struct drm_i915_gem_request *outstanding_lazy_request; + /** + * Seqno of request most recently submitted to request_list. + * Used exclusively by hang checker to avoid grabbing lock while + * inspecting request list. + */ + u32 last_submitted_seqno; + bool gpu_caches_dirty; wait_queue_head_t irq_queue; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index ff2a74651dd48..a18807ec8371b 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1220,10 +1220,12 @@ int i915_reg_read_ioctl(struct drm_device *dev, struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_reg_read *reg = data; struct register_whitelist const *entry = whitelist; + unsigned size; + u64 offset; int i, ret = 0; for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) { - if (entry->offset == reg->offset && + if (entry->offset == (reg->offset & -entry->size) && (1 << INTEL_INFO(dev)->gen & entry->gen_bitmask)) break; } @@ -1231,23 +1233,33 @@ int i915_reg_read_ioctl(struct drm_device *dev, if (i == ARRAY_SIZE(whitelist)) return -EINVAL; + /* We use the low bits to encode extra flags as the register should + * be naturally aligned (and those that are not so aligned merely + * limit the available flags for that register). 
+ */ + offset = entry->offset; + size = entry->size; + size |= reg->offset ^ offset; + intel_runtime_pm_get(dev_priv); - switch (entry->size) { + switch (size) { + case 8 | 1: + reg->val = I915_READ64_2x32(offset, offset+4); + break; case 8: - reg->val = I915_READ64(reg->offset); + reg->val = I915_READ64(offset); break; case 4: - reg->val = I915_READ(reg->offset); + reg->val = I915_READ(offset); break; case 2: - reg->val = I915_READ16(reg->offset); + reg->val = I915_READ16(offset); break; case 1: - reg->val = I915_READ8(reg->offset); + reg->val = I915_READ8(offset); break; default: - MISSING_CASE(entry->size); ret = -EINVAL; goto out; } diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 3162040bc3148..05490ef5a2aa4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -969,10 +969,13 @@ nouveau_connector_hotplug(struct nvif_notify *notify) NV_DEBUG(drm, "%splugged %s\n", plugged ? "" : "un", name); + mutex_lock(&drm->dev->mode_config.mutex); if (plugged) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); else drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); + mutex_unlock(&drm->dev->mode_config.mutex); + drm_helper_hpd_irq_event(connector->dev); } diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 89049335b7383..cd6dae08175e4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -863,8 +863,10 @@ nouveau_drm_preclose(struct drm_device *dev, struct drm_file *fpriv) pm_runtime_get_sync(dev->dev); + mutex_lock(&cli->mutex); if (cli->abi16) nouveau_abi16_fini(cli->abi16); + mutex_unlock(&cli->mutex); mutex_lock(&drm->client.mutex); list_del(&cli->head); diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 6751553abe4af..567791b27d6df 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -178,8 +178,30 @@ nouveau_fbcon_sync(struct fb_info *info) return 0; } +static int +nouveau_fbcon_open(struct fb_info *info, int user) +{ + struct nouveau_fbdev *fbcon = info->par; + struct nouveau_drm *drm = nouveau_drm(fbcon->dev); + int ret = pm_runtime_get_sync(drm->dev->dev); + if (ret < 0 && ret != -EACCES) + return ret; + return 0; +} + +static int +nouveau_fbcon_release(struct fb_info *info, int user) +{ + struct nouveau_fbdev *fbcon = info->par; + struct nouveau_drm *drm = nouveau_drm(fbcon->dev); + pm_runtime_put(drm->dev->dev); + return 0; +} + static struct fb_ops nouveau_fbcon_ops = { .owner = THIS_MODULE, + .fb_open = nouveau_fbcon_open, + .fb_release = nouveau_fbcon_release, .fb_check_var = drm_fb_helper_check_var, .fb_set_par = drm_fb_helper_set_par, .fb_fillrect = nouveau_fbcon_fillrect, @@ -195,6 +217,8 @@ static struct fb_ops nouveau_fbcon_ops = { static struct fb_ops nouveau_fbcon_sw_ops = { .owner = THIS_MODULE, + .fb_open = nouveau_fbcon_open, + .fb_release = nouveau_fbcon_release, .fb_check_var = drm_fb_helper_check_var, .fb_set_par = drm_fb_helper_set_par, .fb_fillrect = cfb_fillrect, diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 0e690bf19fc9c..58c959265b1a1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -227,11 +227,12 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem, struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nvkm_vma *vma; - if 
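/*
 * On the nouveau_gem_info() hunk here: valid_domains is a bitmask, so
 * is_power_of_2() is simply a one-bit-set test.  When a buffer can only
 * ever live in a single domain, that domain is now reported directly;
 * only ambiguous buffers fall back to inferring GART vs. VRAM from the
 * current TTM placement.
 */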
(nvbo->bo.mem.mem_type == TTM_PL_TT) + if (is_power_of_2(nvbo->valid_domains)) + rep->domain = nvbo->valid_domains; + else if (nvbo->bo.mem.mem_type == TTM_PL_TT) rep->domain = NOUVEAU_GEM_DOMAIN_GART; else rep->domain = NOUVEAU_GEM_DOMAIN_VRAM; - rep->offset = nvbo->bo.offset; if (cli->vm) { vma = nouveau_bo_vma_find(nvbo, cli->vm); diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c index 4ef602c5469d2..495c57644ced9 100644 --- a/drivers/gpu/drm/nouveau/nv04_fbcon.c +++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c @@ -203,7 +203,7 @@ nv04_fbcon_accel_init(struct fb_info *info) if (ret) return ret; - if (RING_SPACE(chan, 49)) { + if (RING_SPACE(chan, 49 + (device->info.chipset >= 0x11 ? 4 : 0))) { nouveau_fbcon_gpu_lockup(info); return 0; } diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 7da7958556a3a..981342d142ff6 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -979,7 +979,7 @@ nv50_crtc_cursor_show_hide(struct nouveau_crtc *nv_crtc, bool show, bool update) { struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev); - if (show && nv_crtc->cursor.nvbo) + if (show && nv_crtc->cursor.nvbo && nv_crtc->base.enabled) nv50_crtc_cursor_show(nv_crtc); else nv50_crtc_cursor_hide(nv_crtc); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c index 80614f1b20747..282143f49d72e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c @@ -50,7 +50,12 @@ nv04_instobj_dtor(struct nvkm_object *object) { struct nv04_instmem_priv *priv = (void *)nvkm_instmem(object); struct nv04_instobj_priv *node = (void *)object; + struct nvkm_subdev *subdev = (void *)priv; + + mutex_lock(&subdev->mutex); nvkm_mm_free(&priv->heap, &node->mem); + mutex_unlock(&subdev->mutex); + nvkm_instobj_destroy(&node->base); } @@ -62,6 +67,7 @@ nv04_instobj_ctor(struct nvkm_object *parent, struct nvkm_object *engine, struct nv04_instmem_priv *priv = (void *)nvkm_instmem(parent); struct nv04_instobj_priv *node; struct nvkm_instobj_args *args = data; + struct nvkm_subdev *subdev = (void *)priv; int ret; if (!args->align) @@ -72,8 +78,10 @@ nv04_instobj_ctor(struct nvkm_object *parent, struct nvkm_object *engine, if (ret) return ret; + mutex_lock(&subdev->mutex); ret = nvkm_mm_head(&priv->heap, 0, 1, args->size, args->size, args->align, &node->mem); + mutex_unlock(&subdev->mutex); if (ret) return ret; diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index 97823644d3474..f33251d67914c 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -505,6 +505,7 @@ int qxl_hw_surface_alloc(struct qxl_device *qdev, cmd = (struct qxl_surface_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_SURFACE_CMD_CREATE; + cmd->flags = QXL_SURF_FLAG_KEEP_DATA; cmd->u.surface_create.format = surf->surf.format; cmd->u.surface_create.width = surf->surf.width; cmd->u.surface_create.height = surf->surf.height; diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 4a0a8b29b0a1e..52921a8712303 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -160,9 +160,35 @@ static int qxl_add_monitors_config_modes(struct drm_connector *connector, *pwidth = head->width; *pheight = head->height; drm_mode_probed_add(connector, mode); + /* remember the last custom size for mode validation */ + 
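The size remembered here is consumed by the rewritten qxl_conn_mode_valid() further down: a mode now validates only if it matches the last client-requested custom size or one of the file-scope common_modes entries, instead of unconditionally returning MODE_OK. A compact userspace sketch of that acceptance test, with an illustrative remembered size and a shortened list:

    #include <stdio.h>
    #include <stdbool.h>

    struct mode_size { int w, h; };

    static const struct mode_size common_modes[] = {
        {  640,  480 },
        { 1024,  768 },
        { 1920, 1080 },
    };

    /* last custom size pushed by the client, 0x0 when none was seen */
    static struct mode_size monitors_config = { 1366, 768 };

    static bool mode_valid(int w, int h)
    {
        unsigned i;

        if (monitors_config.w == w && monitors_config.h == h)
            return true;
        for (i = 0; i < sizeof(common_modes) / sizeof(common_modes[0]); i++)
            if (common_modes[i].w == w && common_modes[i].h == h)
                return true;
        return false;
    }

    int main(void)
    {
        printf("1366x768 -> %s\n", mode_valid(1366, 768) ? "MODE_OK" : "MODE_BAD");
        printf("1280x555 -> %s\n", mode_valid(1280, 555) ? "MODE_OK" : "MODE_BAD");
        return 0;
    }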
qdev->monitors_config_width = mode->hdisplay; + qdev->monitors_config_height = mode->vdisplay; return 1; } +static struct mode_size { + int w; + int h; +} common_modes[] = { + { 640, 480}, + { 720, 480}, + { 800, 600}, + { 848, 480}, + {1024, 768}, + {1152, 768}, + {1280, 720}, + {1280, 800}, + {1280, 854}, + {1280, 960}, + {1280, 1024}, + {1440, 900}, + {1400, 1050}, + {1680, 1050}, + {1600, 1200}, + {1920, 1080}, + {1920, 1200} +}; + static int qxl_add_common_modes(struct drm_connector *connector, unsigned pwidth, unsigned pheight) @@ -170,29 +196,6 @@ static int qxl_add_common_modes(struct drm_connector *connector, struct drm_device *dev = connector->dev; struct drm_display_mode *mode = NULL; int i; - struct mode_size { - int w; - int h; - } common_modes[] = { - { 640, 480}, - { 720, 480}, - { 800, 600}, - { 848, 480}, - {1024, 768}, - {1152, 768}, - {1280, 720}, - {1280, 800}, - {1280, 854}, - {1280, 960}, - {1280, 1024}, - {1440, 900}, - {1400, 1050}, - {1680, 1050}, - {1600, 1200}, - {1920, 1080}, - {1920, 1200} - }; - for (i = 0; i < ARRAY_SIZE(common_modes); i++) { mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); @@ -615,7 +618,7 @@ static int qxl_crtc_mode_set(struct drm_crtc *crtc, adjusted_mode->hdisplay, adjusted_mode->vdisplay); - if (qcrtc->index == 0) + if (bo->is_primary == false) recreate_primary = true; if (bo->surf.stride * bo->surf.height > qdev->vram_size) { @@ -823,11 +826,22 @@ static int qxl_conn_get_modes(struct drm_connector *connector) static int qxl_conn_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_device *ddev = connector->dev; + struct qxl_device *qdev = ddev->dev_private; + int i; + /* TODO: is this called for user defined modes? (xrandr --add-mode) * TODO: check that the mode fits in the framebuffer */ - DRM_DEBUG("%s: %dx%d status=%d\n", mode->name, mode->hdisplay, - mode->vdisplay, mode->status); - return MODE_OK; + + if(qdev->monitors_config_width == mode->hdisplay && + qdev->monitors_config_height == mode->vdisplay) + return MODE_OK; + + for (i = 0; i < ARRAY_SIZE(common_modes); i++) { + if (common_modes[i].w == mode->hdisplay && common_modes[i].h == mode->vdisplay) + return MODE_OK; + } + return MODE_BAD; } static struct drm_encoder *qxl_best_encoder(struct drm_connector *connector) @@ -872,13 +886,15 @@ static enum drm_connector_status qxl_conn_detect( drm_connector_to_qxl_output(connector); struct drm_device *ddev = connector->dev; struct qxl_device *qdev = ddev->dev_private; - int connected; + bool connected = false; /* The first monitor is always connected */ - connected = (output->index == 0) || - (qdev->client_monitors_config && - qdev->client_monitors_config->count > output->index && - qxl_head_enabled(&qdev->client_monitors_config->heads[output->index])); + if (!qdev->client_monitors_config) { + if (output->index == 0) + connected = true; + } else + connected = qdev->client_monitors_config->count > output->index && + qxl_head_enabled(&qdev->client_monitors_config->heads[output->index]); DRM_DEBUG("#%d connected: %d\n", output->index, connected); if (!connected) diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 7c6cafe21f5f5..e66143cc1a7a9 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -325,6 +325,8 @@ struct qxl_device { struct work_struct fb_work; struct drm_property *hotplug_mode_update_property; + int monitors_config_width; + int monitors_config_height; }; /* forward declaration for QXL_INFO_IO 
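Two robustness fixes open the qxl_ioctl.c diff that follows: qxlhw_handle_to_bo() now drops the GEM reference it took when adding the object to the release list fails, closing a reference leak on the error path, and the relocation array moves to kmalloc_array(), which refuses a count * size multiplication that would overflow instead of silently allocating a short buffer from a huge userspace relocs_num. The overflow guard as a standalone sketch (a userspace analogue, not the kernel helper itself):

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    /* Reject n * size when the multiplication would wrap. */
    static void *alloc_array(size_t n, size_t size)
    {
        if (size != 0 && n > SIZE_MAX / size)
            return NULL;
        return malloc(n * size);
    }

    int main(void)
    {
        void *p = alloc_array((size_t)-1, 16);   /* hostile count */
        printf("huge request: %s\n", p ? "allocated (bad)" : "rejected");
        free(p);

        p = alloc_array(32, 16);                 /* sane count */
        printf("sane request: %s\n", p ? "allocated" : "out of memory");
        free(p);
        return 0;
    }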
*/ diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c index b110883f8253d..3aefaa058f0cc 100644 --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -122,8 +122,10 @@ static struct qxl_bo *qxlhw_handle_to_bo(struct qxl_device *qdev, qobj = gem_to_qxl_bo(gobj); ret = qxl_release_list_add(release, qobj); - if (ret) + if (ret) { + drm_gem_object_unreference_unlocked(gobj); return NULL; + } return qobj; } @@ -166,7 +168,8 @@ static int qxl_process_single_command(struct qxl_device *qdev, cmd->command_size)) return -EFAULT; - reloc_info = kmalloc(sizeof(struct qxl_reloc_info) * cmd->relocs_num, GFP_KERNEL); + reloc_info = kmalloc_array(cmd->relocs_num, + sizeof(struct qxl_reloc_info), GFP_KERNEL); if (!reloc_info) return -ENOMEM; diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index b435c859dcbc3..447dbfa6c793e 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -171,8 +171,9 @@ radeon_dp_aux_transfer_atom(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) return -E2BIG; tx_buf[0] = msg->address & 0xff; - tx_buf[1] = msg->address >> 8; - tx_buf[2] = msg->request << 4; + tx_buf[1] = (msg->address >> 8) & 0xff; + tx_buf[2] = (msg->request << 4) | + ((msg->address >> 16) & 0xf); tx_buf[3] = msg->size ? (msg->size - 1) : 0; switch (msg->request & ~DP_AUX_I2C_MOT) { diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index dd39f434b4a7e..bb292143997ee 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -237,6 +237,7 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder, backlight_update_status(bd); DRM_INFO("radeon atom DIG backlight initialized\n"); + rdev->mode_info.bl_encoder = radeon_encoder; return; @@ -1624,8 +1625,14 @@ radeon_atom_encoder_dpms_avivo(struct drm_encoder *encoder, int mode) } else atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { - args.ucAction = ATOM_LCD_BLON; - atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); + if (rdev->mode_info.bl_encoder) { + struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + + atombios_set_backlight_level(radeon_encoder, dig->backlight_level); + } else { + args.ucAction = ATOM_LCD_BLON; + atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); + } } break; case DRM_MODE_DPMS_STANDBY: @@ -1705,9 +1712,13 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) if (ASIC_IS_DCE4(rdev)) atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_ON, 0); } - if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) - atombios_dig_transmitter_setup(encoder, - ATOM_TRANSMITTER_ACTION_LCD_BLON, 0, 0); + if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { + if (rdev->mode_info.bl_encoder) + atombios_set_backlight_level(radeon_encoder, dig->backlight_level); + else + atombios_dig_transmitter_setup(encoder, + ATOM_TRANSMITTER_ACTION_LCD_BLON, 0, 0); + } if (ext_encoder) atombios_external_encoder_setup(encoder, ext_encoder, ATOM_ENABLE); break; @@ -2299,8 +2310,7 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder, encoder_mode = atombios_get_encoder_mode(encoder); if (connector && (radeon_audio != 0) && ((encoder_mode == ATOM_ENCODER_MODE_HDMI) || - (ENCODER_MODE_IS_DP(encoder_mode) && - 
drm_detect_monitor_audio(radeon_connector_edid(connector))))) + ENCODER_MODE_IS_DP(encoder_mode))) radeon_audio_mode_set(encoder, adjusted_mode); } diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 8730562323a8b..4a09947be2445 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -5818,7 +5818,7 @@ int ci_dpm_init(struct radeon_device *rdev) tmp |= DPM_ENABLED; break; default: - DRM_ERROR("Invalid PCC GPIO: %u!\n", gpio.shift); + DRM_DEBUG("Invalid PCC GPIO: %u!\n", gpio.shift); break; } WREG32_SMC(CNB_PWRMGT_CNTL, tmp); diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index ba50f3c1c2e03..8456653624750 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4579,6 +4579,31 @@ void cik_compute_set_wptr(struct radeon_device *rdev, WDOORBELL32(ring->doorbell_index, ring->wptr); } +static void cik_compute_stop(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + u32 j, tmp; + + cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); + /* Disable wptr polling. */ + tmp = RREG32(CP_PQ_WPTR_POLL_CNTL); + tmp &= ~WPTR_POLL_EN; + WREG32(CP_PQ_WPTR_POLL_CNTL, tmp); + /* Disable HQD. */ + if (RREG32(CP_HQD_ACTIVE) & 1) { + WREG32(CP_HQD_DEQUEUE_REQUEST, 1); + for (j = 0; j < rdev->usec_timeout; j++) { + if (!(RREG32(CP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + WREG32(CP_HQD_DEQUEUE_REQUEST, 0); + WREG32(CP_HQD_PQ_RPTR, 0); + WREG32(CP_HQD_PQ_WPTR, 0); + } + cik_srbm_select(rdev, 0, 0, 0, 0); +} + /** * cik_cp_compute_enable - enable/disable the compute CP MEs * @@ -4592,6 +4617,15 @@ static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) if (enable) WREG32(CP_MEC_CNTL, 0); else { + /* + * To make hibernation reliable we need to clear compute ring + * configuration before halting the compute ring. 
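cik_compute_stop() just above is a standard dequeue-and-drain shape: request a stop through CP_HQD_DEQUEUE_REQUEST, busy-poll the active bit with a bounded loop, then zero the queue pointers. Reduced to a self-contained sketch, with a fake status counter standing in for the CP_HQD_ACTIVE register:

    #include <stdio.h>
    #include <stdbool.h>

    static unsigned fake_hqd_active = 3;   /* drains after a few polls */

    static bool hqd_active(void)
    {
        if (fake_hqd_active)
            fake_hqd_active--;
        return fake_hqd_active != 0;
    }

    /* Poll until idle or until usec_timeout iterations elapse;
     * the driver udelay(1)s between polls. */
    static bool wait_hqd_idle(unsigned usec_timeout)
    {
        unsigned j;

        for (j = 0; j < usec_timeout; j++) {
            if (!hqd_active())
                return true;
        }
        return false;
    }

    int main(void)
    {
        printf("HQD %s\n", wait_hqd_idle(100) ? "drained" : "timed out");
        return 0;
    }

Note that the driver version deliberately does not fail on timeout: it clears the dequeue request and the read/write pointers regardless, since the goal is a clean state for hibernation rather than an error report.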
+ */ + mutex_lock(&rdev->srbm_mutex); + cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]); + cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]); + mutex_unlock(&rdev->srbm_mutex); + WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; @@ -7905,23 +7939,27 @@ int cik_irq_process(struct radeon_device *rdev) case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -7931,23 +7969,27 @@ int cik_irq_process(struct radeon_device *rdev) case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -7957,23 +7999,27 @@ int cik_irq_process(struct radeon_device *rdev) case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if 
(rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -7983,23 +8029,27 @@ int cik_irq_process(struct radeon_device *rdev) case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8009,23 +8059,27 @@ int cik_irq_process(struct radeon_device *rdev) case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + 
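Every vblank, vline, hotplug and HDMI hunk in cik_irq_process(), and the matching evergreen and r600 rework further down, applies the same transformation: instead of silently ignoring an IH ring event whose status bit is not asserted, the handler logs the inconsistency, then unconditionally acks the bit and runs anyway, so a latched event is never dropped. The shape, as a standalone sketch with an invented status bit and handler:

    #include <stdio.h>
    #include <stdint.h>

    #define VBLANK_INTERRUPT (1u << 0)

    static uint32_t disp_int = VBLANK_INTERRUPT;

    static void handle_vblank(void) { printf("vblank handled\n"); }

    static void process_event(void)
    {
        /* Old style: if (disp_int & VBLANK_INTERRUPT) { ... }
         * New style: warn, then ack and handle unconditionally. */
        if (!(disp_int & VBLANK_INTERRUPT))
            fprintf(stderr, "IH event w/o asserted irq bit?\n");

        disp_int &= ~VBLANK_INTERRUPT;    /* ack */
        handle_vblank();
    }

    int main(void)
    {
        process_event();    /* normal case */
        process_event();    /* bit already clear: warns, still handles */
        return 0;
    }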
rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8035,23 +8089,27 @@ int cik_irq_process(struct radeon_device *rdev) case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8071,88 +8129,112 @@ int cik_irq_process(struct radeon_device *rdev) case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); + break; case 1: - if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); + break; case 2: - if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq 
bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); + break; case 3: - if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); + break; case 4: - if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); + break; case 5: - if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; case 6: - if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); + break; case 7: - if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); + break; case 8: - if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); + break; case 9: - if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); + break; case 10: - if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o 
asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); + break; case 11: - if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index f86eb54e7763d..d16f2eebd95e6 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -268,6 +268,17 @@ static void cik_sdma_gfx_stop(struct radeon_device *rdev) } rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; + + /* FIXME use something else than big hammer but after few days can not + * seem to find good combination so reset SDMA blocks as it seems we + * do not shut them down properly. This fix hibernation and does not + * affect suspend to ram. + */ + WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1); + (void)RREG32(SRBM_SOFT_RESET); + udelay(50); + WREG32(SRBM_SOFT_RESET, 0); + (void)RREG32(SRBM_SOFT_RESET); } /** diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 68fd9fc677e35..848b1ffd5cc43 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -93,30 +93,26 @@ void dce6_afmt_select_pin(struct drm_encoder *encoder) struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - u32 offset; - if (!dig || !dig->afmt || !dig->afmt->pin) + if (!dig || !dig->afmt || !dig->pin) return; - offset = dig->afmt->offset; - - WREG32(AFMT_AUDIO_SRC_CONTROL + offset, - AFMT_AUDIO_SRC_SELECT(dig->afmt->pin->id)); + WREG32(AFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, + AFMT_AUDIO_SRC_SELECT(dig->pin->id)); } void dce6_afmt_write_latency_fields(struct drm_encoder *encoder, - struct drm_connector *connector, struct drm_display_mode *mode) + struct drm_connector *connector, + struct drm_display_mode *mode) { struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - u32 tmp = 0, offset; + u32 tmp = 0; - if (!dig || !dig->afmt || !dig->afmt->pin) + if (!dig || !dig->afmt || !dig->pin) return; - offset = dig->afmt->pin->offset; - if (mode->flags & DRM_MODE_FLAG_INTERLACE) { if (connector->latency_present[1]) tmp = VIDEO_LIPSYNC(connector->video_latency[1]) | @@ -130,24 +126,24 @@ void dce6_afmt_write_latency_fields(struct drm_encoder *encoder, else tmp = VIDEO_LIPSYNC(0) | AUDIO_LIPSYNC(0); } - WREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp); + WREG32_ENDPOINT(dig->pin->offset, + AZ_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp); } void dce6_afmt_hdmi_write_speaker_allocation(struct drm_encoder *encoder, - u8 *sadb, int sad_count) + u8 *sadb, int sad_count) { struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct 
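/*
 * In the dce6_afmt_*() helpers reworked here, the audio pin moves from
 * dig->afmt->pin to dig->pin, so each helper now guards with
 * !dig || !dig->afmt || !dig->pin and addresses the endpoint registers
 * through dig->pin->offset instead of a cached local offset.
 */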
radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - u32 offset, tmp; + u32 tmp; - if (!dig || !dig->afmt || !dig->afmt->pin) + if (!dig || !dig->afmt || !dig->pin) return; - offset = dig->afmt->pin->offset; - /* program the speaker allocation */ - tmp = RREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); + tmp = RREG32_ENDPOINT(dig->pin->offset, + AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); tmp &= ~(DP_CONNECTION | SPEAKER_ALLOCATION_MASK); /* set HDMI mode */ tmp |= HDMI_CONNECTION; @@ -155,24 +151,24 @@ void dce6_afmt_hdmi_write_speaker_allocation(struct drm_encoder *encoder, tmp |= SPEAKER_ALLOCATION(sadb[0]); else tmp |= SPEAKER_ALLOCATION(5); /* stereo */ - WREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); + WREG32_ENDPOINT(dig->pin->offset, + AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); } void dce6_afmt_dp_write_speaker_allocation(struct drm_encoder *encoder, - u8 *sadb, int sad_count) + u8 *sadb, int sad_count) { struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - u32 offset, tmp; + u32 tmp; - if (!dig || !dig->afmt || !dig->afmt->pin) + if (!dig || !dig->afmt || !dig->pin) return; - offset = dig->afmt->pin->offset; - /* program the speaker allocation */ - tmp = RREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); + tmp = RREG32_ENDPOINT(dig->pin->offset, + AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); tmp &= ~(HDMI_CONNECTION | SPEAKER_ALLOCATION_MASK); /* set DP mode */ tmp |= DP_CONNECTION; @@ -180,13 +176,13 @@ void dce6_afmt_dp_write_speaker_allocation(struct drm_encoder *encoder, tmp |= SPEAKER_ALLOCATION(sadb[0]); else tmp |= SPEAKER_ALLOCATION(5); /* stereo */ - WREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); + WREG32_ENDPOINT(dig->pin->offset, + AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); } void dce6_afmt_write_sad_regs(struct drm_encoder *encoder, - struct cea_sad *sads, int sad_count) + struct cea_sad *sads, int sad_count) { - u32 offset; int i; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; @@ -206,11 +202,9 @@ void dce6_afmt_write_sad_regs(struct drm_encoder *encoder, { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, }; - if (!dig || !dig->afmt || !dig->afmt->pin) + if (!dig || !dig->afmt || !dig->pin) return; - offset = dig->afmt->pin->offset; - for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { u32 value = 0; u8 stereo_freqs = 0; @@ -237,7 +231,7 @@ void dce6_afmt_write_sad_regs(struct drm_encoder *encoder, value |= SUPPORTED_FREQUENCIES_STEREO(stereo_freqs); - WREG32_ENDPOINT(offset, eld_reg_to_type[i][0], value); + WREG32_ENDPOINT(dig->pin->offset, eld_reg_to_type[i][0], value); } } @@ -253,7 +247,7 @@ void dce6_audio_enable(struct radeon_device *rdev, } void dce6_hdmi_audio_set_dto(struct radeon_device *rdev, - struct radeon_crtc *crtc, unsigned int clock) + struct radeon_crtc *crtc, unsigned int clock) { /* Two dtos; generally use dto0 for HDMI */ u32 value = 0; @@ -272,7 +266,7 @@ void dce6_hdmi_audio_set_dto(struct radeon_device *rdev, } void dce6_dp_audio_set_dto(struct radeon_device *rdev, - struct radeon_crtc *crtc, unsigned int clock) + struct radeon_crtc *crtc, unsigned int clock) { /* Two dtos; generally use dto1 for DP */ u32 value = 0; @@ -288,6 +282,14 @@ void dce6_dp_audio_set_dto(struct radeon_device *rdev, * is the numerator, 
DCCG_AUDIO_DTOx_MODULE is the denominator */ if (ASIC_IS_DCE8(rdev)) { + unsigned int div = (RREG32(DENTIST_DISPCLK_CNTL) & + DENTIST_DPREFCLK_WDIVIDER_MASK) >> + DENTIST_DPREFCLK_WDIVIDER_SHIFT; + div = radeon_audio_decode_dfs_div(div); + + if (div) + clock = clock * 100 / div; + WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000); WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock); } else { diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index f848acfd3fc8a..feef136cdb555 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4855,7 +4855,7 @@ int evergreen_irq_process(struct radeon_device *rdev) return IRQ_NONE; rptr = rdev->ih.rptr; - DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); + DRM_DEBUG("evergreen_irq_process start: rptr %d, wptr %d\n", rptr, wptr); /* Order reading of wptr vs. reading of IH ring data */ rmb(); @@ -4873,23 +4873,27 @@ int evergreen_irq_process(struct radeon_device *rdev) case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D1 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4899,23 +4903,27 @@ int evergreen_irq_process(struct radeon_device *rdev) case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if 
(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D2 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4925,23 +4933,27 @@ int evergreen_irq_process(struct radeon_device *rdev) case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D3 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D3 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4951,23 +4963,27 @@ int evergreen_irq_process(struct radeon_device *rdev) case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D4 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D4 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= 
~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4977,23 +4993,27 @@ int evergreen_irq_process(struct radeon_device *rdev) case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D5 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D5 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5003,23 +5023,27 @@ int evergreen_irq_process(struct radeon_device *rdev) case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D6 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D6 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5039,88 +5063,100 @@ int evergreen_irq_process(struct radeon_device *rdev) case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= 
~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); break; case 1: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); break; case 2: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); break; case 3: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); break; case 4: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); break; case 5: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); break; case 6: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); break; case 7: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 
2\n"); break; case 8: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); break; case 9: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); break; case 10: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); break; case 11: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5130,46 +5166,52 @@ int evergreen_irq_process(struct radeon_device *rdev) case 44: /* hdmi */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.afmt_status1 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status1 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI0\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status1 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status1 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI0\n"); break; case 1: - if (rdev->irq.stat_regs.evergreen.afmt_status2 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status2 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status2 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status2 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI1\n"); break; case 2: - if (rdev->irq.stat_regs.evergreen.afmt_status3 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status3 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status3 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status3 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI2\n"); break; case 3: - if 
(rdev->irq.stat_regs.evergreen.afmt_status4 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status4 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status4 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status4 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI3\n"); break; case 4: - if (rdev->irq.stat_regs.evergreen.afmt_status5 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status5 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status5 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status5 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI4\n"); break; case 5: - if (rdev->irq.stat_regs.evergreen.afmt_status6 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status6 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status6 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status6 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI5\n"); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index 9953356fe2637..3cf04a2f44bbb 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -289,6 +289,16 @@ void dce4_dp_audio_set_dto(struct radeon_device *rdev, * number (coefficient of two integer numbers. 
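The DTO comment here, like the matching one in dce6_dp_audio_set_dto() earlier, describes plain ratio arithmetic: the audio DTO output is the source clock scaled by PHASE/MODULE. The new DCE4.1 and DCE8 hunks first rescale the module value by the display PLL post divider; judging from the 100 * clock / div expression, radeon_audio_decode_dfs_div() returns the divider scaled by 100 so the correction stays in integer math (the decode table itself is driver-specific and not reproduced here). A worked sketch of that fixed-point step, with illustrative units:

    #include <stdio.h>

    /* Rescale the DTO module by a post divider expressed in
     * hundredths, e.g. 200 == divide by 2.0; 0 means "no divider". */
    static unsigned apply_dfs_div(unsigned clock, unsigned div_x100)
    {
        if (div_x100 == 0)
            return clock;
        return 100 * clock / div_x100;
    }

    int main(void)
    {
        unsigned phase = 24000;                       /* DTO1_PHASE */
        unsigned module = apply_dfs_div(54000, 200);  /* clock / 2.0 */

        printf("phase %u, module %u\n", phase, module);
        /* DTO output frequency = source clock * phase / module */
        return 0;
    }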
DCCG_AUDIO_DTOx_PHASE * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator */ + if (ASIC_IS_DCE41(rdev)) { + unsigned int div = (RREG32(DCE41_DENTIST_DISPCLK_CNTL) & + DENTIST_DPREFCLK_WDIVIDER_MASK) >> + DENTIST_DPREFCLK_WDIVIDER_SHIFT; + div = radeon_audio_decode_dfs_div(div); + + if (div) + clock = 100 * clock / div; + } + WREG32(DCCG_AUDIO_DTO1_PHASE, 24000); WREG32(DCCG_AUDIO_DTO1_MODULE, clock); } diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 4aa5f755572b1..13b6029d65cc5 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -511,6 +511,11 @@ #define DCCG_AUDIO_DTO1_CNTL 0x05cc # define DCCG_AUDIO_DTO1_USE_512FBR_DTO (1 << 3) +#define DCE41_DENTIST_DISPCLK_CNTL 0x049c +# define DENTIST_DPREFCLK_WDIVIDER(x) (((x) & 0x7f) << 24) +# define DENTIST_DPREFCLK_WDIVIDER_MASK (0x7f << 24) +# define DENTIST_DPREFCLK_WDIVIDER_SHIFT 24 + /* DCE 4.0 AFMT */ #define HDMI_CONTROL 0x7030 # define HDMI_KEEPOUT_MODE (1 << 0) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 8f6d862a18822..21e479fefcab0 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -4039,23 +4039,27 @@ int r600_irq_process(struct radeon_device *rdev) case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D1 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4065,23 +4069,27 @@ int r600_irq_process(struct radeon_device *rdev) case 5: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) 
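/* a page flip is armed on crtc 1: hand this vblank to the flip
 * handler via radeon_crtc_handle_vblank() as well */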
+ radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D2 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4101,46 +4109,53 @@ int r600_irq_process(struct radeon_device *rdev) case 19: /* HPD/DAC hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: HPD1 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); break; case 1: - if (rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: HPD2 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); break; case 4: - if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: HPD3 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); break; case 5: - if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: HPD4 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); break; case 10: - if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: HPD5 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); break; case 12: - if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: HPD6 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4150,18 +4165,22 @@ int r600_irq_process(struct 
radeon_device *rdev) case 21: /* hdmi */ switch (src_data) { case 4: - if (rdev->irq.stat_regs.r600.hdmi0_status & HDMI0_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.r600.hdmi0_status &= ~HDMI0_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI0\n"); - } + if (!(rdev->irq.stat_regs.r600.hdmi0_status & HDMI0_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: HDMI0 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.hdmi0_status &= ~HDMI0_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI0\n"); + break; case 5: - if (rdev->irq.stat_regs.r600.hdmi1_status & HDMI0_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.r600.hdmi1_status &= ~HDMI0_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI1\n"); - } + if (!(rdev->irq.stat_regs.r600.hdmi1_status & HDMI0_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: HDMI1 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.hdmi1_status &= ~HDMI0_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI1\n"); + break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 46eb0fa75a614..4bca29c5abfa1 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -268,6 +268,7 @@ struct radeon_clock { uint32_t current_dispclk; uint32_t dp_extclk; uint32_t max_pixel_clock; + uint32_t vco_freq; }; /* @@ -1656,6 +1657,7 @@ struct radeon_pm { u8 fan_max_rpm; /* dpm */ bool dpm_enabled; + bool sysfs_initialized; struct radeon_dpm dpm; }; diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 8f285244c839a..de9a2ffcf5f76 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -437,7 +437,9 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, } /* Fujitsu D3003-S2 board lists DVI-I as DVI-D and VGA */ - if (((dev->pdev->device == 0x9802) || (dev->pdev->device == 0x9806)) && + if (((dev->pdev->device == 0x9802) || + (dev->pdev->device == 0x9805) || + (dev->pdev->device == 0x9806)) && (dev->pdev->subsystem_vendor == 0x1734) && (dev->pdev->subsystem_device == 0x11bd)) { if (*connector_type == DRM_MODE_CONNECTOR_VGA) { @@ -448,14 +450,6 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, } } - /* Fujitsu D3003-S2 board lists DVI-I as DVI-I and VGA */ - if ((dev->pdev->device == 0x9805) && - (dev->pdev->subsystem_vendor == 0x1734) && - (dev->pdev->subsystem_device == 0x11bd)) { - if (*connector_type == DRM_MODE_CONNECTOR_VGA) - return false; - } - return true; } @@ -1112,6 +1106,31 @@ union firmware_info { ATOM_FIRMWARE_INFO_V2_2 info_22; }; +union igp_info { + struct _ATOM_INTEGRATED_SYSTEM_INFO info; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8; +}; + +static void radeon_atombios_get_dentist_vco_freq(struct radeon_device *rdev) +{ + struct radeon_mode_info *mode_info = &rdev->mode_info; + int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo); + union igp_info *igp_info; + u8 frev, crev; + u16 data_offset; + + if (atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + igp_info = (union igp_info *)(mode_info->atom_context->bios + + data_offset); + rdev->clock.vco_freq = + le32_to_cpu(igp_info->info_6.ulDentistVCOFreq); + } +} + bool radeon_atom_get_clock_info(struct drm_device *dev) { struct radeon_device *rdev = 
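The r600_irq_process() rework above, which si_irq_process() repeats further down, changes the handler shape: previously each vblank/vline/HPD handler was guarded by the latched status bit, so an IH ring event that arrived with the bit clear was silently dropped; now the anomaly is logged at debug level and the event is still handled and acknowledged. A minimal userspace model of the two control flows (status, VBLANK_BIT and the handler names are invented for this sketch; the driver operates on the disp_int status registers):

        #include <stdio.h>

        #define VBLANK_BIT (1u << 0)

        /* Old shape: the whole handler is guarded by the status bit, so an
         * IH event delivered with the bit clear is dropped on the floor. */
        static void handle_vblank_old(unsigned int *status)
        {
                if (*status & VBLANK_BIT) {
                        /* ... drm_handle_vblank(), pflip handling ... */
                        *status &= ~VBLANK_BIT;
                        printf("IH: vblank\n");
                }
        }

        /* New shape: log the anomaly, then handle and ack unconditionally. */
        static void handle_vblank_new(unsigned int *status)
        {
                if (!(*status & VBLANK_BIT))
                        printf("IH: vblank - IH event w/o asserted irq bit?\n");

                /* ... drm_handle_vblank(), pflip handling ... */
                *status &= ~VBLANK_BIT;
                printf("IH: vblank\n");
        }

        int main(void)
        {
                unsigned int status = 0;        /* event delivered, bit not latched */

                handle_vblank_old(&status);     /* prints nothing: event is lost */
                handle_vblank_new(&status);     /* warns, then still handles it */
                return 0;
        }

The IH ring entry itself is evidence that the event fired, so the status bit is treated as advisory, presumably to avoid losing vblank events when the latched bit races with the acknowledge.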
dev->dev_private; @@ -1263,20 +1282,25 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) rdev->mode_info.firmware_flags = le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess); + if (ASIC_IS_DCE8(rdev)) + rdev->clock.vco_freq = + le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq); + else if (ASIC_IS_DCE5(rdev)) + rdev->clock.vco_freq = rdev->clock.current_dispclk; + else if (ASIC_IS_DCE41(rdev)) + radeon_atombios_get_dentist_vco_freq(rdev); + else + rdev->clock.vco_freq = rdev->clock.current_dispclk; + + if (rdev->clock.vco_freq == 0) + rdev->clock.vco_freq = 360000; /* 3.6 GHz */ + return true; } return false; } -union igp_info { - struct _ATOM_INTEGRATED_SYSTEM_INFO info; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8; -}; - bool radeon_atombios_sideport_present(struct radeon_device *rdev) { struct radeon_mode_info *mode_info = &rdev->mode_info; diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 25191f126f3bb..b214663b370da 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -242,6 +242,35 @@ static struct radeon_audio_funcs dce6_dp_funcs = { .dpms = evergreen_dp_enable, }; +static void radeon_audio_enable(struct radeon_device *rdev, + struct r600_audio_pin *pin, u8 enable_mask) +{ + struct drm_encoder *encoder; + struct radeon_encoder *radeon_encoder; + struct radeon_encoder_atom_dig *dig; + int pin_count = 0; + + if (!pin) + return; + + if (rdev->mode_info.mode_config_initialized) { + list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) { + if (radeon_encoder_is_digital(encoder)) { + radeon_encoder = to_radeon_encoder(encoder); + dig = radeon_encoder->enc_priv; + if (dig->pin == pin) + pin_count++; + } + } + + if ((pin_count > 1) && (enable_mask == 0)) + return; + } + + if (rdev->audio.funcs->enable) + rdev->audio.funcs->enable(rdev, pin, enable_mask); +} + static void radeon_audio_interface_init(struct radeon_device *rdev) { if (ASIC_IS_DCE6(rdev)) { @@ -307,7 +336,7 @@ int radeon_audio_init(struct radeon_device *rdev) /* disable audio. 
it will be set up later */ for (i = 0; i < rdev->audio.num_pins; i++) - radeon_audio_enable(rdev, &rdev->audio.pin[i], false); + radeon_audio_enable(rdev, &rdev->audio.pin[i], 0); return 0; } @@ -329,24 +358,13 @@ void radeon_audio_endpoint_wreg(struct radeon_device *rdev, u32 offset, static void radeon_audio_write_sad_regs(struct drm_encoder *encoder) { - struct radeon_encoder *radeon_encoder; - struct drm_connector *connector; - struct radeon_connector *radeon_connector = NULL; + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct cea_sad *sads; int sad_count; - list_for_each_entry(connector, - &encoder->dev->mode_config.connector_list, head) { - if (connector->encoder == encoder) { - radeon_connector = to_radeon_connector(connector); - break; - } - } - - if (!radeon_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); + if (!connector) return; - } sad_count = drm_edid_to_sad(radeon_connector_edid(connector), &sads); if (sad_count <= 0) { @@ -355,8 +373,6 @@ static void radeon_audio_write_sad_regs(struct drm_encoder *encoder) } BUG_ON(!sads); - radeon_encoder = to_radeon_encoder(encoder); - if (radeon_encoder->audio && radeon_encoder->audio->write_sad_regs) radeon_encoder->audio->write_sad_regs(encoder, sads, sad_count); @@ -365,27 +381,16 @@ static void radeon_audio_write_sad_regs(struct drm_encoder *encoder) static void radeon_audio_write_speaker_allocation(struct drm_encoder *encoder) { + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); - struct drm_connector *connector; - struct radeon_connector *radeon_connector = NULL; u8 *sadb = NULL; int sad_count; - list_for_each_entry(connector, - &encoder->dev->mode_config.connector_list, head) { - if (connector->encoder == encoder) { - radeon_connector = to_radeon_connector(connector); - break; - } - } - - if (!radeon_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); + if (!connector) return; - } - sad_count = drm_edid_to_speaker_allocation( - radeon_connector_edid(connector), &sadb); + sad_count = drm_edid_to_speaker_allocation(radeon_connector_edid(connector), + &sadb); if (sad_count < 0) { DRM_DEBUG("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); @@ -399,26 +404,13 @@ static void radeon_audio_write_speaker_allocation(struct drm_encoder *encoder) } static void radeon_audio_write_latency_fields(struct drm_encoder *encoder, - struct drm_display_mode *mode) + struct drm_display_mode *mode) { - struct radeon_encoder *radeon_encoder; - struct drm_connector *connector; - struct radeon_connector *radeon_connector = 0; - - list_for_each_entry(connector, - &encoder->dev->mode_config.connector_list, head) { - if (connector->encoder == encoder) { - radeon_connector = to_radeon_connector(connector); - break; - } - } + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); - if (!radeon_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); + if (!connector) return; - } - - radeon_encoder = to_radeon_encoder(encoder); if (radeon_encoder->audio && radeon_encoder->audio->write_latency_fields) radeon_encoder->audio->write_latency_fields(encoder, connector, mode); @@ -443,54 +435,47 @@ static void radeon_audio_select_pin(struct drm_encoder *encoder) radeon_encoder->audio->select_pin(encoder); } -void 
radeon_audio_enable(struct radeon_device *rdev, - struct r600_audio_pin *pin, u8 enable_mask) -{ - if (rdev->audio.funcs->enable) - rdev->audio.funcs->enable(rdev, pin, enable_mask); -} - void radeon_audio_detect(struct drm_connector *connector, + struct drm_encoder *encoder, enum drm_connector_status status) { - struct radeon_device *rdev; - struct radeon_encoder *radeon_encoder; + struct drm_device *dev = connector->dev; + struct radeon_device *rdev = dev->dev_private; + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig; - if (!connector || !connector->encoder) + if (!radeon_audio_chipset_supported(rdev)) return; - rdev = connector->encoder->dev->dev_private; - - if (!radeon_audio_chipset_supported(rdev)) + if (!radeon_encoder_is_digital(encoder)) return; - radeon_encoder = to_radeon_encoder(connector->encoder); dig = radeon_encoder->enc_priv; if (status == connector_status_connected) { - struct radeon_connector *radeon_connector; - int sink_type; - - if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) { - radeon_encoder->audio = NULL; - return; - } - - radeon_connector = to_radeon_connector(connector); - sink_type = radeon_dp_getsinktype(radeon_connector); - - if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort && - sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) - radeon_encoder->audio = rdev->audio.dp_funcs; - else + if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { + struct radeon_connector *radeon_connector = to_radeon_connector(connector); + + if (radeon_dp_getsinktype(radeon_connector) == + CONNECTOR_OBJECT_ID_DISPLAYPORT) + radeon_encoder->audio = rdev->audio.dp_funcs; + else + radeon_encoder->audio = rdev->audio.hdmi_funcs; + } else { radeon_encoder->audio = rdev->audio.hdmi_funcs; + } - dig->afmt->pin = radeon_audio_get_pin(connector->encoder); - radeon_audio_enable(rdev, dig->afmt->pin, 0xf); + if (drm_detect_monitor_audio(radeon_connector_edid(connector))) { + if (!dig->pin) + dig->pin = radeon_audio_get_pin(encoder); + radeon_audio_enable(rdev, dig->pin, 0xf); + } else { + radeon_audio_enable(rdev, dig->pin, 0); + dig->pin = NULL; + } } else { - radeon_audio_enable(rdev, dig->afmt->pin, 0); - dig->afmt->pin = NULL; + radeon_audio_enable(rdev, dig->pin, 0); + dig->pin = NULL; } } @@ -502,7 +487,7 @@ void radeon_audio_fini(struct radeon_device *rdev) return; for (i = 0; i < rdev->audio.num_pins; i++) - radeon_audio_enable(rdev, &rdev->audio.pin[i], false); + radeon_audio_enable(rdev, &rdev->audio.pin[i], 0); rdev->audio.enabled = false; } @@ -518,29 +503,18 @@ static void radeon_audio_set_dto(struct drm_encoder *encoder, unsigned int clock } static int radeon_audio_set_avi_packet(struct drm_encoder *encoder, - struct drm_display_mode *mode) + struct drm_display_mode *mode) { struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - struct drm_connector *connector; - struct radeon_connector *radeon_connector = NULL; + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE]; struct hdmi_avi_infoframe frame; int err; - list_for_each_entry(connector, - &encoder->dev->mode_config.connector_list, head) { - if (connector->encoder == encoder) { - radeon_connector = to_radeon_connector(connector); - break; - } - } - - if (!radeon_connector) { - DRM_ERROR("Couldn't find encoder's 
connector\n"); - return -ENOENT; - } + if (!connector) + return -EINVAL; err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); if (err < 0) { @@ -548,13 +522,15 @@ static int radeon_audio_set_avi_packet(struct drm_encoder *encoder, return err; } - if (drm_rgb_quant_range_selectable(radeon_connector_edid(connector))) { - if (radeon_encoder->output_csc == RADEON_OUTPUT_CSC_TVRGB) - frame.quantization_range = HDMI_QUANTIZATION_RANGE_LIMITED; - else - frame.quantization_range = HDMI_QUANTIZATION_RANGE_FULL; - } else { - frame.quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT; + if (radeon_encoder->output_csc != RADEON_OUTPUT_CSC_BYPASS) { + if (drm_rgb_quant_range_selectable(radeon_connector_edid(connector))) { + if (radeon_encoder->output_csc == RADEON_OUTPUT_CSC_TVRGB) + frame.quantization_range = HDMI_QUANTIZATION_RANGE_LIMITED; + else + frame.quantization_range = HDMI_QUANTIZATION_RANGE_FULL; + } else { + frame.quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT; + } } err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer)); @@ -563,8 +539,8 @@ static int radeon_audio_set_avi_packet(struct drm_encoder *encoder, return err; } - if (dig && dig->afmt && - radeon_encoder->audio && radeon_encoder->audio->set_avi_packet) + if (dig && dig->afmt && radeon_encoder->audio && + radeon_encoder->audio->set_avi_packet) radeon_encoder->audio->set_avi_packet(rdev, dig->afmt->offset, buffer, sizeof(buffer)); @@ -722,59 +698,69 @@ static void radeon_audio_hdmi_mode_set(struct drm_encoder *encoder, { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); if (!dig || !dig->afmt) return; - radeon_audio_set_mute(encoder, true); + if (!connector) + return; - radeon_audio_write_speaker_allocation(encoder); - radeon_audio_write_sad_regs(encoder); - radeon_audio_write_latency_fields(encoder, mode); - radeon_audio_set_dto(encoder, mode->clock); - radeon_audio_set_vbi_packet(encoder); - radeon_hdmi_set_color_depth(encoder); - radeon_audio_update_acr(encoder, mode->clock); - radeon_audio_set_audio_packet(encoder); - radeon_audio_select_pin(encoder); + if (drm_detect_monitor_audio(radeon_connector_edid(connector))) { + radeon_audio_set_mute(encoder, true); - if (radeon_audio_set_avi_packet(encoder, mode) < 0) - return; + radeon_audio_write_speaker_allocation(encoder); + radeon_audio_write_sad_regs(encoder); + radeon_audio_write_latency_fields(encoder, mode); + radeon_audio_set_dto(encoder, mode->clock); + radeon_audio_set_vbi_packet(encoder); + radeon_hdmi_set_color_depth(encoder); + radeon_audio_update_acr(encoder, mode->clock); + radeon_audio_set_audio_packet(encoder); + radeon_audio_select_pin(encoder); + + if (radeon_audio_set_avi_packet(encoder, mode) < 0) + return; - radeon_audio_set_mute(encoder, false); + radeon_audio_set_mute(encoder, false); + } else { + radeon_hdmi_set_color_depth(encoder); + + if (radeon_audio_set_avi_packet(encoder, mode) < 0) + return; + } } static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode) + struct drm_display_mode *mode) { struct drm_device *dev = encoder->dev; struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); - struct radeon_connector *radeon_connector = 
to_radeon_connector(connector); - struct radeon_connector_atom_dig *dig_connector = - radeon_connector->con_priv; if (!dig || !dig->afmt) return; - radeon_audio_write_speaker_allocation(encoder); - radeon_audio_write_sad_regs(encoder); - radeon_audio_write_latency_fields(encoder, mode); - if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev)) - radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10); - else - radeon_audio_set_dto(encoder, dig_connector->dp_clock); - radeon_audio_set_audio_packet(encoder); - radeon_audio_select_pin(encoder); - - if (radeon_audio_set_avi_packet(encoder, mode) < 0) + if (!connector) return; + + if (drm_detect_monitor_audio(radeon_connector_edid(connector))) { + radeon_audio_write_speaker_allocation(encoder); + radeon_audio_write_sad_regs(encoder); + radeon_audio_write_latency_fields(encoder, mode); + radeon_audio_set_dto(encoder, rdev->clock.vco_freq * 10); + radeon_audio_set_audio_packet(encoder); + radeon_audio_select_pin(encoder); + + if (radeon_audio_set_avi_packet(encoder, mode) < 0) + return; + } } void radeon_audio_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode) + struct drm_display_mode *mode) { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); @@ -789,3 +775,15 @@ void radeon_audio_dpms(struct drm_encoder *encoder, int mode) if (radeon_encoder->audio && radeon_encoder->audio->dpms) radeon_encoder->audio->dpms(encoder, mode == DRM_MODE_DPMS_ON); } + +unsigned int radeon_audio_decode_dfs_div(unsigned int div) +{ + if (div >= 8 && div < 64) + return (div - 8) * 25 + 200; + else if (div >= 64 && div < 96) + return (div - 64) * 50 + 1600; + else if (div >= 96 && div < 128) + return (div - 96) * 100 + 3200; + else + return 0; +} diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h index c92d059ab204d..5c70cceaa4a6c 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.h +++ b/drivers/gpu/drm/radeon/radeon_audio.h @@ -68,17 +68,17 @@ struct radeon_audio_funcs int radeon_audio_init(struct radeon_device *rdev); void radeon_audio_detect(struct drm_connector *connector, - enum drm_connector_status status); + struct drm_encoder *encoder, + enum drm_connector_status status); u32 radeon_audio_endpoint_rreg(struct radeon_device *rdev, u32 offset, u32 reg); void radeon_audio_endpoint_wreg(struct radeon_device *rdev, u32 offset, u32 reg, u32 v); struct r600_audio_pin *radeon_audio_get_pin(struct drm_encoder *encoder); -void radeon_audio_enable(struct radeon_device *rdev, - struct r600_audio_pin *pin, u8 enable_mask); void radeon_audio_fini(struct radeon_device *rdev); void radeon_audio_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode); void radeon_audio_dpms(struct drm_encoder *encoder, int mode); +unsigned int radeon_audio_decode_dfs_div(unsigned int div); #endif diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 3e5f6b71f3ada..a9b01bcf7d0a2 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -1255,10 +1255,15 @@ struct radeon_encoder_lvds *radeon_combios_get_lvds_info(struct radeon_encoder if ((RBIOS16(tmp) == lvds->native_mode.hdisplay) && (RBIOS16(tmp + 2) == lvds->native_mode.vdisplay)) { + u32 hss = (RBIOS16(tmp + 21) - RBIOS16(tmp + 19) - 1) * 8; + + if (hss > lvds->native_mode.hdisplay) + hss = (10 - 1) * 8; + lvds->native_mode.htotal = lvds->native_mode.hdisplay + (RBIOS16(tmp + 17) - RBIOS16(tmp + 19)) * 8; lvds->native_mode.hsync_start = lvds->native_mode.hdisplay 
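The radeon_audio_decode_dfs_div() helper added above decodes the 7-bit DENTIST divider field into a divider value scaled by 100, in three ranges of increasing step size; the DCE4.1 DTO path earlier in this patch then rescales the audio source clock as 100 * clock / div. A self-contained restatement with a worked example (the main() harness and its values are illustrative only):

        #include <stdio.h>

        /* Same arithmetic as radeon_audio_decode_dfs_div(): returns the
         * divider multiplied by 100, or 0 for reserved encodings. */
        static unsigned int decode_dfs_div(unsigned int div)
        {
                if (div >= 8 && div < 64)
                        return (div - 8) * 25 + 200;    /* 2.00 .. 15.75, 0.25 steps */
                else if (div >= 64 && div < 96)
                        return (div - 64) * 50 + 1600;  /* 16.00 .. 31.50, 0.50 steps */
                else if (div >= 96 && div < 128)
                        return (div - 96) * 100 + 3200; /* 32.00 .. 63.00, 1.00 steps */
                else
                        return 0;
        }

        int main(void)
        {
                unsigned int clock = 24000;             /* illustrative DTO clock */
                unsigned int div = decode_dfs_div(40);  /* (40 - 8) * 25 + 200 = 1000 */

                if (div)
                        clock = 100 * clock / div;      /* as in the DCE4.1 DTO setup */
                printf("div=%u clock=%u\n", div, clock);
                return 0;
        }

An encoded field of 40 therefore means a divide-by-10.00, and out-of-range encodings decode to 0, which the caller treats as "leave the clock unscaled".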
+ - (RBIOS16(tmp + 21) - RBIOS16(tmp + 19) - 1) * 8; + hss; lvds->native_mode.hsync_end = lvds->native_mode.hsync_start + (RBIOS8(tmp + 23) * 8); @@ -3382,6 +3387,14 @@ void radeon_combios_asic_init(struct drm_device *dev) rdev->pdev->subsystem_device == 0x30ae) return; + /* quirk for rs4xx HP Compaq dc5750 Small Form Factor to make it resume + * - it hangs on resume inside the dynclk 1 table. + */ + if (rdev->family == CHIP_RS480 && + rdev->pdev->subsystem_vendor == 0x103c && + rdev->pdev->subsystem_device == 0x280a) + return; + /* DYN CLK 1 */ table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE); if (table) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index cebb65e07e1d1..5a2cafb4f1bc5 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -95,6 +95,11 @@ void radeon_connector_hotplug(struct drm_connector *connector) if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) { drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); } else if (radeon_dp_needs_link_train(radeon_connector)) { + /* Don't try to start link training before we + * have the dpcd */ + if (!radeon_dp_getdpcd(radeon_connector)) + return; + /* set it to OFF so that drm_helper_connector_dpms() * won't return immediately since the current state * is ON at this point. @@ -1379,8 +1384,16 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) /* updated in get modes as well since we need to know if it's analog or digital */ radeon_connector_update_scratch_regs(connector, ret); - if (radeon_audio != 0) - radeon_audio_detect(connector, ret); + if ((radeon_audio != 0) && radeon_connector->use_digital) { + const struct drm_connector_helper_funcs *connector_funcs = + connector->helper_private; + + encoder = connector_funcs->best_encoder(connector); + if (encoder && (encoder->encoder_type == DRM_MODE_ENCODER_TMDS)) { + radeon_connector_get_edid(connector); + radeon_audio_detect(connector, encoder, ret); + } + } exit: pm_runtime_mark_last_busy(connector->dev->dev); @@ -1717,8 +1730,10 @@ radeon_dp_detect(struct drm_connector *connector, bool force) radeon_connector_update_scratch_regs(connector, ret); - if (radeon_audio != 0) - radeon_audio_detect(connector, ret); + if ((radeon_audio != 0) && encoder) { + radeon_connector_get_edid(connector); + radeon_audio_detect(connector, encoder, ret); + } out: pm_runtime_mark_last_busy(connector->dev->dev); diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 45e54060ee97e..fa661744a1f57 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -205,8 +205,9 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) | (x << 16) | y)); /* offset is from DISP(2)_BASE_ADDRESS */ - WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, (radeon_crtc->legacy_cursor_offset + - (yorigin * 256))); + WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, + radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr + + yorigin * 256); } radeon_crtc->cursor_x = x; @@ -227,51 +228,32 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, return ret; } -static int radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj) +static void radeon_set_cursor(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_device *rdev = crtc->dev->dev_private; - struct radeon_bo *robj = gem_to_radeon_bo(obj); - uint64_t gpu_addr; - int 
ret; - - ret = radeon_bo_reserve(robj, false); - if (unlikely(ret != 0)) - goto fail; - /* Only 27 bit offset for legacy cursor */ - ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM, - ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, - &gpu_addr); - radeon_bo_unreserve(robj); - if (ret) - goto fail; if (ASIC_IS_DCE4(rdev)) { WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, - upper_32_bits(gpu_addr)); + upper_32_bits(radeon_crtc->cursor_addr)); WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - gpu_addr & 0xffffffff); + lower_32_bits(radeon_crtc->cursor_addr)); } else if (ASIC_IS_AVIVO(rdev)) { if (rdev->family >= CHIP_RV770) { if (radeon_crtc->crtc_id) - WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr)); + WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); else - WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr)); + WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); } WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - gpu_addr & 0xffffffff); + lower_32_bits(radeon_crtc->cursor_addr)); } else { - radeon_crtc->legacy_cursor_offset = gpu_addr - radeon_crtc->legacy_display_base_addr; /* offset is from DISP(2)_BASE_ADDRESS */ - WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, radeon_crtc->legacy_cursor_offset); + WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, + radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr); } - - return 0; - -fail: - drm_gem_object_unreference_unlocked(obj); - - return ret; } int radeon_crtc_cursor_set2(struct drm_crtc *crtc, @@ -283,7 +265,9 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, int32_t hot_y) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct radeon_device *rdev = crtc->dev->dev_private; struct drm_gem_object *obj; + struct radeon_bo *robj; int ret; if (!handle) { @@ -305,6 +289,23 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, return -ENOENT; } + robj = gem_to_radeon_bo(obj); + ret = radeon_bo_reserve(robj, false); + if (ret != 0) { + drm_gem_object_unreference_unlocked(obj); + return ret; + } + /* Only 27 bit offset for legacy cursor */ + ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM, + ASIC_IS_AVIVO(rdev) ? 
0 : 1 << 27, + &radeon_crtc->cursor_addr); + radeon_bo_unreserve(robj); + if (ret) { + DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); + drm_gem_object_unreference_unlocked(obj); + return ret; + } + radeon_crtc->cursor_width = width; radeon_crtc->cursor_height = height; @@ -323,13 +324,8 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, radeon_crtc->cursor_hot_y = hot_y; } - ret = radeon_set_cursor(crtc, obj); - - if (ret) - DRM_ERROR("radeon_set_cursor returned %d, not changing cursor\n", - ret); - else - radeon_show_cursor(crtc); + radeon_set_cursor(crtc); + radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); @@ -341,8 +337,7 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, radeon_bo_unpin(robj); radeon_bo_unreserve(robj); } - if (radeon_crtc->cursor_bo != obj) - drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo); + drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo); } radeon_crtc->cursor_bo = obj; @@ -360,7 +355,6 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, void radeon_cursor_reset(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - int ret; if (radeon_crtc->cursor_bo) { radeon_lock_cursor(crtc, true); @@ -368,12 +362,8 @@ void radeon_cursor_reset(struct drm_crtc *crtc) radeon_cursor_move_locked(crtc, radeon_crtc->cursor_x, radeon_crtc->cursor_y); - ret = radeon_set_cursor(crtc, radeon_crtc->cursor_bo); - if (ret) - DRM_ERROR("radeon_set_cursor returned %d, not showing " - "cursor\n", ret); - else - radeon_show_cursor(crtc); + radeon_set_cursor(crtc); + radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); } diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index a7fdfa4f0857b..ccab94ed9d94e 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1572,11 +1572,21 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); } - /* unpin the front buffers */ + /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->primary->fb); struct radeon_bo *robj; + if (radeon_crtc->cursor_bo) { + struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo); + r = radeon_bo_reserve(robj, false); + if (r == 0) { + radeon_bo_unpin(robj); + radeon_bo_unreserve(robj); + } + } + if (rfb == NULL || rfb->obj == NULL) { continue; } @@ -1639,6 +1649,7 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) { struct drm_connector *connector; struct radeon_device *rdev = dev->dev_private; + struct drm_crtc *crtc; int r; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) @@ -1678,6 +1689,27 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) radeon_restore_bios_scratch_regs(rdev); + /* pin cursors */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + + if (radeon_crtc->cursor_bo) { + struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo); + r = radeon_bo_reserve(robj, false); + if (r == 0) { + /* Only 27 bit offset for legacy cursor */ + r = radeon_bo_pin_restricted(robj, + RADEON_GEM_DOMAIN_VRAM, + ASIC_IS_AVIVO(rdev) ? 
+ 0 : 1 << 27, + &radeon_crtc->cursor_addr); + if (r != 0) + DRM_ERROR("Failed to pin cursor BO (%d)\n", r); + radeon_bo_unreserve(robj); + } + } + } + /* init dig PHYs, disp eng pll */ if (rdev->is_atom_bios) { radeon_atom_encoder_init(rdev); @@ -1702,6 +1734,7 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) } drm_kms_helper_poll_enable(dev); + drm_helper_hpd_irq_event(dev); /* set the power state here in case we are a PX system or headless */ if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index d2e9e9efc159c..6743174acdbcd 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1633,18 +1633,8 @@ int radeon_modeset_init(struct radeon_device *rdev) radeon_fbdev_init(rdev); drm_kms_helper_poll_init(rdev->ddev); - if (rdev->pm.dpm_enabled) { - /* do dpm late init */ - ret = radeon_pm_late_init(rdev); - if (ret) { - rdev->pm.dpm_enabled = false; - DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n"); - } - /* set the dpm state for PX since there won't be - * a modeset to call this. - */ - radeon_pm_compute_clocks(rdev); - } + /* do pm late init */ + ret = radeon_pm_late_init(rdev); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_dp_auxch.c b/drivers/gpu/drm/radeon/radeon_dp_auxch.c index fcbd60bb03495..3b0c229d7dcd2 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_auxch.c +++ b/drivers/gpu/drm/radeon/radeon_dp_auxch.c @@ -116,8 +116,8 @@ radeon_dp_aux_transfer_native(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg AUX_SW_WR_BYTES(bytes)); /* write the data header into the registers */ - /* request, addres, msg size */ - byte = (msg->request << 4); + /* request, address, msg size */ + byte = (msg->request << 4) | ((msg->address >> 16) & 0xf); WREG32(AUX_SW_DATA + aux_offset[instance], AUX_SW_DATA_MASK(byte) | AUX_SW_AUTOINCREMENT_DISABLE); diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c index 257b10be5cda9..c9ff4cf4c4e70 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_mst.c +++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c @@ -283,12 +283,13 @@ static struct drm_connector *radeon_dp_add_mst_connector(struct drm_dp_mst_topol radeon_connector->mst_encoder = radeon_dp_create_fake_mst_encoder(master); drm_object_attach_property(&connector->base, dev->mode_config.path_property, 0); + drm_object_attach_property(&connector->base, dev->mode_config.tile_property, 0); drm_mode_connector_set_path_property(connector, pathprop); drm_reinit_primary_mode_group(dev); - mutex_lock(&dev->mode_config.mutex); + drm_modeset_lock_all(dev); radeon_fb_add_connector(rdev, connector); - mutex_unlock(&dev->mode_config.mutex); + drm_modeset_unlock_all(dev); drm_connector_register(connector); return connector; @@ -303,12 +304,12 @@ static void radeon_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_connector_unregister(connector); /* need to nuke the connector */ - mutex_lock(&dev->mode_config.mutex); + drm_modeset_lock_all(dev); /* dpms off */ radeon_fb_remove_connector(rdev, connector); drm_connector_cleanup(connector); - mutex_unlock(&dev->mode_config.mutex); + drm_modeset_unlock_all(dev); drm_reinit_primary_mode_group(dev); diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index ef99917f000d9..c6ee80216cf4a 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -194,7 
+194,6 @@ static void radeon_encoder_add_backlight(struct radeon_encoder *radeon_encoder, radeon_atom_backlight_init(radeon_encoder, connector); else radeon_legacy_backlight_init(radeon_encoder, connector); - rdev->mode_info.bl_encoder = radeon_encoder; } } diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index aeb676708e60c..634793ea84188 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -257,7 +257,6 @@ static int radeonfb_create(struct drm_fb_helper *helper, } info->par = rfbdev; - info->skip_vt_switch = true; ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj); if (ret) { diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 5450fa95a47ef..c4777c8d0312a 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -260,8 +260,10 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, } } } - mb(); - radeon_gart_tlb_flush(rdev); + if (rdev->gart.ptr) { + mb(); + radeon_gart_tlb_flush(rdev); + } } /** @@ -306,8 +308,10 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, page_base += RADEON_GPU_PAGE_SIZE; } } - mb(); - radeon_gart_tlb_flush(rdev); + if (rdev->gart.ptr) { + mb(); + radeon_gart_tlb_flush(rdev); + } return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index ac3c1310b9531..186d0b792a02c 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -36,6 +36,7 @@ void radeon_gem_object_free(struct drm_gem_object *gobj) if (robj) { if (robj->gem_base.import_attach) drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg); + radeon_mn_unregister(robj); radeon_bo_unref(&robj); } } @@ -471,6 +472,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, r = ret; /* Flush HDP cache via MMIO if necessary */ + cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); if (rdev->asic->mmio_hdp_flush && radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM) robj->rdev->asic->mmio_hdp_flush(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 7162c935371c6..f682e5351252e 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -79,10 +79,12 @@ static void radeon_hotplug_work_func(struct work_struct *work) struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; + mutex_lock(&mode_config->mutex); if (mode_config->num_connector) { list_for_each_entry(connector, &mode_config->connector_list, head) radeon_connector_hotplug(connector); } + mutex_unlock(&mode_config->mutex); /* Just fire off a uevent and let userspace tell us what to do */ drm_helper_hpd_irq_event(dev); } diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c index 45715307db717..30de43366eae8 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c @@ -441,6 +441,7 @@ void radeon_legacy_backlight_init(struct radeon_encoder *radeon_encoder, backlight_update_status(bd); DRM_INFO("radeon legacy LVDS backlight initialized\n"); + rdev->mode_info.bl_encoder = radeon_encoder; return; diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index fa91a17b81b69..9af2d8398e90f 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ 
b/drivers/gpu/drm/radeon/radeon_mode.h @@ -237,7 +237,6 @@ struct radeon_afmt { int offset; bool last_buffer_filled_status; int id; - struct r600_audio_pin *pin; }; struct radeon_mode_info { @@ -343,7 +342,6 @@ struct radeon_crtc { int max_cursor_width; int max_cursor_height; uint32_t legacy_display_base_addr; - uint32_t legacy_cursor_offset; enum radeon_rmx_type rmx_type; u8 h_border; u8 v_border; @@ -440,6 +438,7 @@ struct radeon_encoder_atom_dig { uint8_t backlight_level; int panel_mode; struct radeon_afmt *afmt; + struct r600_audio_pin *pin; int active_mst_links; }; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 318165d4855c4..741065bd14b34 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -33,6 +33,7 @@ #include <linux/slab.h> #include <drm/drmP.h> #include <drm/radeon_drm.h> +#include <drm/drm_cache.h> #include "radeon.h" #include "radeon_trace.h" @@ -75,7 +76,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) bo = container_of(tbo, struct radeon_bo, tbo); radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1); - radeon_mn_unregister(bo); mutex_lock(&bo->rdev->gem.mutex); list_del_init(&bo->list); @@ -226,7 +226,7 @@ int radeon_bo_create(struct radeon_device *rdev, /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 */ - bo->flags &= ~RADEON_GEM_GTT_WC; + bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); #elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) /* Don't try to enable write-combining when it can't work, or things * may be slow @@ -238,7 +238,13 @@ int radeon_bo_create(struct radeon_device *rdev, DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " "better performance thanks to write-combining\n"); - bo->flags &= ~RADEON_GEM_GTT_WC; + bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); +#else + /* For architectures that don't support WC memory, + * mask out the WC flag from the BO + */ + if (!drm_arch_can_wc_memory()) + bo->flags &= ~RADEON_GEM_GTT_WC; #endif radeon_ttm_placement_from_domain(bo, domain); diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index c1ba83a8dd8c9..a56eab7f0ab1d 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -720,10 +720,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, struct radeon_device *rdev = dev_get_drvdata(dev); umode_t effective_mode = attr->mode; - /* Skip limit attributes if DPM is not enabled */ + /* Skip attributes if DPM is not enabled */ if (rdev->pm.pm_method != PM_METHOD_DPM && (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr || - attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr)) + attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr || + attr == &sensor_dev_attr_pwm1.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) return 0; /* Skip fan attributes if fan is not present */ @@ -1075,12 +1079,6 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) /* update display watermarks based on new power state */ radeon_bandwidth_update(rdev); - /* update displays */ - radeon_dpm_display_configuration_changed(rdev); - - rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs; - rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count; - rdev->pm.dpm.single_display = single_display; /* wait for the rings to 
drain */ for (i = 0; i < RADEON_NUM_RINGS; i++) { @@ -1097,6 +1095,13 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) radeon_dpm_post_set_power_state(rdev); + /* update displays */ + radeon_dpm_display_configuration_changed(rdev); + + rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs; + rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count; + rdev->pm.dpm.single_display = single_display; + if (rdev->asic->dpm.force_performance_level) { if (rdev->pm.dpm.thermal_active) { enum radeon_dpm_forced_level level = rdev->pm.dpm.forced_level; @@ -1331,14 +1336,6 @@ static int radeon_pm_init_old(struct radeon_device *rdev) INIT_DELAYED_WORK(&rdev->pm.dynpm_idle_work, radeon_dynpm_idle_work_handler); if (rdev->pm.num_power_states > 1) { - /* where's the best place to put these? */ - ret = device_create_file(rdev->dev, &dev_attr_power_profile); - if (ret) - DRM_ERROR("failed to create device file for power profile\n"); - ret = device_create_file(rdev->dev, &dev_attr_power_method); - if (ret) - DRM_ERROR("failed to create device file for power method\n"); - if (radeon_debugfs_pm_init(rdev)) { DRM_ERROR("Failed to register debugfs file for PM!\n"); } @@ -1396,20 +1393,6 @@ static int radeon_pm_init_dpm(struct radeon_device *rdev) goto dpm_failed; rdev->pm.dpm_enabled = true; - ret = device_create_file(rdev->dev, &dev_attr_power_dpm_state); - if (ret) - DRM_ERROR("failed to create device file for dpm state\n"); - ret = device_create_file(rdev->dev, &dev_attr_power_dpm_force_performance_level); - if (ret) - DRM_ERROR("failed to create device file for dpm state\n"); - /* XXX: these are noops for dpm but are here for backwards compat */ - ret = device_create_file(rdev->dev, &dev_attr_power_profile); - if (ret) - DRM_ERROR("failed to create device file for power profile\n"); - ret = device_create_file(rdev->dev, &dev_attr_power_method); - if (ret) - DRM_ERROR("failed to create device file for power method\n"); - if (radeon_debugfs_pm_init(rdev)) { DRM_ERROR("Failed to register debugfs file for dpm!\n"); } @@ -1550,9 +1533,51 @@ int radeon_pm_late_init(struct radeon_device *rdev) int ret = 0; if (rdev->pm.pm_method == PM_METHOD_DPM) { - mutex_lock(&rdev->pm.mutex); - ret = radeon_dpm_late_enable(rdev); - mutex_unlock(&rdev->pm.mutex); + if (rdev->pm.dpm_enabled) { + if (!rdev->pm.sysfs_initialized) { + ret = device_create_file(rdev->dev, &dev_attr_power_dpm_state); + if (ret) + DRM_ERROR("failed to create device file for dpm state\n"); + ret = device_create_file(rdev->dev, &dev_attr_power_dpm_force_performance_level); + if (ret) + DRM_ERROR("failed to create device file for dpm state\n"); + /* XXX: these are noops for dpm but are here for backwards compat */ + ret = device_create_file(rdev->dev, &dev_attr_power_profile); + if (ret) + DRM_ERROR("failed to create device file for power profile\n"); + ret = device_create_file(rdev->dev, &dev_attr_power_method); + if (ret) + DRM_ERROR("failed to create device file for power method\n"); + if (!ret) + rdev->pm.sysfs_initialized = true; + } + + mutex_lock(&rdev->pm.mutex); + ret = radeon_dpm_late_enable(rdev); + mutex_unlock(&rdev->pm.mutex); + if (ret) { + rdev->pm.dpm_enabled = false; + DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n"); + } else { + /* set the dpm state for PX since there won't be + * a modeset to call this. 
+ */ + radeon_pm_compute_clocks(rdev); + } + } + } else { + if ((rdev->pm.num_power_states > 1) && + (!rdev->pm.sysfs_initialized)) { + /* where's the best place to put these? */ + ret = device_create_file(rdev->dev, &dev_attr_power_profile); + if (ret) + DRM_ERROR("failed to create device file for power profile\n"); + ret = device_create_file(rdev->dev, &dev_attr_power_method); + if (ret) + DRM_ERROR("failed to create device file for power method\n"); + if (!ret) + rdev->pm.sysfs_initialized = true; + } } return ret; } diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index c507896aca45a..197b157b73d09 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -349,8 +349,13 @@ int radeon_sa_bo_new(struct radeon_device *rdev, /* see if we can skip over some allocations */ } while (radeon_sa_bo_next_hole(sa_manager, fences, tries)); + for (i = 0; i < RADEON_NUM_RINGS; ++i) + radeon_fence_ref(fences[i]); + spin_unlock(&sa_manager->wq.lock); r = radeon_fence_wait_any(rdev, fences, false); + for (i = 0; i < RADEON_NUM_RINGS; ++i) + radeon_fence_unref(&fences[i]); spin_lock(&sa_manager->wq.lock); /* if we have nothing to wait for block */ if (r == -ENOENT) { diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index edafd3c2b1702..f5c0590bbf736 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -758,7 +758,7 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm) 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); if (pci_dma_mapping_error(rdev->pdev, gtt->ttm.dma_address[i])) { - while (--i) { + while (i--) { pci_unmap_page(rdev->pdev, gtt->ttm.dma_address[i], PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); gtt->ttm.dma_address[i] = 0; diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 9c3377ca17b75..8ec4e4591756e 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -456,15 +456,15 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, if (soffset) { /* make sure object fit at this offset */ - eoffset = soffset + size; + eoffset = soffset + size - 1; if (soffset >= eoffset) { r = -EINVAL; goto error_unreserve; } last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; - if (last_pfn > rdev->vm_manager.max_pfn) { - dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", + if (last_pfn >= rdev->vm_manager.max_pfn) { + dev_err(rdev->dev, "va above limit (0x%08X >= 0x%08X)\n", last_pfn, rdev->vm_manager.max_pfn); r = -EINVAL; goto error_unreserve; @@ -479,7 +479,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, eoffset /= RADEON_GPU_PAGE_SIZE; if (soffset || eoffset) { struct interval_tree_node *it; - it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1); + it = interval_tree_iter_first(&vm->va, soffset, eoffset); if (it && it != &bo_va->it) { struct radeon_bo_va *tmp; tmp = container_of(it, struct radeon_bo_va, it); @@ -522,7 +522,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, if (soffset || eoffset) { bo_va->it.start = soffset; - bo_va->it.last = eoffset - 1; + bo_va->it.last = eoffset; interval_tree_insert(&bo_va->it, &vm->va); } @@ -891,7 +891,7 @@ static void radeon_vm_fence_pts(struct radeon_vm *vm, unsigned i; start >>= radeon_vm_block_size; - end >>= radeon_vm_block_size; + end = (end - 1) >> radeon_vm_block_size; for (i = start; i <= end; ++i) radeon_bo_fence(vm->page_tables[i].bo, fence, true); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 
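The one-character fix in radeon_ttm_tt_populate() above deserves a gloss: with unsigned i, while (--i) never unmaps entry 0 and wraps around if the very first mapping failed, whereas while (i--) walks exactly entries i-1 down to 0. A standalone illustration (printf stands in for the pci_unmap_page() calls):

        #include <stdio.h>

        static void unwind_old(unsigned int i)
        {
                while (--i)             /* stops at 0: entry 0 leaks; wraps if i == 0 */
                        printf("unmap %u\n", i);
        }

        static void unwind_new(unsigned int i)
        {
                while (i--)             /* visits i-1 .. 0; no-op if i == 0 */
                        printf("unmap %u\n", i);
        }

        int main(void)
        {
                /* Suppose mapping entry 3 failed, so entries 0..2 must be undone. */
                unwind_old(3);  /* unmap 2, unmap 1 -- entry 0 is leaked */
                unwind_new(3);  /* unmap 2, unmap 1, unmap 0 */
                return 0;
        }

With the post-decrement form the unwind covers exactly the mappings that succeeded and degenerates to nothing when the first mapping was the one that failed.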
4c679b802bc85..e15185b165049 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6466,23 +6466,27 @@ int si_irq_process(struct radeon_device *rdev) case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6492,23 +6496,27 @@ int si_irq_process(struct radeon_device *rdev) case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6518,23 +6526,27 @@ int si_irq_process(struct radeon_device *rdev) case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if 
(atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6544,23 +6556,27 @@ int si_irq_process(struct radeon_device *rdev) case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6570,23 +6586,27 @@ int si_irq_process(struct radeon_device *rdev) case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + 
wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6596,23 +6616,27 @@ int si_irq_process(struct radeon_device *rdev) case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6632,88 +6656,112 @@ int si_irq_process(struct radeon_device *rdev) case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); + break; case 1: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); + break; case 2: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if 
(!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); + break; case 3: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); + break; case 4: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); + break; case 5: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; case 6: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); + break; case 7: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); + break; case 8: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); + break; case 9: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); + break; case 10: - if 
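The si.c hunks above apply one mechanical transformation to every vblank, vline and HPD case: where the old code silently skipped the event when its status bit was not asserted, the new code warns, then services and acknowledges the event unconditionally and ends the case with an explicit break. A hedged sketch of the resulting shape for one display controller (the crtc parameter and pointer-to-status form are illustrative; the driver open-codes six near-identical copies):

static void si_handle_vblank_sketch(struct radeon_device *rdev, int crtc,
                                    u32 *disp_int, u32 vblank_bit)
{
        /* An IH ring event arrived; its status bit is expected to be set. */
        if (!(*disp_int & vblank_bit))
                DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

        if (rdev->irq.crtc_vblank_int[crtc]) {
                drm_handle_vblank(rdev->ddev, crtc);
                rdev->pm.vblank_sync = true;
                wake_up(&rdev->irq.vblank_queue);
        }
        if (atomic_read(&rdev->irq.pflip[crtc]))
                radeon_crtc_handle_vblank(rdev, crtc);

        *disp_int &= ~vblank_bit;       /* acknowledge the serviced bit */
        DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
}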
(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); + break; case 11: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index ff8b83f5e929a..9dfcedec05a69 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2925,6 +2925,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = { /* PITCAIRN - https://bugs.freedesktop.org/show_bug.cgi?id=76490 */ { PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0xe271, 0, 120000 }, + { PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 }, { 0, 0, 0, 0 }, }; diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 3afac30139838..c126f6bfbed15 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -915,6 +915,11 @@ #define DCCG_AUDIO_DTO1_PHASE 0x05c0 #define DCCG_AUDIO_DTO1_MODULE 0x05c4 +#define DENTIST_DISPCLK_CNTL 0x0490 +# define DENTIST_DPREFCLK_WDIVIDER(x) (((x) & 0x7f) << 24) +# define DENTIST_DPREFCLK_WDIVIDER_MASK (0x7f << 24) +# define DENTIST_DPREFCLK_WDIVIDER_SHIFT 24 + #define AFMT_AUDIO_SRC_CONTROL 0x713c #define AFMT_AUDIO_SRC_SELECT(x) (((x) & 7) << 0) /* AFMT_AUDIO_SRC_SELECT diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c index eb2282cc4a565..eba5f8a52fbd9 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c @@ -54,55 +54,56 @@ static void rockchip_gem_free_buf(struct rockchip_gem_object *rk_obj) &rk_obj->dma_attrs); } -int rockchip_gem_mmap_buf(struct drm_gem_object *obj, - struct vm_area_struct *vma) +static int rockchip_drm_gem_object_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma) + { + int ret; struct rockchip_gem_object *rk_obj = to_rockchip_obj(obj); struct drm_device *drm = obj->dev; - unsigned long vm_size; - vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; - vm_size = vma->vm_end - vma->vm_start; - - if (vm_size > obj->size) - return -EINVAL; + /* + * dma_alloc_attrs() allocated a struct page table for rk_obj, so clear + * VM_PFNMAP flag that was set by drm_gem_mmap_obj()/drm_gem_mmap(). 
+ */ + vma->vm_flags &= ~VM_PFNMAP; - return dma_mmap_attrs(drm->dev, vma, rk_obj->kvaddr, rk_obj->dma_addr, + ret = dma_mmap_attrs(drm->dev, vma, rk_obj->kvaddr, rk_obj->dma_addr, obj->size, &rk_obj->dma_attrs); + if (ret) + drm_gem_vm_close(vma); + + return ret; } -/* drm driver mmap file operations */ -int rockchip_gem_mmap(struct file *filp, struct vm_area_struct *vma) +int rockchip_gem_mmap_buf(struct drm_gem_object *obj, + struct vm_area_struct *vma) { - struct drm_file *priv = filp->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_gem_object *obj; - struct drm_vma_offset_node *node; + struct drm_device *drm = obj->dev; int ret; - if (drm_device_is_unplugged(dev)) - return -ENODEV; + mutex_lock(&drm->struct_mutex); + ret = drm_gem_mmap_obj(obj, obj->size, vma); + mutex_unlock(&drm->struct_mutex); + if (ret) + return ret; - mutex_lock(&dev->struct_mutex); + return rockchip_drm_gem_object_mmap(obj, vma); +} - node = drm_vma_offset_exact_lookup(dev->vma_offset_manager, - vma->vm_pgoff, - vma_pages(vma)); - if (!node) { - mutex_unlock(&dev->struct_mutex); - DRM_ERROR("failed to find vma node.\n"); - return -EINVAL; - } else if (!drm_vma_node_is_allowed(node, filp)) { - mutex_unlock(&dev->struct_mutex); - return -EACCES; - } +/* drm driver mmap file operations */ +int rockchip_gem_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_gem_object *obj; + int ret; - obj = container_of(node, struct drm_gem_object, vma_node); - ret = rockchip_gem_mmap_buf(obj, vma); + ret = drm_gem_mmap(filp, vma); + if (ret) + return ret; - mutex_unlock(&dev->struct_mutex); + obj = vma->vm_private_data; - return ret; + return rockchip_drm_gem_object_mmap(obj, vma); } struct rockchip_gem_object * diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c index d6b55e3e3716c..a43a836e6f882 100644 --- a/drivers/gpu/drm/tegra/dpaux.c +++ b/drivers/gpu/drm/tegra/dpaux.c @@ -72,34 +72,32 @@ static inline void tegra_dpaux_writel(struct tegra_dpaux *dpaux, static void tegra_dpaux_write_fifo(struct tegra_dpaux *dpaux, const u8 *buffer, size_t size) { - unsigned long offset = DPAUX_DP_AUXDATA_WRITE(0); size_t i, j; - for (i = 0; i < size; i += 4) { - size_t num = min_t(size_t, size - i, 4); + for (i = 0; i < DIV_ROUND_UP(size, 4); i++) { + size_t num = min_t(size_t, size - i * 4, 4); unsigned long value = 0; for (j = 0; j < num; j++) - value |= buffer[i + j] << (j * 8); + value |= buffer[i * 4 + j] << (j * 8); - tegra_dpaux_writel(dpaux, value, offset++); + tegra_dpaux_writel(dpaux, value, DPAUX_DP_AUXDATA_WRITE(i)); } } static void tegra_dpaux_read_fifo(struct tegra_dpaux *dpaux, u8 *buffer, size_t size) { - unsigned long offset = DPAUX_DP_AUXDATA_READ(0); size_t i, j; - for (i = 0; i < size; i += 4) { - size_t num = min_t(size_t, size - i, 4); + for (i = 0; i < DIV_ROUND_UP(size, 4); i++) { + size_t num = min_t(size_t, size - i * 4, 4); unsigned long value; - value = tegra_dpaux_readl(dpaux, offset++); + value = tegra_dpaux_readl(dpaux, DPAUX_DP_AUXDATA_READ(i)); for (j = 0; j < num; j++) - buffer[i + j] = value >> (j * 8); + buffer[i * 4 + j] = value >> (j * 8); } } diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 7a207ca547be2..6394547cf67a0 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -328,6 +328,8 @@ static int __init vgem_init(void) goto out; } + drm_dev_set_unique(vgem_device, "vgem"); + ret = drm_dev_register(vgem_device, 0); if (ret) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
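The dpaux rework above matters because the old loop incremented a single register offset by one per 32-bit word, which is only correct if consecutive AUXDATA registers sit at consecutive offsets in the units tegra_dpaux_writel() expects; the fix recomputes each offset from the word index through the DPAUX_DP_AUXDATA_WRITE()/READ() macros and keeps the byte index as i * 4 + j. A small self-contained demo of the fixed indexing for a 10-byte transfer (userspace C, with printf standing in for the register write):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        const uint8_t buffer[10] = { 0x00, 0x01, 0x02, 0x03, 0x04,
                                     0x05, 0x06, 0x07, 0x08, 0x09 };
        size_t size = sizeof(buffer), i, j;

        for (i = 0; i < DIV_ROUND_UP(size, 4); i++) {
                /* 4, 4 and then 2 bytes for the final partial word */
                size_t num = size - i * 4 < 4 ? size - i * 4 : 4;
                uint32_t value = 0;

                for (j = 0; j < num; j++)       /* little-endian packing */
                        value |= (uint32_t)buffer[i * 4 + j] << (j * 8);

                printf("AUXDATA_WRITE(%zu) <= 0x%08x (%zu bytes)\n",
                       i, (unsigned)value, num);
        }
        return 0;
}

This prints 0x03020100, 0x07060504 and 0x00000908 for word indices 0, 1 and 2, matching the packing the driver performs.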
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 620bb5cf617c9..2aa0e927d4907 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -25,6 +25,7 @@ * **************************************************************************/ #include +#include #include #include "vmwgfx_drv.h" @@ -1447,6 +1448,12 @@ static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent) static int __init vmwgfx_init(void) { int ret; + +#ifdef CONFIG_VGA_CONSOLE + if (vgacon_text_force()) + return -EINVAL; +#endif + ret = drm_pci_init(&driver, &vmw_pci_driver); if (ret) DRM_ERROR("Failed initializing DRM.\n"); @@ -1458,6 +1465,9 @@ static void __exit vmwgfx_exit(void) drm_pci_exit(&driver, &vmw_pci_driver); } +MODULE_INFO(vmw_patch, "ed7d78b2"); +MODULE_INFO(vmw_patch, "54c12bc3"); + module_init(vmwgfx_init); module_exit(vmwgfx_exit); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index d26a6daa9719a..d8896ed41b9eb 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -636,7 +636,8 @@ extern int vmw_user_dmabuf_alloc(struct vmw_private *dev_priv, uint32_t size, bool shareable, uint32_t *handle, - struct vmw_dma_buffer **p_dma_buf); + struct vmw_dma_buffer **p_dma_buf, + struct ttm_base_object **p_base); extern int vmw_user_dmabuf_reference(struct ttm_object_file *tfile, struct vmw_dma_buffer *dma_buf, uint32_t *handle); @@ -650,7 +651,8 @@ extern uint32_t vmw_dmabuf_validate_node(struct ttm_buffer_object *bo, uint32_t cur_validate_node); extern void vmw_dmabuf_validate_clear(struct ttm_buffer_object *bo); extern int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile, - uint32_t id, struct vmw_dma_buffer **out); + uint32_t id, struct vmw_dma_buffer **out, + struct ttm_base_object **base); extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 654c8daeb5ab3..aee1c6ccc52d8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -887,7 +887,8 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, struct vmw_relocation *reloc; int ret; - ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo); + ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo, + NULL); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use MOB buffer.\n"); ret = -EINVAL; @@ -949,7 +950,8 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, struct vmw_relocation *reloc; int ret; - ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo); + ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo, + NULL); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use GMR region.\n"); ret = -EINVAL; @@ -2492,7 +2494,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes, true, NULL); if (unlikely(ret != 0)) - goto out_err; + goto out_err_nores; ret = vmw_validate_buffers(dev_priv, sw_context); if (unlikely(ret != 0)) @@ -2536,6 +2538,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, vmw_resource_relocations_free(&sw_context->res_relocations); vmw_fifo_commit(dev_priv, command_size); + mutex_unlock(&dev_priv->binding_mutex); vmw_query_bo_switch_commit(dev_priv, sw_context); ret = vmw_execbuf_fence_commands(file_priv, 
dev_priv, @@ -2551,7 +2554,6 @@ int vmw_execbuf_process(struct drm_file *file_priv, DRM_ERROR("Fence submission error. Syncing.\n"); vmw_resource_list_unreserve(&sw_context->resource_list, false); - mutex_unlock(&dev_priv->binding_mutex); ttm_eu_fence_buffer_objects(&ticket, &sw_context->validate_nodes, (void *) fence); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c index 87e39f68e9d07..e1898982b44af 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -484,7 +484,7 @@ int vmw_overlay_ioctl(struct drm_device *dev, void *data, goto out_unlock; } - ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &buf); + ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &buf, NULL); if (ret) goto out_unlock; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 210ef15b1d091..c5b4c47e86d63 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -356,7 +356,7 @@ int vmw_user_lookup_handle(struct vmw_private *dev_priv, } *out_surf = NULL; - ret = vmw_user_dmabuf_lookup(tfile, handle, out_buf); + ret = vmw_user_dmabuf_lookup(tfile, handle, out_buf, NULL); return ret; } @@ -483,7 +483,8 @@ int vmw_user_dmabuf_alloc(struct vmw_private *dev_priv, uint32_t size, bool shareable, uint32_t *handle, - struct vmw_dma_buffer **p_dma_buf) + struct vmw_dma_buffer **p_dma_buf, + struct ttm_base_object **p_base) { struct vmw_user_dma_buffer *user_bo; struct ttm_buffer_object *tmp; @@ -517,6 +518,10 @@ int vmw_user_dmabuf_alloc(struct vmw_private *dev_priv, } *p_dma_buf = &user_bo->dma; + if (p_base) { + *p_base = &user_bo->prime.base; + kref_get(&(*p_base)->refcount); + } *handle = user_bo->prime.base.hash.key; out_no_base_object: @@ -633,6 +638,7 @@ int vmw_user_dmabuf_synccpu_ioctl(struct drm_device *dev, void *data, struct vmw_dma_buffer *dma_buf; struct vmw_user_dma_buffer *user_bo; struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct ttm_base_object *buffer_base; int ret; if ((arg->flags & (drm_vmw_synccpu_read | drm_vmw_synccpu_write)) == 0 @@ -645,7 +651,8 @@ int vmw_user_dmabuf_synccpu_ioctl(struct drm_device *dev, void *data, switch (arg->op) { case drm_vmw_synccpu_grab: - ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &dma_buf); + ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &dma_buf, + &buffer_base); if (unlikely(ret != 0)) return ret; @@ -653,6 +660,7 @@ int vmw_user_dmabuf_synccpu_ioctl(struct drm_device *dev, void *data, dma); ret = vmw_user_dmabuf_synccpu_grab(user_bo, tfile, arg->flags); vmw_dmabuf_unreference(&dma_buf); + ttm_base_object_unref(&buffer_base); if (unlikely(ret != 0 && ret != -ERESTARTSYS && ret != -EBUSY)) { DRM_ERROR("Failed synccpu grab on handle 0x%08x.\n", @@ -694,7 +702,8 @@ int vmw_dmabuf_alloc_ioctl(struct drm_device *dev, void *data, return ret; ret = vmw_user_dmabuf_alloc(dev_priv, vmw_fpriv(file_priv)->tfile, - req->size, false, &handle, &dma_buf); + req->size, false, &handle, &dma_buf, + NULL); if (unlikely(ret != 0)) goto out_no_dmabuf; @@ -723,7 +732,8 @@ int vmw_dmabuf_unref_ioctl(struct drm_device *dev, void *data, } int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile, - uint32_t handle, struct vmw_dma_buffer **out) + uint32_t handle, struct vmw_dma_buffer **out, + struct ttm_base_object **p_base) { struct vmw_user_dma_buffer *vmw_user_bo; struct ttm_base_object *base; @@ -745,7 +755,10 @@ int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile, vmw_user_bo = 
container_of(base, struct vmw_user_dma_buffer, prime.base); (void)ttm_bo_reference(&vmw_user_bo->dma.base); - ttm_base_object_unref(&base); + if (p_base) + *p_base = base; + else + ttm_base_object_unref(&base); *out = &vmw_user_bo->dma; return 0; @@ -1006,7 +1019,7 @@ int vmw_dumb_create(struct drm_file *file_priv, ret = vmw_user_dmabuf_alloc(dev_priv, vmw_fpriv(file_priv)->tfile, args->size, false, &args->handle, - &dma_buf); + &dma_buf, NULL); if (unlikely(ret != 0)) goto out_no_dmabuf; @@ -1034,7 +1047,7 @@ int vmw_dumb_map_offset(struct drm_file *file_priv, struct vmw_dma_buffer *out_buf; int ret; - ret = vmw_user_dmabuf_lookup(tfile, handle, &out_buf); + ret = vmw_user_dmabuf_lookup(tfile, handle, &out_buf, NULL); if (ret != 0) return -EINVAL; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index 6a4584a43aa6c..d2751ada19b1e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -470,7 +470,7 @@ int vmw_shader_define_ioctl(struct drm_device *dev, void *data, if (arg->buffer_handle != SVGA3D_INVALID_ID) { ret = vmw_user_dmabuf_lookup(tfile, arg->buffer_handle, - &buffer); + &buffer, NULL); if (unlikely(ret != 0)) { DRM_ERROR("Could not find buffer for shader " "creation.\n"); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 4ecdbf3e59da2..17a4107639b2a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -43,6 +43,7 @@ struct vmw_user_surface { struct vmw_surface srf; uint32_t size; struct drm_master *master; + struct ttm_base_object *backup_base; }; /** @@ -652,6 +653,8 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) struct vmw_resource *res = &user_srf->srf.res; *p_base = NULL; + if (user_srf->backup_base) + ttm_base_object_unref(&user_srf->backup_base); vmw_resource_unreference(&res); } @@ -846,7 +849,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, res->backup_size, true, &backup_handle, - &res->backup); + &res->backup, + &user_srf->backup_base); if (unlikely(ret != 0)) { vmw_resource_unreference(&res); goto out_unlock; @@ -1309,7 +1313,8 @@ int vmw_gb_surface_define_ioctl(struct drm_device *dev, void *data, if (req->buffer_handle != SVGA3D_INVALID_ID) { ret = vmw_user_dmabuf_lookup(tfile, req->buffer_handle, - &res->backup); + &res->backup, + &user_srf->backup_base); } else if (req->drm_surface_flags & drm_vmw_surface_flag_create_buffer) ret = vmw_user_dmabuf_alloc(dev_priv, tfile, @@ -1317,7 +1322,8 @@ int vmw_gb_surface_define_ioctl(struct drm_device *dev, void *data, req->drm_surface_flags & drm_vmw_surface_flag_shareable, &backup_handle, - &res->backup); + &res->backup, + &user_srf->backup_base); if (unlikely(ret != 0)) { vmw_resource_unreference(&res); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 722a925795a28..9ce9dfeb12583 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1589,7 +1589,7 @@ int hid_connect(struct hid_device *hdev, unsigned int connect_mask) "Multi-Axis Controller" }; const char *type, *bus; - char buf[64]; + char buf[64] = ""; unsigned int i; int len; int ret; diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 3318de690e006..39bf74793b8b0 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -356,6 +356,8 @@ static int cp2112_read(struct cp2112_device *dev, u8 *data, size_t size) struct cp2112_force_read_report report; int ret; + if (size > 
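The thread running through the vmwgfx hunks above: vmw_user_dmabuf_lookup() and vmw_user_dmabuf_alloc() grow an optional p_base out-parameter that hands back the underlying ttm_base_object with an extra reference (via kref_get), so the user-visible object cannot be destroyed while the caller is still working on the buffer. Callers that pass a non-NULL pointer must drop that reference themselves. A hedged sketch of the new caller contract (do_work_on() is a hypothetical placeholder):

static int use_user_dmabuf(struct ttm_object_file *tfile, uint32_t handle)
{
        struct vmw_dma_buffer *buf;
        struct ttm_base_object *base;   /* pins the user-space object */
        int ret;

        ret = vmw_user_dmabuf_lookup(tfile, handle, &buf, &base);
        if (ret)
                return ret;

        do_work_on(buf);                        /* hypothetical work */

        vmw_dmabuf_unreference(&buf);           /* drop buffer reference */
        ttm_base_object_unref(&base);           /* drop extra base-object ref */
        return 0;
}

Passing NULL keeps the old behaviour, which is why most call sites in the diff simply gain a trailing NULL argument.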
sizeof(dev->read_data)) + size = sizeof(dev->read_data); report.report = CP2112_DATA_READ_FORCE_SEND; report.length = cpu_to_be16(size); @@ -535,7 +537,7 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, struct cp2112_device *dev = (struct cp2112_device *)adap->algo_data; struct hid_device *hdev = dev->hdev; u8 buf[64]; - __be16 word; + __le16 word; ssize_t count; size_t read_length = 0; unsigned int retries; @@ -552,7 +554,7 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, if (I2C_SMBUS_READ == read_write) count = cp2112_read_req(buf, addr, read_length); else - count = cp2112_write_req(buf, addr, data->byte, NULL, + count = cp2112_write_req(buf, addr, command, NULL, 0); break; case I2C_SMBUS_BYTE_DATA: @@ -567,7 +569,7 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, break; case I2C_SMBUS_WORD_DATA: read_length = 2; - word = cpu_to_be16(data->word); + word = cpu_to_le16(data->word); if (I2C_SMBUS_READ == read_write) count = cp2112_write_read_req(buf, addr, read_length, @@ -580,7 +582,7 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, size = I2C_SMBUS_WORD_DATA; read_write = I2C_SMBUS_READ; read_length = 2; - word = cpu_to_be16(data->word); + word = cpu_to_le16(data->word); count = cp2112_write_read_req(buf, addr, read_length, command, (u8 *)&word, 2); @@ -673,7 +675,7 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, data->byte = buf[0]; break; case I2C_SMBUS_WORD_DATA: - data->word = be16_to_cpup((__be16 *)buf); + data->word = le16_to_cpup((__le16 *)buf); break; case I2C_SMBUS_BLOCK_DATA: if (read_length > I2C_SMBUS_BLOCK_MAX) { diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 008e89bf6f3c3..32d52d29cc68d 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -462,12 +462,15 @@ static bool hidinput_setup_battery(struct hid_device *dev, unsigned report_type, static void hidinput_cleanup_battery(struct hid_device *dev) { + const struct power_supply_desc *psy_desc; + if (!dev->battery) return; + psy_desc = dev->battery->desc; power_supply_unregister(dev->battery); - kfree(dev->battery->desc->name); - kfree(dev->battery->desc); + kfree(psy_desc->name); + kfree(psy_desc); dev->battery = NULL; } #else /* !CONFIG_HID_BATTERY_STRENGTH */ diff --git a/drivers/hid/hid-uclogic.c b/drivers/hid/hid-uclogic.c index 94167310e15a4..b905d501e752d 100644 --- a/drivers/hid/hid-uclogic.c +++ b/drivers/hid/hid-uclogic.c @@ -858,7 +858,7 @@ static int uclogic_tablet_enable(struct hid_device *hdev) for (p = drvdata->rdesc; p <= drvdata->rdesc + drvdata->rsize - 4;) { if (p[0] == 0xFE && p[1] == 0xED && p[2] == 0x1D && - p[3] < sizeof(params)) { + p[3] < ARRAY_SIZE(params)) { v = params[p[3]]; put_unaligned(cpu_to_le32(v), (s32 *)p); p += 4; diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index bfbe1bedda7f3..eab5bd6a24426 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -164,7 +164,7 @@ static void hid_io_error(struct hid_device *hid) if (time_after(jiffies, usbhid->stop_retry)) { /* Retries failed, so do a port reset unless we lack bandwidth*/ - if (test_bit(HID_NO_BANDWIDTH, &usbhid->iofl) + if (!test_bit(HID_NO_BANDWIDTH, &usbhid->iofl) && !test_and_set_bit(HID_RESET_PENDING, &usbhid->iofl)) { schedule_work(&usbhid->reset_work); diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c index f155b83804819..2b3105c8aed39 100644 --- a/drivers/hwmon/ads1015.c +++ b/drivers/hwmon/ads1015.c @@ -126,7 +126,7 @@ static int ads1015_reg_to_mv(struct 
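The hid-input hunk above fixes a use-after-free: power_supply_unregister() releases the power_supply that dev->battery points at, so the old code freed desc->name and desc through a dangling pointer. The fix is the usual save-before-free pattern; reassembled from the hunk, the corrected function reads:

static void hidinput_cleanup_battery(struct hid_device *dev)
{
        const struct power_supply_desc *psy_desc;

        if (!dev->battery)
                return;

        psy_desc = dev->battery->desc;          /* read before it is freed */
        power_supply_unregister(dev->battery);  /* may free dev->battery   */
        kfree(psy_desc->name);
        kfree(psy_desc);
        dev->battery = NULL;
}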
i2c_client *client, unsigned int channel, struct ads1015_data *data = i2c_get_clientdata(client); unsigned int pga = data->channel_data[channel].pga; int fullscale = fullscale_table[pga]; - const unsigned mask = data->id == ads1115 ? 0x7fff : 0x7ff0; + const int mask = data->id == ads1115 ? 0x7fff : 0x7ff0; return DIV_ROUND_CLOSEST(reg * fullscale, mask); } diff --git a/drivers/hwmon/gpio-fan.c b/drivers/hwmon/gpio-fan.c index a3dae6d0082a0..83ea8c8039faf 100644 --- a/drivers/hwmon/gpio-fan.c +++ b/drivers/hwmon/gpio-fan.c @@ -406,16 +406,11 @@ static int gpio_fan_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state) { struct gpio_fan_data *fan_data = cdev->devdata; - int r; if (!fan_data) return -EINVAL; - r = get_fan_speed_index(fan_data); - if (r < 0) - return r; - - *state = r; + *state = fan_data->speed_index; return 0; } diff --git a/drivers/hwmon/mcp3021.c b/drivers/hwmon/mcp3021.c index d219c06a857bb..972444a14cca5 100644 --- a/drivers/hwmon/mcp3021.c +++ b/drivers/hwmon/mcp3021.c @@ -31,14 +31,11 @@ /* output format */ #define MCP3021_SAR_SHIFT 2 #define MCP3021_SAR_MASK 0x3ff - #define MCP3021_OUTPUT_RES 10 /* 10-bit resolution */ -#define MCP3021_OUTPUT_SCALE 4 #define MCP3221_SAR_SHIFT 0 #define MCP3221_SAR_MASK 0xfff #define MCP3221_OUTPUT_RES 12 /* 12-bit resolution */ -#define MCP3221_OUTPUT_SCALE 1 enum chips { mcp3021, @@ -54,7 +51,6 @@ struct mcp3021_data { u16 sar_shift; u16 sar_mask; u8 output_res; - u8 output_scale; }; static int mcp3021_read16(struct i2c_client *client) @@ -84,13 +80,7 @@ static int mcp3021_read16(struct i2c_client *client) static inline u16 volts_from_reg(struct mcp3021_data *data, u16 val) { - if (val == 0) - return 0; - - val = val * data->output_scale - data->output_scale / 2; - - return val * DIV_ROUND_CLOSEST(data->vdd, - (1 << data->output_res) * data->output_scale); + return DIV_ROUND_CLOSEST(data->vdd * val, 1 << data->output_res); } static ssize_t show_in_input(struct device *dev, struct device_attribute *attr, @@ -132,14 +122,12 @@ static int mcp3021_probe(struct i2c_client *client, data->sar_shift = MCP3021_SAR_SHIFT; data->sar_mask = MCP3021_SAR_MASK; data->output_res = MCP3021_OUTPUT_RES; - data->output_scale = MCP3021_OUTPUT_SCALE; break; case mcp3221: data->sar_shift = MCP3221_SAR_SHIFT; data->sar_mask = MCP3221_SAR_MASK; data->output_res = MCP3221_OUTPUT_RES; - data->output_scale = MCP3221_OUTPUT_SCALE; break; } diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index bd1c99deac71b..2aaedbe0b0235 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -354,6 +354,10 @@ static const u16 NCT6775_REG_TEMP_CRIT[ARRAY_SIZE(nct6775_temp_label) - 1] /* NCT6776 specific data */ +/* STEP_UP_TIME and STEP_DOWN_TIME regs are swapped for all chips but NCT6775 */ +#define NCT6776_REG_FAN_STEP_UP_TIME NCT6775_REG_FAN_STEP_DOWN_TIME +#define NCT6776_REG_FAN_STEP_DOWN_TIME NCT6775_REG_FAN_STEP_UP_TIME + static const s8 NCT6776_ALARM_BITS[] = { 0, 1, 2, 3, 8, 21, 20, 16, /* in0.. 
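The mcp3021 rewrite above replaces a scale-and-fudge conversion, which even divided inside the DIV_ROUND_CLOSEST argument and discarded precision, with the direct form reported_mV = round(vdd * code / 2^resolution). A quick self-contained check of the new formula (values are illustrative; DIV_ROUND_CLOSEST is reduced to its unsigned form):

#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

static unsigned int volts_from_reg(unsigned int vdd_mv, unsigned int code,
                                   unsigned int output_res)
{
        return DIV_ROUND_CLOSEST(vdd_mv * code, 1U << output_res);
}

int main(void)
{
        /* 10-bit MCP3021 on a 3300 mV reference */
        printf("%u\n", volts_from_reg(3300, 0, 10));    /* 0    */
        printf("%u\n", volts_from_reg(3300, 512, 10));  /* 1650 */
        printf("%u\n", volts_from_reg(3300, 1023, 10)); /* 3297 */
        return 0;
}

Note that the old special case for val == 0 becomes unnecessary: the direct formula already yields 0.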
in7 */ 17, -1, -1, -1, -1, -1, -1, /* in8..in14 */ @@ -3528,8 +3532,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6776_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; @@ -3600,8 +3604,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6779_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; @@ -3677,8 +3681,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6779_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index 55765790907b3..fbfc02bb2cfa1 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -195,7 +195,7 @@ static int nct7802_read_voltage(struct nct7802_data *data, int nr, int index) } static int nct7802_write_voltage(struct nct7802_data *data, int nr, int index, - unsigned int voltage) + unsigned long voltage) { int shift = 8 - REG_VOLTAGE_LIMIT_MSB_SHIFT[index - 1][nr]; int err; @@ -547,7 +547,7 @@ static umode_t nct7802_temp_is_visible(struct kobject *kobj, if (index >= 9 && index < 18 && (reg & 0x0c) != 0x04 && (reg & 0x0c) != 0x08) /* RD2 */ return 0; - if (index >= 18 && index < 27 && (reg & 0x30) != 0x10) /* RD3 */ + if (index >= 18 && index < 27 && (reg & 0x30) != 0x20) /* RD3 */ return 0; if (index >= 27 && index < 35) /* local */ return attr->mode; diff --git a/drivers/hwmon/nct7904.c b/drivers/hwmon/nct7904.c index b77b82f244800..08ff89d222e5f 100644 --- a/drivers/hwmon/nct7904.c +++ b/drivers/hwmon/nct7904.c @@ -412,8 +412,9 @@ static ssize_t show_pwm(struct device *dev, return sprintf(buf, "%d\n", val); } -static ssize_t store_mode(struct device *dev, struct device_attribute *devattr, - const char *buf, size_t count) +static ssize_t store_enable(struct device *dev, + struct device_attribute *devattr, + const char *buf, size_t count) { int index = to_sensor_dev_attr(devattr)->index; struct nct7904_data *data = dev_get_drvdata(dev); @@ -422,18 +423,18 @@ static ssize_t store_mode(struct device *dev, struct device_attribute *devattr, if (kstrtoul(buf, 10, &val) < 0) return -EINVAL; - if (val > 1 || (val && !data->fan_mode[index])) + if (val < 1 || val > 2 || (val == 2 && !data->fan_mode[index])) return -EINVAL; ret = 
nct7904_write_reg(data, BANK_3, FANCTL1_FMR_REG + index, - val ? data->fan_mode[index] : 0); + val == 2 ? data->fan_mode[index] : 0); return ret ? ret : count; } -/* Return 0 for manual mode or 1 for SmartFan mode */ -static ssize_t show_mode(struct device *dev, - struct device_attribute *devattr, char *buf) +/* Return 1 for manual mode or 2 for SmartFan mode */ +static ssize_t show_enable(struct device *dev, + struct device_attribute *devattr, char *buf) { int index = to_sensor_dev_attr(devattr)->index; struct nct7904_data *data = dev_get_drvdata(dev); @@ -443,36 +444,36 @@ static ssize_t show_mode(struct device *dev, if (val < 0) return val; - return sprintf(buf, "%d\n", val ? 1 : 0); + return sprintf(buf, "%d\n", val ? 2 : 1); } /* 2 attributes per channel: pwm and mode */ -static SENSOR_DEVICE_ATTR(fan1_pwm, S_IRUGO | S_IWUSR, +static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, show_pwm, store_pwm, 0); -static SENSOR_DEVICE_ATTR(fan1_mode, S_IRUGO | S_IWUSR, - show_mode, store_mode, 0); -static SENSOR_DEVICE_ATTR(fan2_pwm, S_IRUGO | S_IWUSR, +static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, + show_enable, store_enable, 0); +static SENSOR_DEVICE_ATTR(pwm2, S_IRUGO | S_IWUSR, show_pwm, store_pwm, 1); -static SENSOR_DEVICE_ATTR(fan2_mode, S_IRUGO | S_IWUSR, - show_mode, store_mode, 1); -static SENSOR_DEVICE_ATTR(fan3_pwm, S_IRUGO | S_IWUSR, +static SENSOR_DEVICE_ATTR(pwm2_enable, S_IRUGO | S_IWUSR, + show_enable, store_enable, 1); +static SENSOR_DEVICE_ATTR(pwm3, S_IRUGO | S_IWUSR, show_pwm, store_pwm, 2); -static SENSOR_DEVICE_ATTR(fan3_mode, S_IRUGO | S_IWUSR, - show_mode, store_mode, 2); -static SENSOR_DEVICE_ATTR(fan4_pwm, S_IRUGO | S_IWUSR, +static SENSOR_DEVICE_ATTR(pwm3_enable, S_IRUGO | S_IWUSR, + show_enable, store_enable, 2); +static SENSOR_DEVICE_ATTR(pwm4, S_IRUGO | S_IWUSR, show_pwm, store_pwm, 3); -static SENSOR_DEVICE_ATTR(fan4_mode, S_IRUGO | S_IWUSR, - show_mode, store_mode, 3); +static SENSOR_DEVICE_ATTR(pwm4_enable, S_IRUGO | S_IWUSR, + show_enable, store_enable, 3); static struct attribute *nct7904_fanctl_attrs[] = { - &sensor_dev_attr_fan1_pwm.dev_attr.attr, - &sensor_dev_attr_fan1_mode.dev_attr.attr, - &sensor_dev_attr_fan2_pwm.dev_attr.attr, - &sensor_dev_attr_fan2_mode.dev_attr.attr, - &sensor_dev_attr_fan3_pwm.dev_attr.attr, - &sensor_dev_attr_fan3_mode.dev_attr.attr, - &sensor_dev_attr_fan4_pwm.dev_attr.attr, - &sensor_dev_attr_fan4_mode.dev_attr.attr, + &sensor_dev_attr_pwm1.dev_attr.attr, + &sensor_dev_attr_pwm1_enable.dev_attr.attr, + &sensor_dev_attr_pwm2.dev_attr.attr, + &sensor_dev_attr_pwm2_enable.dev_attr.attr, + &sensor_dev_attr_pwm3.dev_attr.attr, + &sensor_dev_attr_pwm3_enable.dev_attr.attr, + &sensor_dev_attr_pwm4.dev_attr.attr, + &sensor_dev_attr_pwm4_enable.dev_attr.attr, NULL }; @@ -574,6 +575,7 @@ static const struct i2c_device_id nct7904_id[] = { {"nct7904", 0}, {} }; +MODULE_DEVICE_TABLE(i2c, nct7904_id); static struct i2c_driver nct7904_driver = { .class = I2C_CLASS_HWMON, diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 894531d315b83..046144fc5aff1 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -543,7 +543,7 @@ static int coresight_name_match(struct device *dev, void *data) to_match = data; i_csdev = to_coresight_device(dev); - if (!strcmp(to_match, dev_name(&i_csdev->dev))) + if (to_match && !strcmp(to_match, dev_name(&i_csdev->dev))) return 1; return 0; diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c 
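The nct7904 rename above is an ABI fix: the standard hwmon sysfs names are pwmN and pwmN_enable, and pwmN_enable encodes 1 = manual PWM control and 2 = automatic control (SmartFan here), never 0/1. A hedged sketch of the value mapping the new store_enable() implements (the helper name is invented for illustration):

/* Map a userspace pwmN_enable write to the FANCTL mode register value:
 * 1 selects manual PWM, 2 selects the stored SmartFan configuration. */
static int pwm_enable_to_fmr(unsigned long val, u8 saved_fan_mode)
{
        if (val < 1 || val > 2)
                return -EINVAL;
        if (val == 2 && !saved_fan_mode)        /* no SmartFan config saved */
                return -EINVAL;

        return val == 2 ? saved_fan_mode : 0;
}

show_enable() mirrors this, reporting 2 when the register holds a non-zero SmartFan mode and 1 otherwise.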
index ff23d1bdd2307..9bd10a9b4b50b 100644 --- a/drivers/i2c/busses/i2c-at91.c +++ b/drivers/i2c/busses/i2c-at91.c @@ -65,6 +65,9 @@ #define AT91_TWI_UNRE 0x0080 /* Underrun Error */ #define AT91_TWI_NACK 0x0100 /* Not Acknowledged */ +#define AT91_TWI_INT_MASK \ + (AT91_TWI_TXCOMP | AT91_TWI_RXRDY | AT91_TWI_TXRDY | AT91_TWI_NACK) + #define AT91_TWI_IER 0x0024 /* Interrupt Enable Register */ #define AT91_TWI_IDR 0x0028 /* Interrupt Disable Register */ #define AT91_TWI_IMR 0x002c /* Interrupt Mask Register */ @@ -119,13 +122,12 @@ static void at91_twi_write(struct at91_twi_dev *dev, unsigned reg, unsigned val) static void at91_disable_twi_interrupts(struct at91_twi_dev *dev) { - at91_twi_write(dev, AT91_TWI_IDR, - AT91_TWI_TXCOMP | AT91_TWI_RXRDY | AT91_TWI_TXRDY); + at91_twi_write(dev, AT91_TWI_IDR, AT91_TWI_INT_MASK); } static void at91_twi_irq_save(struct at91_twi_dev *dev) { - dev->imr = at91_twi_read(dev, AT91_TWI_IMR) & 0x7; + dev->imr = at91_twi_read(dev, AT91_TWI_IMR) & AT91_TWI_INT_MASK; at91_disable_twi_interrupts(dev); } @@ -215,6 +217,14 @@ static void at91_twi_write_data_dma_callback(void *data) dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg), dev->buf_len, DMA_TO_DEVICE); + /* + * When this callback is called, THR/TX FIFO is likely not to be empty + * yet. So we have to wait for TXCOMP or NACK bits to be set into the + * Status Register to be sure that the STOP bit has been sent and the + * transfer is completed. The NACK interrupt has already been enabled, + * we just have to enable the TXCOMP one. + */ + at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_TXCOMP); at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP); } @@ -309,7 +319,7 @@ static void at91_twi_read_data_dma_callback(void *data) /* The last two bytes have to be read without using dma */ dev->buf += dev->buf_len - 2; dev->buf_len = 2; - at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_RXRDY); + at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_RXRDY | AT91_TWI_TXCOMP); } static void at91_twi_read_data_dma(struct at91_twi_dev *dev) @@ -370,7 +380,7 @@ static irqreturn_t atmel_twi_interrupt(int irq, void *dev_id) /* catch error flags */ dev->transfer_status |= status; - if (irqstatus & AT91_TWI_TXCOMP) { + if (irqstatus & (AT91_TWI_TXCOMP | AT91_TWI_NACK)) { at91_disable_twi_interrupts(dev); complete(&dev->cmd_complete); } @@ -384,6 +394,34 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev) unsigned long time_left; bool has_unre_flag = dev->pdata->has_unre_flag; + /* + * WARNING: the TXCOMP bit in the Status Register is NOT a clear on + * read flag but shows the state of the transmission at the time the + * Status Register is read. According to the programmer datasheet, + * TXCOMP is set when both holding register and internal shifter are + * empty and STOP condition has been sent. + * Consequently, we should enable NACK interrupt rather than TXCOMP to + * detect transmission failure. + * + * Besides, the TXCOMP bit is already set before the i2c transaction + * has been started. For read transactions, this bit is cleared when + * writing the START bit into the Control Register. So the + * corresponding interrupt can safely be enabled just after. + * However for write transactions managed by the CPU, we first write + * into THR, so TXCOMP is cleared. Then we can safely enable TXCOMP + * interrupt. If TXCOMP interrupt were enabled before writing into THR, + * the interrupt handler would be called immediately and the i2c command + * would be reported as completed. 
+ * Also when a write transaction is managed by the DMA controller, + * enabling the TXCOMP interrupt in this function may lead to a race + * condition since we don't know whether the TXCOMP interrupt is enabled + * before or after the DMA has started to write into THR. So the TXCOMP + * interrupt is enabled later by at91_twi_write_data_dma_callback(). + * Immediately after in that DMA callback, we still need to send the + * STOP condition manually by writing the corresponding bit into the + * Control Register. + */ + dev_dbg(dev->dev, "transfer: %s %d bytes.\n", (dev->msg->flags & I2C_M_RD) ? "read" : "write", dev->buf_len); @@ -414,26 +452,24 @@ * seems to be the best solution. */ if (dev->use_dma && (dev->buf_len > AT91_I2C_DMA_THRESHOLD)) { + at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_NACK); at91_twi_read_data_dma(dev); - /* - * It is important to enable TXCOMP irq here because - * doing it only when transferring the last two bytes - * will mask NACK errors since TXCOMP is set when a - * NACK occurs. - */ - at91_twi_write(dev, AT91_TWI_IER, - AT91_TWI_TXCOMP); - } else + } else { at91_twi_write(dev, AT91_TWI_IER, - AT91_TWI_TXCOMP | AT91_TWI_RXRDY); + AT91_TWI_TXCOMP | + AT91_TWI_NACK | + AT91_TWI_RXRDY); + } } else { if (dev->use_dma && (dev->buf_len > AT91_I2C_DMA_THRESHOLD)) { + at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_NACK); at91_twi_write_data_dma(dev); - at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_TXCOMP); } else { at91_twi_write_next_byte(dev); at91_twi_write(dev, AT91_TWI_IER, - AT91_TWI_TXCOMP | AT91_TWI_TXRDY); + AT91_TWI_TXCOMP | + AT91_TWI_NACK | + AT91_TWI_TXRDY); } } diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 0a80e4aabaed9..3f7d4876937e7 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -24,6 +24,7 @@ #include #include #include +#include <linux/dmi.h> #include #include #include @@ -51,6 +52,22 @@ static u32 i2c_dw_get_clk_rate_khz(struct dw_i2c_dev *dev) } #ifdef CONFIG_ACPI +/* + * The HCNT/LCNT information coming from ACPI should be the most accurate + * for a given platform. However, some systems get it wrong. On such systems + * we get better results by calculating those based on the input clock. 
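Condensing the long at91 comment above into the event ordering it implies for a DMA write (these calls appear verbatim in at91_do_twi_transfer() and at91_twi_write_data_dma_callback(); the sketch just lines them up):

/* In at91_do_twi_transfer(), before the DMA starts filling THR: */
at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_NACK);   /* failures are caught */
at91_twi_write_data_dma(dev);                       /* TXCOMP not armed yet */

/* In at91_twi_write_data_dma_callback(), once the DMA has drained: */
at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_TXCOMP); /* now race-free */
at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);    /* send STOP manually */

Arming TXCOMP any earlier could fire while the bit still reflects the pre-transfer idle state, reporting the command complete before anything was sent.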
+ */ +static const struct dmi_system_id dw_i2c_no_acpi_params[] = { + { + .ident = "Dell Inspiron 7348", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7348"), + }, + }, + { } +}; + static void dw_i2c_acpi_params(struct platform_device *pdev, char method[], u16 *hcnt, u16 *lcnt, u32 *sda_hold) { @@ -58,6 +75,9 @@ static void dw_i2c_acpi_params(struct platform_device *pdev, char method[], acpi_handle handle = ACPI_HANDLE(&pdev->dev); union acpi_object *obj; + if (dmi_check_system(dw_i2c_no_acpi_params)) + return; + if (ACPI_FAILURE(acpi_evaluate_object(handle, method, NULL, &buf))) return; @@ -253,12 +273,6 @@ static int dw_i2c_probe(struct platform_device *pdev) adap->dev.parent = &pdev->dev; adap->dev.of_node = pdev->dev.of_node; - r = i2c_add_numbered_adapter(adap); - if (r) { - dev_err(&pdev->dev, "failure adding adapter\n"); - return r; - } - if (dev->pm_runtime_disabled) { pm_runtime_forbid(&pdev->dev); } else { @@ -268,6 +282,13 @@ static int dw_i2c_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); } + r = i2c_add_numbered_adapter(adap); + if (r) { + dev_err(&pdev->dev, "failure adding adapter\n"); + pm_runtime_disable(&pdev->dev); + return r; + } + return 0; } diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 30059c1df2a3b..5801227b97ab0 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -669,8 +669,6 @@ mv64xxx_i2c_can_offload(struct mv64xxx_i2c_data *drv_data) struct i2c_msg *msgs = drv_data->msgs; int num = drv_data->num_msgs; - return false; - if (!drv_data->offload_enabled) return false; diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 5a84bea5b8451..d9d022cdfff00 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -688,15 +688,16 @@ static int rcar_i2c_probe(struct platform_device *pdev) return ret; } + pm_runtime_enable(dev); + platform_set_drvdata(pdev, priv); + ret = i2c_add_numbered_adapter(adap); if (ret < 0) { dev_err(dev, "reg adap failed: %d\n", ret); + pm_runtime_disable(dev); return ret; } - pm_runtime_enable(dev); - platform_set_drvdata(pdev, priv); - dev_info(dev, "probed\n"); return 0; diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index 297e9c9ac9432..424794271703a 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -1243,17 +1243,19 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev) i2c->adap.nr = i2c->pdata->bus_num; i2c->adap.dev.of_node = pdev->dev.of_node; + platform_set_drvdata(pdev, i2c); + + pm_runtime_enable(&pdev->dev); + ret = i2c_add_numbered_adapter(&i2c->adap); if (ret < 0) { dev_err(&pdev->dev, "failed to add bus to i2c core\n"); + pm_runtime_disable(&pdev->dev); s3c24xx_i2c_deregister_cpufreq(i2c); clk_unprepare(i2c->clk); return ret; } - platform_set_drvdata(pdev, i2c); - - pm_runtime_enable(&pdev->dev); pm_runtime_enable(&i2c->adap.dev); dev_info(&pdev->dev, "%s: S3C I2C adapter\n", dev_name(&i2c->adap.dev)); diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 06cc1ff088f12..2ba7c0fbc6150 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -51,7 +51,7 @@ static int i2c_mux_master_xfer(struct i2c_adapter *adap, ret = priv->select(parent, priv->mux_priv, priv->chan_id); if (ret >= 0) - ret = parent->algo->master_xfer(parent, msgs, num); + ret = __i2c_transfer(parent, msgs, num); if (priv->deselect) priv->deselect(parent, priv->mux_priv, 
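Three of the i2c probe functions above (designware, rcar, s3c2410) receive the same reordering: i2c_add_numbered_adapter() can trigger client probes, and therefore transfers, before probe() returns, so drvdata and runtime PM must already be set up by then, and must be torn down again if registration fails. The shared shape, reassembled from the hunks:

platform_set_drvdata(pdev, priv);
pm_runtime_enable(&pdev->dev);          /* ready before clients can appear */

ret = i2c_add_numbered_adapter(adap);
if (ret) {
        dev_err(&pdev->dev, "failure adding adapter\n");
        pm_runtime_disable(&pdev->dev); /* roll back in reverse order */
        return ret;
}

return 0;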
priv->chan_id); @@ -144,6 +144,7 @@ struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent, priv->adap.dev.parent = &parent->dev; priv->adap.retries = parent->retries; priv->adap.timeout = parent->timeout; + priv->adap.quirks = parent->quirks; /* Sanity check on class */ if (i2c_mux_parent_classes(parent) & class) diff --git a/drivers/i2c/muxes/i2c-mux-pca9541.c b/drivers/i2c/muxes/i2c-mux-pca9541.c index cb772775da431..0c8d4d2cbdaf4 100644 --- a/drivers/i2c/muxes/i2c-mux-pca9541.c +++ b/drivers/i2c/muxes/i2c-mux-pca9541.c @@ -104,7 +104,7 @@ static int pca9541_reg_write(struct i2c_client *client, u8 command, u8 val) buf[0] = command; buf[1] = val; msg.buf = buf; - ret = adap->algo->master_xfer(adap, &msg, 1); + ret = __i2c_transfer(adap, &msg, 1); } else { union i2c_smbus_data data; @@ -144,7 +144,7 @@ static int pca9541_reg_read(struct i2c_client *client, u8 command) .buf = &val } }; - ret = adap->algo->master_xfer(adap, msg, 2); + ret = __i2c_transfer(adap, msg, 2); if (ret == 2) ret = val; else if (ret >= 0) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index bea0d2de29938..ea4aa9dfcea96 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -134,7 +134,7 @@ static int pca954x_reg_write(struct i2c_adapter *adap, msg.len = 1; buf[0] = val; msg.buf = buf; - ret = adap->algo->master_xfer(adap, &msg, 1); + ret = __i2c_transfer(adap, &msg, 1); } else { union i2c_smbus_data data; ret = adap->algo->smbus_xfer(adap, client->addr, diff --git a/drivers/iio/accel/bmc150-accel.c b/drivers/iio/accel/bmc150-accel.c index 73e87739d2191..bf827d012a714 100644 --- a/drivers/iio/accel/bmc150-accel.c +++ b/drivers/iio/accel/bmc150-accel.c @@ -1465,7 +1465,7 @@ static void bmc150_accel_unregister_triggers(struct bmc150_accel_data *data, { int i; - for (i = from; i >= 0; i++) { + for (i = from; i >= 0; i--) { if (data->triggers[i].indio_trig) { iio_trigger_unregister(data->triggers[i].indio_trig); data->triggers[i].indio_trig = NULL; diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index 51da3692d5613..5b7a860df524b 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1418,6 +1418,7 @@ static const struct dev_pm_ops kxcjk1013_pm_ops = { static const struct acpi_device_id kx_acpi_match[] = { {"KXCJ1013", KXCJK1013}, {"KXCJ1008", KXCJ91008}, + {"KXCJ9000", KXCJ91008}, {"KXTJ1009", KXTJ21009}, {"SMO8500", KXCJ91008}, { }, diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 211b13271c615..2ae7150442fc8 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -149,8 +149,6 @@ #define ST_ACCEL_4_BDU_MASK 0x40 #define ST_ACCEL_4_DRDY_IRQ_ADDR 0x21 #define ST_ACCEL_4_DRDY_IRQ_INT1_MASK 0x04 -#define ST_ACCEL_4_IG1_EN_ADDR 0x21 -#define ST_ACCEL_4_IG1_EN_MASK 0x08 #define ST_ACCEL_4_MULTIREAD_BIT true static const struct iio_chan_spec st_accel_12bit_channels[] = { @@ -446,10 +444,6 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .drdy_irq = { .addr = ST_ACCEL_4_DRDY_IRQ_ADDR, .mask_int1 = ST_ACCEL_4_DRDY_IRQ_INT1_MASK, - .ig1 = { - .en_addr = ST_ACCEL_4_IG1_EN_ADDR, - .en_mask = ST_ACCEL_4_IG1_EN_MASK, - }, }, .multi_read_bit = ST_ACCEL_4_MULTIREAD_BIT, .bootime = 2, /* guess */ diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index e36a73e7c3a85..1bcb65b8d4a1a 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -146,8 +146,7 @@ config 
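The mux-related hunks above all chase the same hole: calling parent->algo->master_xfer() directly bypasses the core's per-adapter quirk validation (and tracing), and a mux that does not copy parent->quirks hides those quirks from its children entirely. __i2c_transfer() is the unlocked core entry point for code that already holds the adapter lock, as a mux transfer path does. A sketch of the corrected mux transfer, reconstructed around the i2c-mux.c hunk:

static int i2c_mux_master_xfer(struct i2c_adapter *adap,
                               struct i2c_msg *msgs, int num)
{
        struct i2c_mux_priv *priv = adap->algo_data;
        struct i2c_adapter *parent = priv->parent;
        int ret;

        ret = priv->select(parent, priv->mux_priv, priv->chan_id);
        if (ret >= 0)
                ret = __i2c_transfer(parent, msgs, num); /* quirks honoured */
        if (priv->deselect)
                priv->deselect(parent, priv->mux_priv, priv->chan_id);

        return ret;
}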
DA9150_GPADC config CC10001_ADC tristate "Cosmic Circuits 10001 ADC driver" - depends on HAVE_CLK || REGULATOR - depends on HAS_IOMEM + depends on HAS_IOMEM && HAVE_CLK && REGULATOR select IIO_BUFFER select IIO_TRIGGERED_BUFFER help diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 8a0eb4a04fb55..7b40925dd4ff2 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -182,7 +182,7 @@ struct at91_adc_caps { u8 ts_pen_detect_sensitivity; /* startup time calculate function */ - u32 (*calc_startup_ticks)(u8 startup_time, u32 adc_clk_khz); + u32 (*calc_startup_ticks)(u32 startup_time, u32 adc_clk_khz); u8 num_channels; struct at91_adc_reg_desc registers; @@ -201,7 +201,7 @@ struct at91_adc_state { u8 num_channels; void __iomem *reg_base; struct at91_adc_reg_desc *registers; - u8 startup_time; + u32 startup_time; u8 sample_hold_time; bool sleep_mode; struct iio_trigger **trig; @@ -779,7 +779,7 @@ static int at91_adc_of_get_resolution(struct at91_adc_state *st, return ret; } -static u32 calc_startup_ticks_9260(u8 startup_time, u32 adc_clk_khz) +static u32 calc_startup_ticks_9260(u32 startup_time, u32 adc_clk_khz) { /* * Number of ticks needed to cover the startup time of the ADC @@ -790,7 +790,7 @@ static u32 calc_startup_ticks_9260(u8 startup_time, u32 adc_clk_khz) return round_up((startup_time * adc_clk_khz / 1000) - 1, 8) / 8; } -static u32 calc_startup_ticks_9x5(u8 startup_time, u32 adc_clk_khz) +static u32 calc_startup_ticks_9x5(u32 startup_time, u32 adc_clk_khz) { /* * For sama5d3x and at91sam9x5, the formula changes to: diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c index 8d4e019ea4ca5..9c311c1e1ac7f 100644 --- a/drivers/iio/adc/rockchip_saradc.c +++ b/drivers/iio/adc/rockchip_saradc.c @@ -349,3 +349,7 @@ static struct platform_driver rockchip_saradc_driver = { }; module_platform_driver(rockchip_saradc_driver); + +MODULE_AUTHOR("Heiko Stuebner "); +MODULE_DESCRIPTION("Rockchip SARADC driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/adc/twl4030-madc.c b/drivers/iio/adc/twl4030-madc.c index 94c5f05b4bc1b..4caecbea4c97d 100644 --- a/drivers/iio/adc/twl4030-madc.c +++ b/drivers/iio/adc/twl4030-madc.c @@ -835,7 +835,8 @@ static int twl4030_madc_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, twl4030_madc_threaded_irq_handler, - IRQF_TRIGGER_RISING, "twl4030_madc", madc); + IRQF_TRIGGER_RISING | IRQF_ONESHOT, + "twl4030_madc", madc); if (ret) { dev_err(&pdev->dev, "could not request irq\n"); goto err_i2c; diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index 610fc98f88efa..595511022795f 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c @@ -36,6 +36,8 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) s32 poll_value = 0; if (state) { + if (!atomic_read(&st->user_requested_state)) + return 0; if (sensor_hub_device_open(st->hsdev)) return -EIO; @@ -52,8 +54,12 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) poll_value = hid_sensor_read_poll_value(st); } else { - if (!atomic_dec_and_test(&st->data_ready)) + int val; + + val = atomic_dec_if_positive(&st->data_ready); + if (val < 0) return 0; + sensor_hub_device_close(st->hsdev); state_val = hid_sensor_get_usage_index(st->hsdev, st->power_state.report_id, @@ -92,9 +98,11 @@ 
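The twl4030-madc change above illustrates a general rule: a threaded interrupt requested with a NULL primary handler must pass IRQF_ONESHOT, otherwise request_threaded_irq() rejects the request, since for a level-triggered line nothing would keep the interrupt masked between the hardirq and the thread. The fixed call, as in the hunk:

ret = devm_request_threaded_irq(&pdev->dev, irq,
                                NULL,           /* no hardirq handler */
                                twl4030_madc_threaded_irq_handler,
                                IRQF_TRIGGER_RISING | IRQF_ONESHOT,
                                "twl4030_madc", madc);
if (ret) {
        dev_err(&pdev->dev, "could not request irq\n");
        goto err_i2c;
}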
EXPORT_SYMBOL(hid_sensor_power_state); int hid_sensor_power_state(struct hid_sensor_common *st, bool state) { + #ifdef CONFIG_PM int ret; + atomic_set(&st->user_requested_state, state); if (state) ret = pm_runtime_get_sync(&st->pdev->dev); else { @@ -109,6 +117,7 @@ int hid_sensor_power_state(struct hid_sensor_common *st, bool state) return 0; #else + atomic_set(&st->user_requested_state, state); return _hid_sensor_power_state(st, state); #endif } diff --git a/drivers/iio/dac/ad5624r_spi.c b/drivers/iio/dac/ad5624r_spi.c index 61bb9d4239eaf..e98428df0d447 100644 --- a/drivers/iio/dac/ad5624r_spi.c +++ b/drivers/iio/dac/ad5624r_spi.c @@ -22,7 +22,7 @@ #include "ad5624r.h" static int ad5624r_spi_write(struct spi_device *spi, - u8 cmd, u8 addr, u16 val, u8 len) + u8 cmd, u8 addr, u16 val, u8 shift) { u32 data; u8 msg[3]; @@ -35,7 +35,7 @@ static int ad5624r_spi_write(struct spi_device *spi, * 14-, 12-bit input code followed by 0, 2, or 4 don't care bits, * for the AD5664R, AD5644R, and AD5624R, respectively. */ - data = (0 << 22) | (cmd << 19) | (addr << 16) | (val << (16 - len)); + data = (0 << 22) | (cmd << 19) | (addr << 16) | (val << shift); msg[0] = data >> 16; msg[1] = data >> 8; msg[2] = data; diff --git a/drivers/iio/gyro/Kconfig b/drivers/iio/gyro/Kconfig index b3d0e94f72eb7..8d24393456739 100644 --- a/drivers/iio/gyro/Kconfig +++ b/drivers/iio/gyro/Kconfig @@ -53,7 +53,8 @@ config ADXRS450 config BMG160 tristate "BOSCH BMG160 Gyro Sensor" depends on I2C - select IIO_TRIGGERED_BUFFER if IIO_BUFFER + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say yes here to build support for Bosch BMG160 Tri-axis Gyro Sensor driver. This driver also supports BMI055 gyroscope. diff --git a/drivers/iio/imu/adis16400_core.c b/drivers/iio/imu/adis16400_core.c index 2fd68f2219a7d..d42e4fe2c7ed3 100644 --- a/drivers/iio/imu/adis16400_core.c +++ b/drivers/iio/imu/adis16400_core.c @@ -780,7 +780,7 @@ static struct adis16400_chip_info adis16400_chips[] = { .flags = ADIS16400_HAS_PROD_ID | ADIS16400_HAS_SERIAL_NUMBER | ADIS16400_BURST_DIAG_STAT, - .gyro_scale_micro = IIO_DEGREE_TO_RAD(10000), /* 0.01 deg/s */ + .gyro_scale_micro = IIO_DEGREE_TO_RAD(40000), /* 0.04 deg/s */ .accel_scale_micro = IIO_G_TO_M_S_2(833), /* 1/1200 g */ .temp_scale_nano = 73860000, /* 0.07386 C */ .temp_offset = 31000000 / 73860, /* 31 C = 0x00 */ diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c index 989605dd6f781..b94bfd3f595bf 100644 --- a/drivers/iio/imu/adis16480.c +++ b/drivers/iio/imu/adis16480.c @@ -110,6 +110,10 @@ struct adis16480_chip_info { unsigned int num_channels; const struct iio_chan_spec *channels; + unsigned int gyro_max_val; + unsigned int gyro_max_scale; + unsigned int accel_max_val; + unsigned int accel_max_scale; }; struct adis16480 { @@ -497,19 +501,21 @@ static int adis16480_set_filter_freq(struct iio_dev *indio_dev, static int adis16480_read_raw(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, int *val, int *val2, long info) { + struct adis16480 *st = iio_priv(indio_dev); + switch (info) { case IIO_CHAN_INFO_RAW: return adis_single_conversion(indio_dev, chan, 0, val); case IIO_CHAN_INFO_SCALE: switch (chan->type) { case IIO_ANGL_VEL: - *val = 0; - *val2 = IIO_DEGREE_TO_RAD(20000); /* 0.02 degree/sec */ - return IIO_VAL_INT_PLUS_MICRO; + *val = st->chip_info->gyro_max_scale; + *val2 = st->chip_info->gyro_max_val; + return IIO_VAL_FRACTIONAL; case IIO_ACCEL: - *val = 0; - *val2 = IIO_G_TO_M_S_2(800); /* 0.8 mg */ - return IIO_VAL_INT_PLUS_MICRO; + *val = 
st->chip_info->accel_max_scale; + *val2 = st->chip_info->accel_max_val; + return IIO_VAL_FRACTIONAL; case IIO_MAGN: *val = 0; *val2 = 100; /* 0.0001 gauss */ @@ -674,18 +680,39 @@ static const struct adis16480_chip_info adis16480_chip_info[] = { [ADIS16375] = { .channels = adis16485_channels, .num_channels = ARRAY_SIZE(adis16485_channels), + /* + * storing the value in rad/degree and the scale in degree + * gives us the result in rad and better precision than + * storing the scale directly in rad. + */ + .gyro_max_val = IIO_RAD_TO_DEGREE(22887), + .gyro_max_scale = 300, + .accel_max_val = IIO_M_S_2_TO_G(21973), + .accel_max_scale = 18, }, [ADIS16480] = { .channels = adis16480_channels, .num_channels = ARRAY_SIZE(adis16480_channels), + .gyro_max_val = IIO_RAD_TO_DEGREE(22500), + .gyro_max_scale = 450, + .accel_max_val = IIO_M_S_2_TO_G(12500), + .accel_max_scale = 5, }, [ADIS16485] = { .channels = adis16485_channels, .num_channels = ARRAY_SIZE(adis16485_channels), + .gyro_max_val = IIO_RAD_TO_DEGREE(22500), + .gyro_max_scale = 450, + .accel_max_val = IIO_M_S_2_TO_G(20000), + .accel_max_scale = 5, }, [ADIS16488] = { .channels = adis16480_channels, .num_channels = ARRAY_SIZE(adis16480_channels), + .gyro_max_val = IIO_RAD_TO_DEGREE(22500), + .gyro_max_scale = 450, + .accel_max_val = IIO_M_S_2_TO_G(22500), + .accel_max_scale = 18, }, }; diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c index 17d4bb15be4d2..65ce868371771 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c @@ -431,6 +431,23 @@ static int inv_mpu6050_write_gyro_scale(struct inv_mpu6050_state *st, int val) return -EINVAL; } +static int inv_write_raw_get_fmt(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, long mask) +{ + switch (mask) { + case IIO_CHAN_INFO_SCALE: + switch (chan->type) { + case IIO_ANGL_VEL: + return IIO_VAL_INT_PLUS_NANO; + default: + return IIO_VAL_INT_PLUS_MICRO; + } + default: + return IIO_VAL_INT_PLUS_MICRO; + } + + return -EINVAL; +} static int inv_mpu6050_write_accel_scale(struct inv_mpu6050_state *st, int val) { int result, i; @@ -696,6 +713,7 @@ static const struct iio_info mpu_info = { .driver_module = THIS_MODULE, .read_raw = &inv_mpu6050_read_raw, .write_raw = &inv_mpu6050_write_raw, + .write_raw_get_fmt = &inv_write_raw_get_fmt, .attrs = &inv_attribute_group, .validate_trigger = inv_mpu6050_validate_trigger, }; diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index df919f44d513f..7fa280b28ecb7 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -151,7 +151,7 @@ unsigned int iio_buffer_poll(struct file *filp, struct iio_buffer *rb = indio_dev->buffer; if (!indio_dev->info) - return -ENODEV; + return 0; poll_wait(filp, &rb->pollq, wait); if (iio_buffer_ready(indio_dev, rb, rb->watermark, 0)) diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c index a99692ba91bc7..69b8c338fa894 100644 --- a/drivers/iio/industrialio-event.c +++ b/drivers/iio/industrialio-event.c @@ -84,7 +84,7 @@ static unsigned int iio_event_poll(struct file *filep, unsigned int events = 0; if (!indio_dev->info) - return -ENODEV; + return events; poll_wait(filep, &ev_int->wait, wait); diff --git a/drivers/iio/light/cm3323.c b/drivers/iio/light/cm3323.c index 869033e48a1fa..a1d4905cc9d2d 100644 --- a/drivers/iio/light/cm3323.c +++ b/drivers/iio/light/cm3323.c @@ -123,7 +123,7 @@ static int cm3323_set_it_bits(struct 
cm3323_data *data, int val, int val2) for (i = 0; i < ARRAY_SIZE(cm3323_int_time); i++) { if (val == cm3323_int_time[i].val && val2 == cm3323_int_time[i].val2) { - reg_conf = data->reg_conf; + reg_conf = data->reg_conf & ~CM3323_CONF_IT_MASK; reg_conf |= i << CM3323_CONF_IT_SHIFT; ret = i2c_smbus_write_word_data(data->client, diff --git a/drivers/iio/light/tcs3414.c b/drivers/iio/light/tcs3414.c index 71c2bde275aac..f8b1df018abee 100644 --- a/drivers/iio/light/tcs3414.c +++ b/drivers/iio/light/tcs3414.c @@ -185,7 +185,7 @@ static int tcs3414_write_raw(struct iio_dev *indio_dev, if (val != 0) return -EINVAL; for (i = 0; i < ARRAY_SIZE(tcs3414_times); i++) { - if (val == tcs3414_times[i] * 1000) { + if (val2 == tcs3414_times[i] * 1000) { data->timing &= ~TCS3414_INTEG_MASK; data->timing |= i; return i2c_smbus_write_byte_data( diff --git a/drivers/iio/proximity/sx9500.c b/drivers/iio/proximity/sx9500.c index fa40f6d0ca394..bd26a484abcc3 100644 --- a/drivers/iio/proximity/sx9500.c +++ b/drivers/iio/proximity/sx9500.c @@ -206,7 +206,7 @@ static int sx9500_read_proximity(struct sx9500_data *data, if (ret < 0) return ret; - *val = 32767 - (s16)be16_to_cpu(regval); + *val = be16_to_cpu(regval); return IIO_VAL_INT; } diff --git a/drivers/iio/temperature/tmp006.c b/drivers/iio/temperature/tmp006.c index 84a0789c3d968..7a8050996b4ec 100644 --- a/drivers/iio/temperature/tmp006.c +++ b/drivers/iio/temperature/tmp006.c @@ -132,6 +132,9 @@ static int tmp006_write_raw(struct iio_dev *indio_dev, struct tmp006_data *data = iio_priv(indio_dev); int i; + if (mask != IIO_CHAN_INFO_SAMP_FREQ) + return -EINVAL; + for (i = 0; i < ARRAY_SIZE(tmp006_freqs); i++) if ((val == tmp006_freqs[i][0]) && (val2 == tmp006_freqs[i][1])) { diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0271608a51c40..0962b6821ce1f 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -859,6 +859,11 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) case IB_CM_SIDR_REQ_RCVD: spin_unlock_irq(&cm_id_priv->lock); cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); + spin_lock_irq(&cm.lock); + if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) + rb_erase(&cm_id_priv->sidr_id_node, + &cm.remote_sidr_table); + spin_unlock_irq(&cm.lock); break; case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: @@ -3098,7 +3103,10 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, spin_unlock_irqrestore(&cm_id_priv->lock, flags); spin_lock_irqsave(&cm.lock, flags); - rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); + if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) { + rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); + RB_CLEAR_NODE(&cm_id_priv->sidr_id_node); + } spin_unlock_irqrestore(&cm.lock, flags); return 0; diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index b716b08156446..bebf11a6622a7 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -85,7 +85,7 @@ */ struct ib_uverbs_device { - struct kref ref; + atomic_t refcount; int num_comp_vectors; struct completion comp; struct device *dev; @@ -94,6 +94,7 @@ struct ib_uverbs_device { struct cdev cdev; struct rb_root xrcd_tree; struct mutex xrcd_tree_mutex; + struct kobject kobj; }; struct ib_uverbs_event_file { diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a9f048990dfcd..ccc2494b4ea73 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2244,6 +2244,12 @@ ssize_t 
ib_uverbs_post_send(struct ib_uverbs_file *file, next->send_flags = user_wr->send_flags; if (is_ud) { + if (next->opcode != IB_WR_SEND && + next->opcode != IB_WR_SEND_WITH_IMM) { + ret = -EINVAL; + goto out_put; + } + next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext); if (!next->wr.ud.ah) { @@ -2283,9 +2289,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, user_wr->wr.atomic.compare_add; next->wr.atomic.swap = user_wr->wr.atomic.swap; next->wr.atomic.rkey = user_wr->wr.atomic.rkey; + case IB_WR_SEND: break; default: - break; + ret = -EINVAL; + goto out_put; } } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 88cce9bb72fea..09686d49d4c14 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -129,14 +129,18 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device); -static void ib_uverbs_release_dev(struct kref *ref) +static void ib_uverbs_release_dev(struct kobject *kobj) { struct ib_uverbs_device *dev = - container_of(ref, struct ib_uverbs_device, ref); + container_of(kobj, struct ib_uverbs_device, kobj); - complete(&dev->comp); + kfree(dev); } +static struct kobj_type ib_uverbs_dev_ktype = { + .release = ib_uverbs_release_dev, +}; + static void ib_uverbs_release_event_file(struct kref *ref) { struct ib_uverbs_event_file *file = @@ -302,13 +306,19 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, return context->device->dealloc_ucontext(context); } +static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) +{ + complete(&dev->comp); +} + static void ib_uverbs_release_file(struct kref *ref) { struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref); module_put(file->device->ib_dev->owner); - kref_put(&file->device->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&file->device->refcount)) + ib_uverbs_comp_dev(file->device); kfree(file); } @@ -742,9 +752,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) int ret; dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev); - if (dev) - kref_get(&dev->ref); - else + if (!atomic_inc_not_zero(&dev->refcount)) return -ENXIO; if (!try_module_get(dev->ib_dev->owner)) { @@ -765,6 +773,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) mutex_init(&file->mutex); filp->private_data = file; + kobject_get(&dev->kobj); return nonseekable_open(inode, filp); @@ -772,13 +781,16 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) module_put(dev->ib_dev->owner); err: - kref_put(&dev->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&dev->refcount)) + ib_uverbs_comp_dev(dev); + return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; + struct ib_uverbs_device *dev = file->device; ib_uverbs_cleanup_ucontext(file, file->ucontext); @@ -786,6 +798,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->ref, ib_uverbs_release_file); + kobject_put(&dev->kobj); return 0; } @@ -881,10 +894,11 @@ static void ib_uverbs_add_one(struct ib_device *device) if (!uverbs_dev) return; - kref_init(&uverbs_dev->ref); + atomic_set(&uverbs_dev->refcount, 1); init_completion(&uverbs_dev->comp); uverbs_dev->xrcd_tree = RB_ROOT; 
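/*
 * Editor's note (illustrative sketch, not part of the patch): the uverbs
 * rework above splits the old single kref into two lifetimes. An atomic_t
 * counts live opens so removal can drain them through a completion, while
 * the new kobject keeps the structure's memory valid until the embedded
 * cdev drops its parent reference. A stand-alone model of the drain half,
 * with hypothetical names (dev_state, dev_open, dev_put):
 */
#include <stdatomic.h>
#include <stdbool.h>

struct dev_state {
	atomic_int refcount;		/* 1 for the device + 1 per open file */
	bool removal_may_proceed;	/* stands in for complete(&dev->comp) */
};

/* Mirrors atomic_inc_not_zero(): a new open fails once removal has
 * dropped the initial reference and the count has reached zero. */
static bool dev_open(struct dev_state *d)
{
	int old = atomic_load(&d->refcount);

	while (old != 0 &&
	       !atomic_compare_exchange_weak(&d->refcount, &old, old + 1))
		;
	return old != 0;
}

static void dev_put(struct dev_state *d)
{
	/* Mirrors atomic_dec_and_test() followed by complete(). */
	if (atomic_fetch_sub(&d->refcount, 1) == 1)
		d->removal_may_proceed = true;
}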
mutex_init(&uverbs_dev->xrcd_tree_mutex); + kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype); spin_lock(&map_lock); devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); @@ -911,6 +925,7 @@ static void ib_uverbs_add_one(struct ib_device *device) cdev_init(&uverbs_dev->cdev, NULL); uverbs_dev->cdev.owner = THIS_MODULE; uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; + uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj; kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); if (cdev_add(&uverbs_dev->cdev, base, 1)) goto err_cdev; @@ -941,9 +956,10 @@ static void ib_uverbs_add_one(struct ib_device *device) clear_bit(devnum, overflow_map); err: - kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&uverbs_dev->refcount)) + ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); - kfree(uverbs_dev); + kobject_put(&uverbs_dev->kobj); return; } @@ -963,9 +979,10 @@ static void ib_uverbs_remove_one(struct ib_device *device) else clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map); - kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&uverbs_dev->refcount)) + ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); - kfree(uverbs_dev); + kobject_put(&uverbs_dev->kobj); } static char *uverbs_devnode(struct device *dev, umode_t *mode) diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index f50a546224adf..33fdd50123f73 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -148,9 +148,13 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) enum rdma_link_layer ll; memset(ah_attr, 0, sizeof *ah_attr); - ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24; ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num); + if (ll == IB_LINK_LAYER_ETHERNET) + ah_attr->sl = be32_to_cpu(ah->av.eth.sl_tclass_flowlabel) >> 29; + else + ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; + ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? 
be16_to_cpu(ah->av.ib.dlid) : 0; if (ah->av.ib.stat_rate) ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 0176caa5792c4..2857ed89725e6 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -629,7 +629,7 @@ static void mlx4_ib_poll_sw_comp(struct mlx4_ib_cq *cq, int num_entries, * simulated FLUSH_ERR completions */ list_for_each_entry(qp, &cq->send_qp_list, cq_send_list) { - mlx4_ib_qp_sw_comp(qp, num_entries, wc, npolled, 1); + mlx4_ib_qp_sw_comp(qp, num_entries, wc + *npolled, npolled, 1); if (*npolled >= num_entries) goto out; } diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index ed327e6c8fdca..a0559a8af4f4d 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -206,15 +206,16 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad) { struct mlx4_ib_dev *dev = ctx->dev; struct ib_ah_attr ah_attr; + unsigned long flags; - spin_lock(&dev->sm_lock); + spin_lock_irqsave(&dev->sm_lock, flags); if (!dev->sm_ah[ctx->port - 1]) { /* port is not yet Active, sm_ah not ready */ - spin_unlock(&dev->sm_lock); + spin_unlock_irqrestore(&dev->sm_lock, flags); return -EAGAIN; } mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr); - spin_unlock(&dev->sm_lock); + spin_unlock_irqrestore(&dev->sm_lock, flags); return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, NULL, mad); diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 6797108ce8735..69fb5ba94d0f2 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -640,6 +640,8 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) struct mlx4_port *p; int i; int ret; + int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port_num) == + IB_LINK_LAYER_ETHERNET; p = kzalloc(sizeof *p, GFP_KERNEL); if (!p) @@ -657,7 +659,8 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) p->pkey_group.name = "pkey_idx"; p->pkey_group.attrs = - alloc_group_attrs(show_port_pkey, store_port_pkey, + alloc_group_attrs(show_port_pkey, + is_eth ? NULL : store_port_pkey, dev->dev->caps.pkey_table_len[port_num]); if (!p->pkey_group.attrs) { ret = -ENOMEM; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 71c5935838649..0c52f078759c0 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1119,19 +1119,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return &mr->ibmr; error: - /* - * Destroy the umem *before* destroying the MR, to ensure we - * will not have any in-flight notifiers when destroying the - * MR. - * - * As the MR is completely invalid to begin with, and this - * error path is only taken if we can't push the mr entry into - * the pagefault tree, this is safe. - */ - ib_umem_release(umem); - /* Kill the MR, and return an error code. 
*/ - clean_mr(mr); return ERR_PTR(err); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 9dcb66077d6cb..219f2122f9b96 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -679,7 +679,6 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev, ocrdma_release_ucontext_pd(uctx); } else { status = _ocrdma_dealloc_pd(dev, pd); - kfree(pd); } exit: return ERR_PTR(status); } diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index ad843c786e721..5afaa218508d2 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -86,6 +86,10 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) * unrestricted LKEY. */ rkt->gen++; + /* + * bits are capped in qib_verbs.c to ensure enough bits + * for generation number + */ mr->lkey = (r << (32 - ib_qib_lkey_table_size)) | ((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen) << 8); diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 4fa88ba2963e6..131994382b220 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -100,9 +100,10 @@ static u32 credit_table[31] = { 32768 /* 1E */ }; -static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map) +static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map, + gfp_t gfp) { - unsigned long page = get_zeroed_page(GFP_KERNEL); + unsigned long page = get_zeroed_page(gfp); /* * Free the page if someone raced with us installing it. @@ -121,7 +122,7 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map) * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. */ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, - enum ib_qp_type type, u8 port) + enum ib_qp_type type, u8 port, gfp_t gfp) { u32 i, offset, max_scan, qpn; struct qpn_map *map; @@ -151,7 +152,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - get_map_page(qpt, map); + get_map_page(qpt, map, gfp); if (unlikely(!map->page)) break; } @@ -983,13 +984,21 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, size_t sz; size_t sg_list_sz; struct ib_qp *ret; + gfp_t gfp; + if (init_attr->cap.max_send_sge > ib_qib_max_sges || init_attr->cap.max_send_wr > ib_qib_max_qp_wrs || - init_attr->create_flags) { - ret = ERR_PTR(-EINVAL); - goto bail; - } + init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) + return ERR_PTR(-EINVAL); + + /* GFP_NOIO is applicable in RC QPs only */ + if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && + init_attr->qp_type != IB_QPT_RC) + return ERR_PTR(-EINVAL); + + gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? + GFP_NOIO : GFP_KERNEL; /* Check receive queue parameters if no SRQ is specified. 
*/ if (!init_attr->srq) { @@ -1021,7 +1030,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, sz = sizeof(struct qib_sge) * init_attr->cap.max_send_sge + sizeof(struct qib_swqe); - swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); + swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz, + gfp, PAGE_KERNEL); if (swq == NULL) { ret = ERR_PTR(-ENOMEM); goto bail; @@ -1037,13 +1047,13 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); - qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); + qp = kzalloc(sz + sg_list_sz, gfp); if (!qp) { ret = ERR_PTR(-ENOMEM); goto bail_swq; } RCU_INIT_POINTER(qp->next, NULL); - qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL); + qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp); if (!qp->s_hdr) { ret = ERR_PTR(-ENOMEM); goto bail_qp; @@ -1058,8 +1068,16 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct qib_rwqe); - qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) + - qp->r_rq.size * sz); + if (gfp != GFP_NOIO) + qp->r_rq.wq = vmalloc_user( + sizeof(struct qib_rwq) + + qp->r_rq.size * sz); + else + qp->r_rq.wq = __vmalloc( + sizeof(struct qib_rwq) + + qp->r_rq.size * sz, + gfp, PAGE_KERNEL); + if (!qp->r_rq.wq) { ret = ERR_PTR(-ENOMEM); goto bail_qp; @@ -1090,7 +1108,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, dev = to_idev(ibpd->device); dd = dd_from_dev(dev); err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type, - init_attr->port_num); + init_attr->port_num, gfp); if (err < 0) { ret = ERR_PTR(err); vfree(qp->r_rq.wq); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 4a3599890ea5f..9dd5d9a0556b3 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "qib.h" #include "qib_common.h" @@ -2089,10 +2090,16 @@ int qib_register_ib_device(struct qib_devdata *dd) * the LKEY). The remaining bits act as a generation number or tag. 
*/ spin_lock_init(&dev->lk_table.lock); + /* ensure generation is at least 4 bits; see keys.c */ + if (ib_qib_lkey_table_size > MAX_LKEY_TABLE_BITS) { + qib_dev_warn(dd, "lkey bits %u too large, reduced to %u\n", + ib_qib_lkey_table_size, MAX_LKEY_TABLE_BITS); + ib_qib_lkey_table_size = MAX_LKEY_TABLE_BITS; + } dev->lk_table.max = 1 << ib_qib_lkey_table_size; lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); dev->lk_table.table = (struct qib_mregion __rcu **) - __get_free_pages(GFP_KERNEL, get_order(lk_tab_size)); + vmalloc(lk_tab_size); if (dev->lk_table.table == NULL) { ret = -ENOMEM; goto err_lk; @@ -2265,7 +2272,7 @@ int qib_register_ib_device(struct qib_devdata *dd) sizeof(struct qib_pio_header), dev->pio_hdrs, dev->pio_hdrs_phys); err_hdrs: - free_pages((unsigned long) dev->lk_table.table, get_order(lk_tab_size)); + vfree(dev->lk_table.table); err_lk: kfree(dev->qp_table); err_qpt: @@ -2319,8 +2326,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd) sizeof(struct qib_pio_header), dev->pio_hdrs, dev->pio_hdrs_phys); lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); - free_pages((unsigned long) dev->lk_table.table, - get_order(lk_tab_size)); + vfree(dev->lk_table.table); kfree(dev->qp_table); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index bfc8948fdd359..44ca28c83fe60 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -647,6 +647,8 @@ struct qib_qpn_table { struct qpn_map map[QPNMAP_ENTRIES]; }; +#define MAX_LKEY_TABLE_BITS 23 + struct qib_lkey_table { spinlock_t lock; /* protect changes in this struct */ u32 next; /* next unused index (speeds search) */ diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c index f8ea069a3eafc..b2fb5286dbd98 100644 --- a/drivers/infiniband/hw/qib/qib_verbs_mcast.c +++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c @@ -286,15 +286,13 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) struct qib_ibdev *dev = to_idev(ibqp->device); struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num); struct qib_mcast *mcast = NULL; - struct qib_mcast_qp *p, *tmp; + struct qib_mcast_qp *p, *tmp, *delp = NULL; struct rb_node *n; int last = 0; int ret; - if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) { - ret = -EINVAL; - goto bail; - } + if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) + return -EINVAL; spin_lock_irq(&ibp->lock); @@ -303,8 +301,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) while (1) { if (n == NULL) { spin_unlock_irq(&ibp->lock); - ret = -EINVAL; - goto bail; + return -EINVAL; } mcast = rb_entry(n, struct qib_mcast, rb_node); @@ -328,6 +325,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) */ list_del_rcu(&p->list); mcast->n_attached--; + delp = p; /* If this was the last attached QP, remove the GID too. */ if (list_empty(&mcast->qp_list)) { @@ -338,15 +336,16 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } spin_unlock_irq(&ibp->lock); + /* QP not attached */ + if (!delp) + return -EINVAL; + /* + * Wait for any list walkers to finish before freeing the + * list element. + */ + wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); + qib_mcast_qp_free(delp); - if (p) { - /* - * Wait for any list walkers to finish before freeing the - * list element. 
- */ - wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); - qib_mcast_qp_free(p); - } if (last) { atomic_dec(&mcast->refcount); wait_event(mcast->wait, !atomic_read(&mcast->refcount)); @@ -355,11 +354,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) dev->n_mcast_grps_allocated--; spin_unlock_irq(&dev->n_mcast_grps_lock); } - - ret = 0; - -bail: - return ret; + return 0; } int qib_mcast_tree_empty(struct qib_ibport *ibp) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index e5cc43074196d..2d13fd08ceb74 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -176,7 +176,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) else size += ipoib_recvq_size * ipoib_max_conn_qp; } else - goto out_free_wq; + if (ret != -ENOSYS) + goto out_free_wq; priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); if (IS_ERR(priv->recv_cq)) { diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 6a594aac22900..c933d882c35c4 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -201,6 +201,7 @@ iser_initialize_task_headers(struct iscsi_task *task, goto out; } + tx_desc->mapped = true; tx_desc->dma_addr = dma_addr; tx_desc->tx_sg[0].addr = tx_desc->dma_addr; tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; @@ -360,16 +361,19 @@ iscsi_iser_task_xmit(struct iscsi_task *task) static void iscsi_iser_cleanup_task(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_tx_desc *tx_desc = &iser_task->desc; - struct iser_conn *iser_conn = task->conn->dd_data; + struct iser_tx_desc *tx_desc = &iser_task->desc; + struct iser_conn *iser_conn = task->conn->dd_data; struct iser_device *device = iser_conn->ib_conn.device; /* DEVICE_REMOVAL event might have already released the device */ if (!device) return; - ib_dma_unmap_single(device->ib_device, - tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); + if (likely(tx_desc->mapped)) { + ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, + ISER_HEADERS_LEN, DMA_TO_DEVICE); + tx_desc->mapped = false; + } /* mgmt tasks do not need special cleanup */ if (!task->sc) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 262ba1f8ee507..d2b6caf7694d7 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -270,6 +270,7 @@ enum iser_desc_type { * sg[1] optionally points to either of immediate data * unsolicited data-out or control * @num_sge: number sges used on this TX task + * @mapped: Is the task header mapped */ struct iser_tx_desc { struct iser_hdr iser_header; @@ -278,6 +279,7 @@ struct iser_tx_desc { u64 dma_addr; struct ib_sge tx_sg[2]; int num_sge; + bool mapped; }; #define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \ diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 3e2118e8ed879..0a47f42fec24e 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -454,7 +454,7 @@ int iser_send_data_out(struct iscsi_conn *conn, unsigned long buf_offset; unsigned long data_seg_len; uint32_t itt; - int err = 0; + int err; struct ib_sge *tx_dsg; itt = (__force uint32_t)hdr->itt; @@ -475,7 +475,9 @@ int iser_send_data_out(struct iscsi_conn *conn, 
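/*
 * Editor's note (illustrative sketch, not part of the patch): the iser
 * hunks above add tx_desc->mapped as a "was this DMA-mapped?" guard, so
 * cleanup is safe on descriptors whose header mapping failed and safe to
 * run twice. Reduced to its core, with placeholder types and a commented
 * stand-in for the real unmap call:
 */
#include <stdbool.h>

struct tx_desc_model {
	unsigned long long dma_addr;
	bool mapped;
};

static void tx_desc_cleanup(struct tx_desc_model *d)
{
	if (!d->mapped)
		return;		/* never mapped, or already cleaned up */
	/* ib_dma_unmap_single(dev, d->dma_addr, len, DMA_TO_DEVICE); */
	d->mapped = false;	/* make a repeated cleanup call a no-op */
}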
memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr)); /* build the tx desc */ - iser_initialize_task_headers(task, tx_desc); + err = iser_initialize_task_headers(task, tx_desc); + if (err) + goto send_data_out_error; mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT]; tx_dsg = &tx_desc->tx_sg[1]; @@ -502,7 +504,7 @@ int iser_send_data_out(struct iscsi_conn *conn, send_data_out_error: kmem_cache_free(ig.desc_cache, tx_desc); - iser_err("conn %p failed err %d\n",conn, err); + iser_err("conn %p failed err %d\n", conn, err); return err; } diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 575a072d765f6..353e2ab090eef 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1349,7 +1349,7 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn, if (!rc && dump_payload == false && unsol_data) iscsit_set_unsoliticed_dataout(cmd); else if (dump_payload && imm_data) - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return 0; } @@ -1774,7 +1774,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err) cmd->se_cmd.t_state == TRANSPORT_WRITE_PENDING) { struct se_cmd *se_cmd = &cmd->se_cmd; - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); } } @@ -1947,7 +1947,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, spin_unlock_bh(&cmd->istate_lock); if (ret) { - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); transport_send_check_condition_and_sense(se_cmd, se_cmd->pi_err, 0); } else { @@ -2996,9 +2996,16 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) static int isert_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) { - int ret; + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); + int ret = 0; switch (state) { + case ISTATE_REMOVE: + spin_lock_bh(&conn->cmd_lock); + list_del_init(&cmd->i_conn_node); + spin_unlock_bh(&conn->cmd_lock); + isert_put_cmd(isert_cmd, true); + break; case ISTATE_SEND_NOPIN_WANT_RESPONSE: ret = isert_put_nopin(cmd, conn, false); break; @@ -3363,6 +3370,41 @@ isert_wait4flush(struct isert_conn *isert_conn) wait_for_completion(&isert_conn->wait_comp_err); } +/** + * isert_put_unsol_pending_cmds() - Drop commands waiting for + * unsolicited dataout + * @conn: iscsi connection + * + * We might still have commands that are waiting for unsolicited + * dataout messages. 
We must put the extra reference on those + * before blocking on target_wait_for_session_cmds() + */ +static void +isert_put_unsol_pending_cmds(struct iscsi_conn *conn) +{ + struct iscsi_cmd *cmd, *tmp; + static LIST_HEAD(drop_cmd_list); + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry_safe(cmd, tmp, &conn->conn_cmd_list, i_conn_node) { + if ((cmd->cmd_flags & ICF_NON_IMMEDIATE_UNSOLICITED_DATA) && + (cmd->write_data_done < conn->sess->sess_ops->FirstBurstLength) && + (cmd->write_data_done < cmd->se_cmd.data_length)) + list_move_tail(&cmd->i_conn_node, &drop_cmd_list); + } + spin_unlock_bh(&conn->cmd_lock); + + list_for_each_entry_safe(cmd, tmp, &drop_cmd_list, i_conn_node) { + list_del_init(&cmd->i_conn_node); + if (cmd->i_state != ISTATE_REMOVE) { + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); + + isert_info("conn %p dropping cmd %p\n", conn, cmd); + isert_put_cmd(isert_cmd, true); + } + } +} + static void isert_wait_conn(struct iscsi_conn *conn) { struct isert_conn *isert_conn = conn->context; @@ -3381,8 +3423,9 @@ static void isert_wait_conn(struct iscsi_conn *conn) isert_conn_terminate(isert_conn); mutex_unlock(&isert_conn->mutex); - isert_wait4cmds(conn); isert_wait4flush(isert_conn); + isert_wait4flush(isert_conn); + isert_put_unsol_pending_cmds(conn); + isert_wait4cmds(conn); isert_wait4logout(isert_conn); queue_work(isert_release_wq, &isert_conn->release_work); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 918814cd0f806..025f931054447 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -465,14 +465,13 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) */ static void srp_destroy_qp(struct srp_rdma_ch *ch) { - struct srp_target_port *target = ch->target; static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID }; struct ib_recv_wr *bad_wr; int ret; /* Destroying a QP and reusing ch->done is only safe if not connected */ - WARN_ON_ONCE(target->connected); + WARN_ON_ONCE(ch->connected); ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE); WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret); @@ -811,35 +810,19 @@ static bool srp_queue_remove_work(struct srp_target_port *target) return changed; } -static bool srp_change_conn_state(struct srp_target_port *target, - bool connected) -{ - bool changed = false; - - spin_lock_irq(&target->lock); - if (target->connected != connected) { - target->connected = connected; - changed = true; - } - spin_unlock_irq(&target->lock); - - return changed; -} - static void srp_disconnect_target(struct srp_target_port *target) { struct srp_rdma_ch *ch; int i; - if (srp_change_conn_state(target, false)) { - /* XXX should send SRP_I_LOGOUT request */ + /* XXX should send SRP_I_LOGOUT request */ - for (i = 0; i < target->ch_count; i++) { - ch = &target->ch[i]; - if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) { - shost_printk(KERN_DEBUG, target->scsi_host, - PFX "Sending CM DREQ failed\n"); - } + for (i = 0; i < target->ch_count; i++) { + ch = &target->ch[i]; + ch->connected = false; + if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) { + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM DREQ failed\n"); } } } @@ -986,14 +969,26 @@ static void srp_rport_delete(struct srp_rport *rport) srp_queue_remove_work(target); } +/** + * srp_connected_ch() - number of connected channels + * @target: SRP target port. 
+ */ +static int srp_connected_ch(struct srp_target_port *target) +{ + int i, c = 0; + + for (i = 0; i < target->ch_count; i++) + c += target->ch[i].connected; + + return c; +} + static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) { struct srp_target_port *target = ch->target; int ret; - WARN_ON_ONCE(!multich && target->connected); - - target->qp_in_error = false; + WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0); ret = srp_lookup_path(ch); if (ret) @@ -1016,7 +1011,7 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) */ switch (ch->status) { case 0: - srp_change_conn_state(target, true); + ch->connected = true; return 0; case SRP_PORT_REDIRECT: @@ -1243,13 +1238,13 @@ static int srp_rport_reconnect(struct srp_rport *rport) for (j = 0; j < target->queue_size; ++j) list_add(&ch->tx_ring[j]->list, &ch->free_tx); } + + target->qp_in_error = false; + for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - if (ret || !ch->target) { - if (i > 1) - ret = 0; + if (ret || !ch->target) break; - } ret = srp_connect_ch(ch, multich); multich = true; } @@ -1929,7 +1924,7 @@ static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, return; } - if (target->connected && !target->qp_in_error) { + if (ch->connected && !target->qp_in_error) { if (wr_id & LOCAL_INV_WR_ID_MASK) { shost_printk(KERN_ERR, target->scsi_host, PFX "LOCAL_INV failed with status %d\n", @@ -2367,7 +2362,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) case IB_CM_DREQ_RECEIVED: shost_printk(KERN_WARNING, target->scsi_host, PFX "DREQ received - connection closed\n"); - srp_change_conn_state(target, false); + ch->connected = false; if (ib_send_cm_drep(cm_id, NULL, 0)) shost_printk(KERN_ERR, target->scsi_host, PFX "Sending CM DREP failed\n"); @@ -2423,7 +2418,7 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, struct srp_iu *iu; struct srp_tsk_mgmt *tsk_mgmt; - if (!target->connected || target->qp_in_error) + if (!ch->connected || target->qp_in_error) return -1; init_completion(&ch->tsk_mgmt_done); @@ -2766,6 +2761,13 @@ static int srp_sdev_count(struct Scsi_Host *host) return c; } +/* + * Return values: + * < 0 upon failure. Caller is responsible for SRP target port cleanup. + * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port + * removal has been scheduled. + * 0 and target->state != SRP_TARGET_REMOVED upon success. 
+ */ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) { struct srp_rport_identifiers ids; @@ -2797,7 +2799,8 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) scsi_scan_target(&target->scsi_host->shost_gendev, 0, target->scsi_id, SCAN_WILD_CARD, 0); - if (!target->connected || target->qp_in_error) { + if (srp_connected_ch(target) < target->ch_count || + target->qp_in_error) { shost_printk(KERN_INFO, target->scsi_host, PFX "SCSI scan failed - removing SCSI host\n"); srp_queue_remove_work(target); @@ -3172,11 +3175,11 @@ static ssize_t srp_create_target(struct device *dev, ret = srp_parse_options(buf, target); if (ret) - goto err; + goto out; ret = scsi_init_shared_tag_map(target_host, target_host->can_queue); if (ret) - goto err; + goto out; target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; @@ -3187,7 +3190,7 @@ static ssize_t srp_create_target(struct device *dev, be64_to_cpu(target->ioc_guid), be64_to_cpu(target->initiator_ext)); ret = -EEXIST; - goto err; + goto out; } if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && @@ -3208,7 +3211,7 @@ static ssize_t srp_create_target(struct device *dev, spin_lock_init(&target->lock); ret = ib_query_gid(ibdev, host->port, 0, &target->sgid); if (ret) - goto err; + goto out; ret = -ENOMEM; target->ch_count = max_t(unsigned, num_online_nodes(), @@ -3219,7 +3222,7 @@ static ssize_t srp_create_target(struct device *dev, target->ch = kcalloc(target->ch_count, sizeof(*target->ch), GFP_KERNEL); if (!target->ch) - goto err; + goto out; node_idx = 0; for_each_online_node(node) { @@ -3270,7 +3273,7 @@ static ssize_t srp_create_target(struct device *dev, srp_free_ch_ib(target, ch); srp_free_req_data(target, ch); target->ch_count = ch - target->ch; - break; + goto connected; } } @@ -3280,6 +3283,7 @@ static ssize_t srp_create_target(struct device *dev, node_idx++; } +connected: target->scsi_host->nr_hw_queues = target->ch_count; ret = srp_add_target(host, target); @@ -3302,6 +3306,8 @@ static ssize_t srp_create_target(struct device *dev, mutex_unlock(&host->add_target_mutex); scsi_host_put(target->scsi_host); + if (ret < 0) + scsi_host_put(target->scsi_host); return ret; @@ -3315,9 +3321,6 @@ static ssize_t srp_create_target(struct device *dev, } kfree(target->ch); - -err: - scsi_host_put(target_host); goto out; } diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index a611556406ac0..e690847a46dd3 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -170,6 +170,7 @@ struct srp_rdma_ch { struct completion tsk_mgmt_done; u8 tsk_mgmt_status; + bool connected; }; /** @@ -214,7 +215,6 @@ struct srp_target_port { __be16 pkey; u32 rq_tmo_jiffies; - bool connected; int zero_req_lim; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 9b84b4c0a000a..6fbc7bc824d29 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1334,7 +1334,7 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) BUG_ON(ch->sess == NULL); - target_put_sess_cmd(ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); goto out; } @@ -1365,11 +1365,11 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) * not been received in time. 
*/ srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); - target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); break; case SRPT_STATE_MGMT_RSP_SENT: srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); break; default: WARN(1, "Unexpected command state (%d)", state); @@ -1679,7 +1679,7 @@ static int srpt_check_stop_free(struct se_cmd *cmd) struct srpt_send_ioctx *ioctx = container_of(cmd, struct srpt_send_ioctx, cmd); - return target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + return target_put_sess_cmd(&ioctx->cmd); } /** @@ -3074,7 +3074,7 @@ static void srpt_queue_response(struct se_cmd *cmd) ioctx->tag); srpt_unmap_sg_to_ib_sge(ch, ioctx); srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); } } diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index a18f41b89b6a9..2ae522f0d2b2f 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -290,19 +290,14 @@ static int evdev_flush(struct file *file, fl_owner_t id) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; - int retval; - retval = mutex_lock_interruptible(&evdev->mutex); - if (retval) - return retval; + mutex_lock(&evdev->mutex); - if (!evdev->exist || client->revoked) - retval = -ENODEV; - else - retval = input_flush_device(&evdev->handle, file); + if (evdev->exist && !client->revoked) + input_flush_device(&evdev->handle, file); mutex_unlock(&evdev->mutex); - return retval; + return 0; } static void evdev_free(struct device *dev) diff --git a/drivers/input/keyboard/gpio_keys_polled.c b/drivers/input/keyboard/gpio_keys_polled.c index 097d7216d98ee..c6dc644aa5806 100644 --- a/drivers/input/keyboard/gpio_keys_polled.c +++ b/drivers/input/keyboard/gpio_keys_polled.c @@ -246,7 +246,7 @@ static int gpio_keys_polled_probe(struct platform_device *pdev) * convert it to descriptor. 
*/ if (!button->gpiod && gpio_is_valid(button->gpio)) { - unsigned flags = 0; + unsigned flags = GPIOF_IN; if (button->active_low) flags |= GPIOF_ACTIVE_LOW; diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index a353b7de6d22e..4b9e31a5b3f86 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "psmouse.h" #include "alps.h" @@ -99,6 +100,7 @@ static const struct alps_nibble_commands alps_v6_nibble_commands[] = { #define ALPS_FOUR_BUTTONS 0x40 /* 4 direction button present */ #define ALPS_PS2_INTERLEAVED 0x80 /* 3-byte PS/2 packet interleaved with 6-byte ALPS packet */ +#define ALPS_STICK_BITS 0x100 /* separate stick button bits */ #define ALPS_BUTTONPAD 0x200 /* device is a clickpad */ static const struct alps_model_info alps_model_data[] = { @@ -157,6 +159,43 @@ static const struct alps_protocol_info alps_v8_protocol_data = { ALPS_PROTO_V8, 0x18, 0x18, 0 }; +/* + * Some v2 models report the stick buttons in separate bits + */ +static const struct dmi_system_id alps_dmi_has_separate_stick_buttons[] = { +#if defined(CONFIG_DMI) && defined(CONFIG_X86) + { + /* Extrapolated from other entries */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude D420"), + }, + }, + { + /* Reported-by: Hans de Bruin */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude D430"), + }, + }, + { + /* Reported-by: Hans de Goede */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude D620"), + }, + }, + { + /* Extrapolated from other entries */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude D630"), + }, + }, +#endif + { } +}; + static void alps_set_abs_params_st(struct alps_data *priv, struct input_dev *dev1); static void alps_set_abs_params_mt(struct alps_data *priv, @@ -251,9 +290,8 @@ static void alps_process_packet_v1_v2(struct psmouse *psmouse) return; } - /* Non interleaved V2 dualpoint has separate stick button bits */ - if (priv->proto_version == ALPS_PROTO_V2 && - priv->flags == (ALPS_PASS | ALPS_DUALPOINT)) { + /* Some models have separate stick button bits */ + if (priv->flags & ALPS_STICK_BITS) { left |= packet[0] & 1; right |= packet[0] & 2; middle |= packet[0] & 4; @@ -2556,6 +2594,8 @@ static int alps_set_protocol(struct psmouse *psmouse, priv->set_abs_params = alps_set_abs_params_st; priv->x_max = 1023; priv->y_max = 767; + if (dmi_check_system(alps_dmi_has_separate_stick_buttons)) + priv->flags |= ALPS_STICK_BITS; break; case ALPS_PROTO_V3: diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index ce3d40004458c..0f5b400706d77 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1214,7 +1214,7 @@ static int elantech_set_input_params(struct psmouse *psmouse) input_set_abs_params(dev, ABS_TOOL_WIDTH, ETP_WMIN_V2, ETP_WMAX_V2, 0, 0); } - input_mt_init_slots(dev, 2, 0); + input_mt_init_slots(dev, 2, INPUT_MT_SEMI_MT); input_set_abs_params(dev, ABS_MT_POSITION_X, x_min, x_max, 0, 0); input_set_abs_params(dev, ABS_MT_POSITION_Y, y_min, y_max, 0, 0); break; diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 35c8d0ceabeeb..3a32caf06bf1d 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1199,7 +1199,7 @@ static void set_input_params(struct psmouse *psmouse, ABS_MT_POSITION_Y); /* Image sensors can 
report per-contact pressure */ input_set_abs_params(dev, ABS_MT_PRESSURE, 0, 255, 0, 0); - input_mt_init_slots(dev, 3, INPUT_MT_POINTER | INPUT_MT_TRACK); + input_mt_init_slots(dev, 2, INPUT_MT_POINTER | INPUT_MT_TRACK); /* Image sensors can signal 4 and 5 finger clicks */ __set_bit(BTN_TOOL_QUADTAP, dev->keybit); diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c index e272f06258cef..a3f0f5a47490e 100644 --- a/drivers/input/mouse/vmmouse.c +++ b/drivers/input/mouse/vmmouse.c @@ -458,8 +458,6 @@ int vmmouse_init(struct psmouse *psmouse) priv->abs_dev = abs_dev; psmouse->private = priv; - input_set_capability(rel_dev, EV_REL, REL_WHEEL); - /* Set up and register absolute device */ snprintf(priv->phys, sizeof(priv->phys), "%s/input1", psmouse->ps2dev.serio->phys); @@ -475,10 +473,6 @@ int vmmouse_init(struct psmouse *psmouse) abs_dev->id.version = psmouse->model; abs_dev->dev.parent = &psmouse->ps2dev.serio->dev; - error = input_register_device(priv->abs_dev); - if (error) - goto init_fail; - /* Set absolute device capabilities */ input_set_capability(abs_dev, EV_KEY, BTN_LEFT); input_set_capability(abs_dev, EV_KEY, BTN_RIGHT); @@ -488,6 +482,13 @@ int vmmouse_init(struct psmouse *psmouse) input_set_abs_params(abs_dev, ABS_X, 0, VMMOUSE_MAX_X, 0, 0); input_set_abs_params(abs_dev, ABS_Y, 0, VMMOUSE_MAX_Y, 0, 0); + error = input_register_device(priv->abs_dev); + if (error) + goto init_fail; + + /* Add wheel capability to the relative device */ + input_set_capability(rel_dev, EV_REL, REL_WHEEL); + psmouse->protocol_handler = vmmouse_process_byte; psmouse->disconnect = vmmouse_disconnect; psmouse->reconnect = vmmouse_reconnect; diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index c11556563ef06..68f5f4a0f1e72 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -257,6 +257,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook S6230"), }, }, + { + /* Fujitsu Lifebook U745 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U745"), + }, + }, { /* Fujitsu T70H */ .matches = { diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c index 2c2107147319e..8f3e243a62bf3 100644 --- a/drivers/input/touchscreen/pixcir_i2c_ts.c +++ b/drivers/input/touchscreen/pixcir_i2c_ts.c @@ -78,7 +78,7 @@ static void pixcir_ts_parse(struct pixcir_i2c_ts_data *tsdata, } ret = i2c_master_recv(tsdata->client, rdbuf, readsize); - if (ret != sizeof(rdbuf)) { + if (ret != readsize) { dev_err(&tsdata->client->dev, "%s: i2c_master_recv failed(), ret=%d\n", __func__, ret); diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c index f2c6c352c55af..2c41107240dec 100644 --- a/drivers/input/touchscreen/usbtouchscreen.c +++ b/drivers/input/touchscreen/usbtouchscreen.c @@ -627,6 +627,9 @@ static int dmc_tsc10_init(struct usbtouch_usb *usbtouch) goto err_out; } + /* TSC-25 data sheet specifies a delay after the RESET command */ + msleep(150); + /* set coordinate output rate */ buf[0] = buf[1] = 0xFF; ret = usb_control_msg(dev, usb_rcvctrlpipe (dev, 0), diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index e1c7e9e510451..f0fd5352f8ef9 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1869,9 +1869,15 @@ static void free_pt_##LVL (unsigned long __pt) \ pt = (u64 *)__pt; \ \ for (i = 0; i < 
512; ++i) { \ + /* PTE present? */ \ if (!IOMMU_PTE_PRESENT(pt[i])) \ continue; \ \ + /* Large PTE? */ \ + if (PM_PTE_LEVEL(pt[i]) == 0 || \ + PM_PTE_LEVEL(pt[i]) == 7) \ + continue; \ + \ p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ FN(p); \ } \ @@ -2093,8 +2099,8 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) static void clear_dte_entry(u16 devid) { /* remove entry from the device table seen by the hardware */ - amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; - amd_iommu_dev_table[devid].data[1] = 0; + amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; + amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK; amd_iommu_apply_erratum_63(devid); } diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 450ef5001a65a..1750db0ef61cb 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -227,6 +227,10 @@ static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); +static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu, + u8 bank, u8 cntr, u8 fxn, + u64 *value, bool is_write); + static inline void update_last_devid(u16 devid) { if (devid > amd_iommu_last_bdf) @@ -1065,6 +1069,34 @@ static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) pci_write_config_dword(iommu->dev, 0xf0, 0x90); } +/* + * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission) + * Workaround: + * BIOS should enable ATS write permission check by setting + * L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b + */ +static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) +{ + u32 value; + + if ((boot_cpu_data.x86 != 0x15) || + (boot_cpu_data.x86_model < 0x30) || + (boot_cpu_data.x86_model > 0x3f)) + return; + + /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */ + value = iommu_read_l2(iommu, 0x47); + + if (value & BIT(0)) + return; + + /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */ + iommu_write_l2(iommu, 0x47, value | BIT(0)); + + pr_info("AMD-Vi: Applying ATS write check workaround for IOMMU at %s\n", + dev_name(&iommu->dev->dev)); +} + /* * This function clues the initialization function for one IOMMU * together and also allocates the command buffer and programs the @@ -1192,8 +1224,8 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) amd_iommu_pc_present = true; /* Check if the performance counters can be written to */ - if ((0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val, true)) || - (0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val2, false)) || + if ((0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) || + (0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) || (val != val2)) { pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n"); amd_iommu_pc_present = false; @@ -1339,6 +1371,7 @@ static int iommu_init_pci(struct amd_iommu *iommu) } amd_iommu_erratum_746_workaround(iommu); + amd_iommu_ats_write_check_workaround(iommu); iommu->iommu_dev = iommu_device_create(&iommu->dev->dev, iommu, amd_iommu_groups, "ivhd%d", @@ -2362,22 +2395,15 @@ u8 amd_iommu_pc_get_max_counters(u16 devid) } EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); -int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, +static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu, + u8 bank, u8 cntr, u8 fxn, u64 *value, bool is_write) { - struct amd_iommu *iommu; u32 offset; u32 max_offset_lim; - /* Make sure the IOMMU PC resource is available */ - if 
(!amd_iommu_pc_present) - return -ENODEV; - - /* Locate the iommu associated with the device ID */ - iommu = amd_iommu_rlookup_table[devid]; - /* Check for valid iommu and pc register indexing */ - if (WARN_ON((iommu == NULL) || (fxn > 0x28) || (fxn & 7))) + if (WARN_ON((fxn > 0x28) || (fxn & 7))) return -ENODEV; offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn); @@ -2401,3 +2427,16 @@ int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, return 0; } EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val); + +int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, + u64 *value, bool is_write) +{ + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + /* Make sure the IOMMU PC resource is available */ + if (!amd_iommu_pc_present || iommu == NULL) + return -ENODEV; + + return iommu_pc_get_set_reg_val(iommu, bank, cntr, fxn, + value, is_write); +} diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 05030e523771a..cbfd0f4c46082 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -295,6 +295,7 @@ #define IOMMU_PTE_IR (1ULL << 61) #define IOMMU_PTE_IW (1ULL << 62) +#define DTE_FLAG_MASK (0x3ffULL << 32) #define DTE_FLAG_IOTLB (0x01UL << 32) #define DTE_FLAG_GV (0x01ULL << 55) #define DTE_GLX_SHIFT (56) diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 3465faf1809e4..45087c3e5c572 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -508,6 +508,13 @@ static void do_fault(struct work_struct *work) goto out; } + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) { + /* handle_mm_fault would BUG_ON() */ + up_read(&mm->mmap_sem); + handle_fault_error(fault); + goto out; + } + ret = handle_mm_fault(mm, vma, address, write); if (ret & VM_FAULT_ERROR) { /* failed to service fault */ diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 66a803b9dd3af..65075ef75e2a3 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1567,7 +1567,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) return -ENODEV; } - if ((id & ID0_S1TS) && ((smmu->version == 1) || (id & ID0_ATOSNS))) { + if ((id & ID0_S1TS) && ((smmu->version == 1) || !(id & ID0_ATOSNS))) { smmu->features |= ARM_SMMU_FEAT_TRANS_OPS; dev_notice(smmu->dev, "\taddress translation ops\n"); } diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 9847613085e15..5a2ec39e1fd9b 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1342,7 +1342,7 @@ void dmar_disable_qi(struct intel_iommu *iommu) raw_spin_lock_irqsave(&iommu->register_lock, flags); - sts = dmar_readq(iommu->reg + DMAR_GSTS_REG); + sts = readl(iommu->reg + DMAR_GSTS_REG); if (!(sts & DMA_GSTS_QIES)) goto end; diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index abeedc9a78c27..2570f2a25dc43 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -41,7 +41,6 @@ struct pamu_isr_data { static struct paace *ppaact; static struct paace *spaact; -static struct ome *omt __initdata; /* * Table for matching compatible strings, for device tree @@ -50,7 +49,7 @@ static struct ome *omt __initdata; * SOCs. For the older SOCs "fsl,qoriq-device-config-1.0" * string would be used. 
*/ -static const struct of_device_id guts_device_ids[] __initconst = { +static const struct of_device_id guts_device_ids[] = { { .compatible = "fsl,qoriq-device-config-1.0", }, { .compatible = "fsl,qoriq-device-config-2.0", }, {} @@ -599,7 +598,7 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu) * Memory accesses to QMAN and BMAN private memory need not be coherent, so * clear the PAACE entry coherency attribute for them. */ -static void __init setup_qbman_paace(struct paace *ppaace, int paace_type) +static void setup_qbman_paace(struct paace *ppaace, int paace_type) { switch (paace_type) { case QMAN_PAACE: @@ -629,7 +628,7 @@ static void __init setup_qbman_paace(struct paace *ppaace, int paace_type) * this table to translate device transaction to appropriate corenet * transaction. */ -static void __init setup_omt(struct ome *omt) +static void setup_omt(struct ome *omt) { struct ome *ome; @@ -666,7 +665,7 @@ static void __init setup_omt(struct ome *omt) * Get the maximum number of PAACT table entries * and subwindows supported by PAMU */ -static void __init get_pamu_cap_values(unsigned long pamu_reg_base) +static void get_pamu_cap_values(unsigned long pamu_reg_base) { u32 pc_val; @@ -676,9 +675,9 @@ static void __init get_pamu_cap_values(unsigned long pamu_reg_base) } /* Setup PAMU registers pointing to PAACT, SPAACT and OMT */ -static int __init setup_one_pamu(unsigned long pamu_reg_base, unsigned long pamu_reg_size, - phys_addr_t ppaact_phys, phys_addr_t spaact_phys, - phys_addr_t omt_phys) +static int setup_one_pamu(unsigned long pamu_reg_base, unsigned long pamu_reg_size, + phys_addr_t ppaact_phys, phys_addr_t spaact_phys, + phys_addr_t omt_phys) { u32 *pc; struct pamu_mmap_regs *pamu_regs; @@ -720,7 +719,7 @@ static int __init setup_one_pamu(unsigned long pamu_reg_base, unsigned long pamu } /* Enable all device LIODNS */ -static void __init setup_liodns(void) +static void setup_liodns(void) { int i, len; struct paace *ppaace; @@ -846,7 +845,7 @@ struct ccsr_law { /* * Create a coherence subdomain for a given memory block. 
*/ -static int __init create_csd(phys_addr_t phys, size_t size, u32 csd_port_id) +static int create_csd(phys_addr_t phys, size_t size, u32 csd_port_id) { struct device_node *np; const __be32 *iprop; @@ -988,7 +987,7 @@ static int __init create_csd(phys_addr_t phys, size_t size, u32 csd_port_id) static const struct { u32 svr; u32 port_id; -} port_id_map[] __initconst = { +} port_id_map[] = { {(SVR_P2040 << 8) | 0x10, 0xFF000000}, /* P2040 1.0 */ {(SVR_P2040 << 8) | 0x11, 0xFF000000}, /* P2040 1.1 */ {(SVR_P2041 << 8) | 0x10, 0xFF000000}, /* P2041 1.0 */ @@ -1006,7 +1005,7 @@ static const struct { #define SVR_SECURITY 0x80000 /* The Security (E) bit */ -static int __init fsl_pamu_probe(struct platform_device *pdev) +static int fsl_pamu_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; void __iomem *pamu_regs = NULL; @@ -1022,6 +1021,7 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) int irq; phys_addr_t ppaact_phys; phys_addr_t spaact_phys; + struct ome *omt; phys_addr_t omt_phys; size_t mem_size = 0; unsigned int order = 0; @@ -1200,7 +1200,7 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) return ret; } -static struct platform_driver fsl_of_pamu_driver __initdata = { +static struct platform_driver fsl_of_pamu_driver = { .driver = { .name = "fsl-of-pamu", }, diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 5ecfaf29933ad..b85a8614c1280 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -681,6 +681,7 @@ static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu struct context_entry *context; u64 *entry; + entry = &root->lo; if (ecs_enabled(iommu)) { if (devfn >= 0x80) { devfn -= 0x80; @@ -688,7 +689,6 @@ static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu } devfn *= 2; } - entry = &root->lo; if (*entry & 1) context = phys_to_virt(*entry & VTD_PAGE_MASK); else { @@ -1756,8 +1756,9 @@ static int domain_init(struct dmar_domain *domain, int guest_width) static void domain_exit(struct dmar_domain *domain) { + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; struct page *freelist = NULL; - int i; /* Domain 0 is reserved, so dont process it */ if (!domain) @@ -1777,8 +1778,10 @@ static void domain_exit(struct dmar_domain *domain) /* clear attached or cached domains */ rcu_read_lock(); - for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) - iommu_detach_domain(domain, g_iommus[i]); + for_each_active_iommu(iommu, drhd) + if (domain_type_is_vm(domain) || + test_bit(iommu->seq_id, domain->iommu_bmp)) + iommu_detach_domain(domain, iommu); rcu_read_unlock(); dma_free_pagelist(freelist); @@ -2030,15 +2033,19 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, return -ENOMEM; /* It is large page*/ if (largepage_lvl > 1) { + unsigned long nr_superpages, end_pfn; + pteval |= DMA_PTE_LARGE_PAGE; lvl_pages = lvl_to_nr_pages(largepage_lvl); + + nr_superpages = sg_res / lvl_pages; + end_pfn = iov_pfn + nr_superpages * lvl_pages - 1; + /* * Ensure that old small page tables are - * removed to make room for superpage, - * if they exist. + * removed to make room for superpage(s). 
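The __domain_mapping() change above frees old page tables for every superpage covered by the current scatterlist chunk rather than only the first one. The arithmetic in runnable form; variable names follow the hunk, the numbers are example values only:

    #include <stdio.h>

    int main(void)
    {
            unsigned long iov_pfn = 0x1000;  /* example start pfn */
            unsigned long sg_res = 1024;     /* 4KiB pages left in this sg entry */
            unsigned long lvl_pages = 512;   /* pages per 2MiB superpage */

            unsigned long nr_superpages = sg_res / lvl_pages;
            unsigned long end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;

            /* The old code cleared only one superpage worth of tables: */
            unsigned long old_end = iov_pfn + lvl_pages - 1;

            printf("superpages: %lu, clear %#lx-%#lx (was ...-%#lx)\n",
                   nr_superpages, iov_pfn, end_pfn, old_end);
            return 0;
    }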
*/ - dma_pte_free_pagetable(domain, iov_pfn, - iov_pfn + lvl_pages - 1); + dma_pte_free_pagetable(domain, iov_pfn, end_pfn); } else { pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; } @@ -3921,14 +3928,17 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev) dev = pci_physfn(dev); for (bus = dev->bus; bus; bus = bus->parent) { bridge = bus->self; - if (!bridge || !pci_is_pcie(bridge) || + /* If it's an integrated device, allow ATS */ + if (!bridge) + return 1; + /* Connected via non-PCIe: no ATS */ + if (!pci_is_pcie(bridge) || pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) return 0; + /* If we found the root port, look it up in the ATSR */ if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) break; } - if (!bridge) - return 0; rcu_read_lock(); list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 5709ae9c3e771..04b39be8f1f36 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -544,7 +544,7 @@ static void iommu_disable_irq_remapping(struct intel_iommu *iommu) raw_spin_lock_irqsave(&iommu->register_lock, flags); - sts = dmar_readq(iommu->reg + DMAR_GSTS_REG); + sts = readl(iommu->reg + DMAR_GSTS_REG); if (!(sts & DMA_GSTS_IRES)) goto end; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 4e460216bd164..937832cfa48ee 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -200,6 +200,10 @@ typedef u64 arm_lpae_iopte; static bool selftest_running = false; +static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, + unsigned long iova, size_t size, int lvl, + arm_lpae_iopte *ptep); + static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, unsigned long iova, phys_addr_t paddr, arm_lpae_iopte prot, int lvl, @@ -207,10 +211,21 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, { arm_lpae_iopte pte = prot; - /* We require an unmap first */ if (iopte_leaf(*ptep, lvl)) { + /* We require an unmap first */ WARN_ON(!selftest_running); return -EEXIST; + } else if (iopte_type(*ptep, lvl) == ARM_LPAE_PTE_TYPE_TABLE) { + /* + * We need to unmap and free the old table before + * overwriting it with a block entry. 
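For the arm_lpae_init_pte() change in this hunk, the interesting step is recovering the start of the table from a pointer to one of its entries, so the old sub-table can be unmapped before a block entry overwrites it. A toy model of that pointer arithmetic; the granularity and table size are assumptions, not the driver's actual configuration:

    #include <stdio.h>

    #define ENTRIES_PER_TABLE 512

    /* Index of 'iova' at a hypothetical level with 2MiB granularity. */
    static unsigned long lvl_idx(unsigned long iova)
    {
            return (iova >> 21) & (ENTRIES_PER_TABLE - 1);
    }

    int main(void)
    {
            unsigned long long table[ENTRIES_PER_TABLE] = { 0 };
            unsigned long iova = 0x40600000;

            /* ptep points at one entry, as in arm_lpae_init_pte() ... */
            unsigned long long *ptep = &table[lvl_idx(iova)];

            /* ... and the table base is recovered the way the patch
             * does: tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data); */
            unsigned long long *tblp = ptep - lvl_idx(iova);

            printf("recovered base ok: %d\n", tblp == table);
            return 0;
    }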
+ */ + arm_lpae_iopte *tblp; + size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data); + + tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data); + if (WARN_ON(__arm_lpae_unmap(data, iova, sz, lvl, tblp) != sz)) + return -EINVAL; } if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) @@ -326,17 +341,18 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, arm_lpae_iopte *start, *end; unsigned long table_size; - /* Only leaf entries at the last level */ - if (lvl == ARM_LPAE_MAX_LEVELS - 1) - return; - if (lvl == ARM_LPAE_START_LVL(data)) table_size = data->pgd_size; else table_size = 1UL << data->pg_shift; start = ptep; - end = (void *)ptep + table_size; + + /* Only leaf entries at the last level */ + if (lvl == ARM_LPAE_MAX_LEVELS - 1) + end = ptep; + else + end = (void *)ptep + table_size; while (ptep != end) { arm_lpae_iopte pte = *ptep++; diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index c845d99ecf6b8..e0ff5f4d7fed5 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -26,6 +26,7 @@ struct tegra_smmu { const struct tegra_smmu_soc *soc; unsigned long pfn_mask; + unsigned long tlb_mask; unsigned long *asids; struct mutex lock; @@ -65,7 +66,8 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset) #define SMMU_TLB_CONFIG 0x14 #define SMMU_TLB_CONFIG_HIT_UNDER_MISS (1 << 29) #define SMMU_TLB_CONFIG_ROUND_ROBIN_ARBITRATION (1 << 28) -#define SMMU_TLB_CONFIG_ACTIVE_LINES(x) ((x) & 0x3f) +#define SMMU_TLB_CONFIG_ACTIVE_LINES(smmu) \ + ((smmu)->soc->num_tlb_lines & (smmu)->tlb_mask) #define SMMU_PTC_CONFIG 0x18 #define SMMU_PTC_CONFIG_ENABLE (1 << 29) @@ -716,6 +718,9 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, smmu->pfn_mask = BIT_MASK(mc->soc->num_address_bits - PAGE_SHIFT) - 1; dev_dbg(dev, "address bits: %u, PFN mask: %#lx\n", mc->soc->num_address_bits, smmu->pfn_mask); + smmu->tlb_mask = (smmu->soc->num_tlb_lines << 1) - 1; + dev_dbg(dev, "TLB lines: %u, mask: %#lx\n", smmu->soc->num_tlb_lines, + smmu->tlb_mask); value = SMMU_PTC_CONFIG_ENABLE | SMMU_PTC_CONFIG_INDEX_MAP(0x3f); @@ -725,7 +730,7 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, smmu_writel(smmu, value, SMMU_PTC_CONFIG); value = SMMU_TLB_CONFIG_HIT_UNDER_MISS | - SMMU_TLB_CONFIG_ACTIVE_LINES(0x20); + SMMU_TLB_CONFIG_ACTIVE_LINES(smmu); if (soc->supports_round_robin_arbitration) value |= SMMU_TLB_CONFIG_ROUND_ROBIN_ARBITRATION; diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c index 63cd031b2c28d..869d01dd40630 100644 --- a/drivers/irqchip/irq-atmel-aic-common.c +++ b/drivers/irqchip/irq-atmel-aic-common.c @@ -86,7 +86,7 @@ int aic_common_set_priority(int priority, unsigned *val) priority > AT91_AIC_IRQ_MAX_PRIORITY) return -EINVAL; - *val &= AT91_AIC_PRIOR; + *val &= ~AT91_AIC_PRIOR; *val |= priority; return 0; diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c index a2e8c3f876cbd..c2c578f0b268a 100644 --- a/drivers/irqchip/irq-atmel-aic5.c +++ b/drivers/irqchip/irq-atmel-aic5.c @@ -88,28 +88,36 @@ static void aic5_mask(struct irq_data *d) { struct irq_domain *domain = d->domain; struct irq_domain_chip_generic *dgc = domain->gc; - struct irq_chip_generic *gc = dgc->gc[0]; + struct irq_chip_generic *bgc = dgc->gc[0]; + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - /* Disable interrupt on AIC5 */ - irq_gc_lock(gc); + /* + * Disable interrupt on AIC5. 
We always take the lock of the + * first irq chip as all chips share the same registers. + */ + irq_gc_lock(bgc); irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); irq_reg_writel(gc, 1, AT91_AIC5_IDCR); gc->mask_cache &= ~d->mask; - irq_gc_unlock(gc); + irq_gc_unlock(bgc); } static void aic5_unmask(struct irq_data *d) { struct irq_domain *domain = d->domain; struct irq_domain_chip_generic *dgc = domain->gc; - struct irq_chip_generic *gc = dgc->gc[0]; + struct irq_chip_generic *bgc = dgc->gc[0]; + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - /* Enable interrupt on AIC5 */ - irq_gc_lock(gc); + /* + * Enable interrupt on AIC5. We always take the lock of the + * first irq chip as all chips share the same registers. + */ + irq_gc_lock(bgc); irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); irq_reg_writel(gc, 1, AT91_AIC5_IECR); gc->mask_cache |= d->mask; - irq_gc_unlock(gc); + irq_gc_unlock(bgc); } static int aic5_retrigger(struct irq_data *d) diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 692fe2bc81979..c12bb93334ff9 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -68,7 +68,9 @@ static struct irq_chip crossbar_chip = { .irq_mask = irq_chip_mask_parent, .irq_unmask = irq_chip_unmask_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, - .irq_set_wake = irq_chip_set_wake_parent, + .irq_set_type = irq_chip_set_type_parent, + .flags = IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_SKIP_SET_WAKE, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, #endif diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 1b7e155869f6c..9a791dd52199c 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -75,6 +75,13 @@ struct its_node { #define ITS_ITT_ALIGN SZ_256 +struct event_lpi_map { + unsigned long *lpi_map; + u16 *col_map; + irq_hw_number_t lpi_base; + int nr_lpis; +}; + /* * The ITS view of a device - belongs to an ITS, a collection, owns an * interrupt translation table, and a list of interrupts. @@ -82,11 +89,8 @@ struct its_node { struct its_device { struct list_head entry; struct its_node *its; - struct its_collection *collection; + struct event_lpi_map event_map; void *itt; - unsigned long *lpi_map; - irq_hw_number_t lpi_base; - int nr_lpis; u32 nr_ites; u32 device_id; }; @@ -99,6 +103,14 @@ static struct rdists *gic_rdists; #define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist)) #define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) +static struct its_collection *dev_event_to_col(struct its_device *its_dev, + u32 event) +{ + struct its_node *its = its_dev->its; + + return its->collections + its_dev->event_map.col_map[event]; +} + /* * ITS command descriptors - parameters to be encoded in a command * block. 
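The event_lpi_map refactor above moves the ITS from one collection per device to one per event. A toy model of the lookup dev_event_to_col() performs and of how set_affinity retargets a single event; structure names mirror the patch, sizes and values are arbitrary:

    #include <stdio.h>

    #define NR_COLLECTIONS 4

    struct collection { int col_id; };

    struct event_map {
            unsigned long lpi_base;     /* first hwirq of the chunk */
            unsigned short col_map[8];  /* per-event collection index */
    };

    static struct collection collections[NR_COLLECTIONS] =
            { {0}, {1}, {2}, {3} };

    static struct collection *event_to_col(struct event_map *m,
                                           unsigned int event)
    {
            return &collections[m->col_map[event]];
    }

    int main(void)
    {
            struct event_map m = { .lpi_base = 8192 };
            unsigned long hwirq = 8194;
            /* its_get_event_id(): event is the offset from lpi_base */
            unsigned int event = hwirq - m.lpi_base;

            m.col_map[event] = 3;   /* like set_affinity moving one event */
            printf("event %u -> collection %d\n", event,
                   event_to_col(&m, event)->col_id);
            return 0;
    }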
@@ -134,7 +146,7 @@ struct its_cmd_desc { struct { struct its_device *dev; struct its_collection *col; - u32 id; + u32 event_id; } its_movi_cmd; struct { @@ -241,7 +253,7 @@ static struct its_collection *its_build_mapd_cmd(struct its_cmd_block *cmd, its_fixup_cmd(cmd); - return desc->its_mapd_cmd.dev->collection; + return NULL; } static struct its_collection *its_build_mapc_cmd(struct its_cmd_block *cmd, @@ -260,52 +272,72 @@ static struct its_collection *its_build_mapc_cmd(struct its_cmd_block *cmd, static struct its_collection *its_build_mapvi_cmd(struct its_cmd_block *cmd, struct its_cmd_desc *desc) { + struct its_collection *col; + + col = dev_event_to_col(desc->its_mapvi_cmd.dev, + desc->its_mapvi_cmd.event_id); + its_encode_cmd(cmd, GITS_CMD_MAPVI); its_encode_devid(cmd, desc->its_mapvi_cmd.dev->device_id); its_encode_event_id(cmd, desc->its_mapvi_cmd.event_id); its_encode_phys_id(cmd, desc->its_mapvi_cmd.phys_id); - its_encode_collection(cmd, desc->its_mapvi_cmd.dev->collection->col_id); + its_encode_collection(cmd, col->col_id); its_fixup_cmd(cmd); - return desc->its_mapvi_cmd.dev->collection; + return col; } static struct its_collection *its_build_movi_cmd(struct its_cmd_block *cmd, struct its_cmd_desc *desc) { + struct its_collection *col; + + col = dev_event_to_col(desc->its_movi_cmd.dev, + desc->its_movi_cmd.event_id); + its_encode_cmd(cmd, GITS_CMD_MOVI); its_encode_devid(cmd, desc->its_movi_cmd.dev->device_id); - its_encode_event_id(cmd, desc->its_movi_cmd.id); + its_encode_event_id(cmd, desc->its_movi_cmd.event_id); its_encode_collection(cmd, desc->its_movi_cmd.col->col_id); its_fixup_cmd(cmd); - return desc->its_movi_cmd.dev->collection; + return col; } static struct its_collection *its_build_discard_cmd(struct its_cmd_block *cmd, struct its_cmd_desc *desc) { + struct its_collection *col; + + col = dev_event_to_col(desc->its_discard_cmd.dev, + desc->its_discard_cmd.event_id); + its_encode_cmd(cmd, GITS_CMD_DISCARD); its_encode_devid(cmd, desc->its_discard_cmd.dev->device_id); its_encode_event_id(cmd, desc->its_discard_cmd.event_id); its_fixup_cmd(cmd); - return desc->its_discard_cmd.dev->collection; + return col; } static struct its_collection *its_build_inv_cmd(struct its_cmd_block *cmd, struct its_cmd_desc *desc) { + struct its_collection *col; + + col = dev_event_to_col(desc->its_inv_cmd.dev, + desc->its_inv_cmd.event_id); + its_encode_cmd(cmd, GITS_CMD_INV); its_encode_devid(cmd, desc->its_inv_cmd.dev->device_id); its_encode_event_id(cmd, desc->its_inv_cmd.event_id); its_fixup_cmd(cmd); - return desc->its_inv_cmd.dev->collection; + return col; } static struct its_collection *its_build_invall_cmd(struct its_cmd_block *cmd, @@ -497,7 +529,7 @@ static void its_send_movi(struct its_device *dev, desc.its_movi_cmd.dev = dev; desc.its_movi_cmd.col = col; - desc.its_movi_cmd.id = id; + desc.its_movi_cmd.event_id = id; its_send_single_command(dev->its, its_build_movi_cmd, &desc); } @@ -528,7 +560,7 @@ static void its_send_invall(struct its_node *its, struct its_collection *col) static inline u32 its_get_event_id(struct irq_data *d) { struct its_device *its_dev = irq_data_get_irq_chip_data(d); - return d->hwirq - its_dev->lpi_base; + return d->hwirq - its_dev->event_map.lpi_base; } static void lpi_set_config(struct irq_data *d, bool enable) @@ -583,7 +615,7 @@ static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val, target_col = &its_dev->its->collections[cpu]; its_send_movi(its_dev, target_col, id); - its_dev->collection = target_col; + 
its_dev->event_map.col_map[id] = cpu; return IRQ_SET_MASK_OK_DONE; } @@ -713,8 +745,10 @@ static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids) return bitmap; } -static void its_lpi_free(unsigned long *bitmap, int base, int nr_ids) +static void its_lpi_free(struct event_lpi_map *map) { + int base = map->lpi_base; + int nr_ids = map->nr_lpis; int lpi; spin_lock(&lpi_lock); @@ -731,7 +765,8 @@ static void its_lpi_free(unsigned long *bitmap, int base, int nr_ids) spin_unlock(&lpi_lock); - kfree(bitmap); + kfree(map->lpi_map); + kfree(map->col_map); } /* @@ -886,8 +921,10 @@ static int its_alloc_tables(struct its_node *its) * non-cacheable as well. */ shr = tmp & GITS_BASER_SHAREABILITY_MASK; - if (!shr) + if (!shr) { cache = GITS_BASER_nC; + __flush_dcache_area(base, alloc_size); + } goto retry_baser; } @@ -1099,11 +1136,11 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, struct its_device *dev; unsigned long *lpi_map; unsigned long flags; + u16 *col_map = NULL; void *itt; int lpi_base; int nr_lpis; int nr_ites; - int cpu; int sz; dev = kzalloc(sizeof(*dev), GFP_KERNEL); @@ -1117,20 +1154,26 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; itt = kzalloc(sz, GFP_KERNEL); lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis); + if (lpi_map) + col_map = kzalloc(sizeof(*col_map) * nr_lpis, GFP_KERNEL); - if (!dev || !itt || !lpi_map) { + if (!dev || !itt || !lpi_map || !col_map) { kfree(dev); kfree(itt); kfree(lpi_map); + kfree(col_map); return NULL; } + __flush_dcache_area(itt, sz); + dev->its = its; dev->itt = itt; dev->nr_ites = nr_ites; - dev->lpi_map = lpi_map; - dev->lpi_base = lpi_base; - dev->nr_lpis = nr_lpis; + dev->event_map.lpi_map = lpi_map; + dev->event_map.col_map = col_map; + dev->event_map.lpi_base = lpi_base; + dev->event_map.nr_lpis = nr_lpis; dev->device_id = dev_id; INIT_LIST_HEAD(&dev->entry); @@ -1138,10 +1181,6 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, list_add(&dev->entry, &its->its_device_list); raw_spin_unlock_irqrestore(&its->lock, flags); - /* Bind the device to the first possible CPU */ - cpu = cpumask_first(cpu_online_mask); - dev->collection = &its->collections[cpu]; - /* Map device to its ITT */ its_send_mapd(dev, 1); @@ -1163,12 +1202,13 @@ static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq) { int idx; - idx = find_first_zero_bit(dev->lpi_map, dev->nr_lpis); - if (idx == dev->nr_lpis) + idx = find_first_zero_bit(dev->event_map.lpi_map, + dev->event_map.nr_lpis); + if (idx == dev->event_map.nr_lpis) return -ENOSPC; - *hwirq = dev->lpi_base + idx; - set_bit(idx, dev->lpi_map); + *hwirq = dev->event_map.lpi_base + idx; + set_bit(idx, dev->event_map.lpi_map); return 0; } @@ -1288,7 +1328,8 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq, &its_irq_chip, its_dev); dev_dbg(info->scratchpad[1].ptr, "ID:%d pID:%d vID:%d\n", - (int)(hwirq - its_dev->lpi_base), (int)hwirq, virq + i); + (int)(hwirq - its_dev->event_map.lpi_base), + (int)hwirq, virq + i); } return 0; @@ -1300,6 +1341,9 @@ static void its_irq_domain_activate(struct irq_domain *domain, struct its_device *its_dev = irq_data_get_irq_chip_data(d); u32 event = its_get_event_id(d); + /* Bind the LPI to the first possible CPU */ + its_dev->event_map.col_map[event] = cpumask_first(cpu_online_mask); + /* Map the GIC IRQ and 
event to the device */ its_send_mapvi(its_dev, d->hwirq, event); } @@ -1327,17 +1371,16 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, u32 event = its_get_event_id(data); /* Mark interrupt index as unused */ - clear_bit(event, its_dev->lpi_map); + clear_bit(event, its_dev->event_map.lpi_map); /* Nuke the entry in the domain */ irq_domain_reset_irq_data(data); } /* If all interrupts have been freed, start mopping the floor */ - if (bitmap_empty(its_dev->lpi_map, its_dev->nr_lpis)) { - its_lpi_free(its_dev->lpi_map, - its_dev->lpi_base, - its_dev->nr_lpis); + if (bitmap_empty(its_dev->event_map.lpi_map, + its_dev->event_map.nr_lpis)) { + its_lpi_free(&its_dev->event_map); /* Unmap device/itt */ its_send_mapd(its_dev, 0); diff --git a/drivers/irqchip/irq-tegra.c b/drivers/irqchip/irq-tegra.c index f67bbd80433e8..ab5353a96a820 100644 --- a/drivers/irqchip/irq-tegra.c +++ b/drivers/irqchip/irq-tegra.c @@ -215,6 +215,7 @@ static struct irq_chip tegra_ictlr_chip = { .irq_unmask = tegra_unmask, .irq_retrigger = tegra_retrigger, .irq_set_wake = tegra_set_wake, + .irq_set_type = irq_chip_set_type_parent, .flags = IRQCHIP_MASK_ON_SUSPEND, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 8c91fd5eb6fdd..3ac9c4194814c 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -524,9 +524,18 @@ gigaset_tty_open(struct tty_struct *tty) cs->hw.ser->tty = tty; atomic_set(&cs->hw.ser->refcnt, 1); init_completion(&cs->hw.ser->dead_cmp); - tty->disc_data = cs; + /* Set the amount of data we're willing to receive per call + * from the hardware driver to half of the input buffer size + * to leave some reserve. + * Note: We don't do flow control towards the hardware driver. + * If more data is received than will fit into the input buffer, + * it will be dropped and an error will be logged. This should + * never happen as the device is slow and the buffer size ample. + */ + tty->receive_room = RBUFSIZE/2; + /* OK.. Initialization of the datastructures and the HW is done.. Now * startup system and notify the LL that we are ready to run */ diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index c4198fa490bfa..9c1e8adaf4fc8 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -301,6 +301,8 @@ isdn_ppp_open(int min, struct file *file) is->compflags = 0; is->reset = isdn_ppp_ccp_reset_alloc(is); + if (!is->reset) + return -ENOMEM; is->lp = NULL; is->mp_seqno = 0; /* MP sequence number */ @@ -320,6 +322,10 @@ isdn_ppp_open(int min, struct file *file) * VJ header compression init */ is->slcomp = slhc_init(16, 16); /* not necessary for 2. 
link in bundle */ + if (IS_ERR(is->slcomp)) { + isdn_ppp_ccp_reset_free(is); + return PTR_ERR(is->slcomp); + } #endif #ifdef CONFIG_IPPP_FILTER is->pass_filter = NULL; @@ -567,10 +573,8 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) is->maxcid = val; #ifdef CONFIG_ISDN_PPP_VJ sltmp = slhc_init(16, val); - if (!sltmp) { - printk(KERN_ERR "ippp, can't realloc slhc struct\n"); - return -ENOMEM; - } + if (IS_ERR(sltmp)) + return PTR_ERR(sltmp); if (is->slcomp) slhc_free(is->slcomp); is->slcomp = sltmp; diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 728681debdbe4..557f8a53a062c 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -187,6 +187,7 @@ void led_classdev_resume(struct led_classdev *led_cdev) } EXPORT_SYMBOL_GPL(led_classdev_resume); +#ifdef CONFIG_PM_SLEEP static int led_suspend(struct device *dev) { struct led_classdev *led_cdev = dev_get_drvdata(dev); @@ -206,11 +207,9 @@ static int led_resume(struct device *dev) return 0; } +#endif -static const struct dev_pm_ops leds_class_dev_pm_ops = { - .suspend = led_suspend, - .resume = led_resume, -}; +static SIMPLE_DEV_PM_OPS(leds_class_dev_pm_ops, led_suspend, led_resume); static int match_name(struct device *dev, const void *data) { @@ -224,12 +223,15 @@ static int led_classdev_next_name(const char *init_name, char *name, { unsigned int i = 0; int ret = 0; + struct device *dev; strlcpy(name, init_name, len); - while (class_find_device(leds_class, NULL, name, match_name) && - (ret < len)) + while ((ret < len) && + (dev = class_find_device(leds_class, NULL, name, match_name))) { + put_device(dev); ret = snprintf(name, len, "%s_%u", init_name, ++i); + } if (ret >= len) return -ENOMEM; diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index 3ee198b658438..cc7ece1712b50 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -435,7 +435,7 @@ int wf_unregister_client(struct notifier_block *nb) { mutex_lock(&wf_lock); blocking_notifier_chain_unregister(&wf_client_list, nb); - wf_client_count++; + wf_client_count--; if (wf_client_count == 0) wf_stop_thread(); mutex_unlock(&wf_lock); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 00cde40db5726..43829d9493f70 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1741,6 +1741,7 @@ static void bch_btree_gc(struct cache_set *c) do { ret = btree_root(gc_root, c, &op, &writes, &stats); closure_sync(&writes); + cond_resched(); if (ret && ret != -EAGAIN) pr_warn("gc failed!"); @@ -2162,8 +2163,10 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op, rw_lock(true, b, b->level); if (b->key.ptr[0] != btree_ptr || - b->seq != seq + 1) + b->seq != seq + 1) { + op->lock = b->level; goto out; + } } SET_KEY_PTRS(check_key, 1); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 4dd2bb7167f05..42522c8f13c6f 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -708,6 +708,8 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c, WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") || sysfs_create_link(&c->kobj, &d->kobj, d->name), "Couldn't create device <-> cache set symlinks"); + + clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags); } static void bcache_device_detach(struct bcache_device *d) @@ -878,8 +880,11 @@ void bch_cached_dev_run(struct cached_dev *dc) buf[SB_LABEL_SIZE] = '\0'; env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf); - if 
(atomic_xchg(&dc->running, 1)) + if (atomic_xchg(&dc->running, 1)) { + kfree(env[1]); + kfree(env[2]); return; + } if (!d->c && BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) { @@ -1967,6 +1972,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, else err = "device busy"; mutex_unlock(&bch_register_lock); + if (attr == &ksysfs_register_quiet) + goto out; } goto err; } @@ -2005,8 +2012,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, err_close: blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); err: - if (attr != &ksysfs_register_quiet) - pr_info("error opening %s: %s", path, err); + pr_info("error opening %s: %s", path, err); ret = -EINVAL; goto out; } @@ -2100,8 +2106,10 @@ static int __init bcache_init(void) closure_debug_init(); bcache_major = register_blkdev(0, "bcache"); - if (bcache_major < 0) + if (bcache_major < 0) { + unregister_reboot_notifier(&reboot); return bcache_major; + } if (!(bcache_wq = create_workqueue("bcache")) || !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index f1986bcd1bf05..540256a0df4fd 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -323,6 +323,10 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, static bool dirty_pred(struct keybuf *buf, struct bkey *k) { + struct cached_dev *dc = container_of(buf, struct cached_dev, writeback_keys); + + BUG_ON(KEY_INODE(k) != dc->disk.id); + return KEY_DIRTY(k); } @@ -372,11 +376,24 @@ static void refill_full_stripes(struct cached_dev *dc) } } +/* + * Returns true if we scanned the entire disk + */ static bool refill_dirty(struct cached_dev *dc) { struct keybuf *buf = &dc->writeback_keys; + struct bkey start = KEY(dc->disk.id, 0, 0); struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0); - bool searched_from_start = false; + struct bkey start_pos; + + /* + * make sure keybuf pos is inside the range for this disk - at bringup + * we might not be attached yet so this disk's inode nr isn't + * initialized then + */ + if (bkey_cmp(&buf->last_scanned, &start) < 0 || + bkey_cmp(&buf->last_scanned, &end) > 0) + buf->last_scanned = start; if (dc->partial_stripes_expensive) { refill_full_stripes(dc); @@ -384,14 +401,20 @@ static bool refill_dirty(struct cached_dev *dc) return false; } - if (bkey_cmp(&buf->last_scanned, &end) >= 0) { - buf->last_scanned = KEY(dc->disk.id, 0, 0); - searched_from_start = true; - } - + start_pos = buf->last_scanned; bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred); - return bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start; + if (bkey_cmp(&buf->last_scanned, &end) < 0) + return false; + + /* + * If we get to the end start scanning again from the beginning, and + * only scan up to where we initially started scanning from: + */ + buf->last_scanned = start; + bch_refill_keybuf(dc->disk.c, buf, &start_pos, dirty_pred); + + return bkey_cmp(&buf->last_scanned, &start_pos) >= 0; } static int bch_writeback_thread(void *arg) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 0a9dab187b79c..073a042aed243 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -63,7 +63,8 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, static inline void bch_writeback_queue(struct cached_dev *dc) { - wake_up_process(dc->writeback_thread); + if (!IS_ERR_OR_NULL(dc->writeback_thread)) + wake_up_process(dc->writeback_thread); 
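The refill_dirty() rework above turns the dirty-key scan into a single wrap-around pass: scan from the saved position to the end, then from the start back up to where the pass began. A standalone sketch of that traversal; the keyspace size and resume point are made up:

    #include <stdio.h>

    #define DISK_END 100

    static int visited;

    static void visit(int key) { (void)key; visited++; }

    /* One pass of a resumable scan, like refill_dirty(): go from the
     * saved position to the end of the keyspace, then wrap to the start,
     * but only rescan up to where this pass began. */
    static void scan_pass(int *last_scanned)
    {
            int start_pos = *last_scanned;
            int k;

            for (k = start_pos; k < DISK_END; k++)
                    visit(k);
            for (k = 0; k < start_pos; k++)
                    visit(k);
            *last_scanned = start_pos;
    }

    int main(void)
    {
            int last_scanned = 42;  /* resume point kept across passes */
            scan_pass(&last_scanned);
            printf("visited %d of %d keys exactly once\n", visited, DISK_END);
            return 0;
    }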
 }
 
 static inline void bch_writeback_add(struct cached_dev *dc)
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 135a0907e9de4..a7621a2589365 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -494,7 +494,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
 	bitmap_super_t *sb;
 	unsigned long chunksize, daemon_sleep, write_behind;
 
-	bitmap->storage.sb_page = alloc_page(GFP_KERNEL);
+	bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (bitmap->storage.sb_page == NULL)
 		return -ENOMEM;
 	bitmap->storage.sb_page->index = 0;
@@ -541,6 +541,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
 	sb->state = cpu_to_le32(bitmap->flags);
 	bitmap->events_cleared = bitmap->mddev->events;
 	sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
+	bitmap->mddev->bitmap_info.nodes = 0;
 
 	kunmap_atomic(sb);
@@ -611,8 +612,16 @@ static int bitmap_read_sb(struct bitmap *bitmap)
 	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
 	write_behind = le32_to_cpu(sb->write_behind);
 	sectors_reserved = le32_to_cpu(sb->sectors_reserved);
-	nodes = le32_to_cpu(sb->nodes);
-	strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
+	/* XXX: This is a hack to ensure that we don't use clustering
+	 *  in case:
+	 *	- dm-raid is in use and
+	 *	- the nodes written in bitmap_sb is erroneous.
+	 */
+	if (!bitmap->mddev->sync_super) {
+		nodes = le32_to_cpu(sb->nodes);
+		strlcpy(bitmap->mddev->bitmap_info.cluster_name,
+				sb->cluster_name, 64);
+	}
 
 	/* verify that the bitmap-specific fields are valid */
 	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
@@ -1991,7 +2000,8 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
 	if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
 		ret = bitmap_storage_alloc(&store, chunks,
 					   !bitmap->mddev->bitmap_info.external,
-					   bitmap->cluster_slot);
+					   mddev_is_clustered(bitmap->mddev)
+					   ? bitmap->cluster_slot : 0);
 	if (ret)
 		goto err;
diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c
index b04d1f904d076..8308f4b434ec3 100644
--- a/drivers/md/dm-cache-policy-cleaner.c
+++ b/drivers/md/dm-cache-policy-cleaner.c
@@ -171,7 +171,8 @@ static void remove_cache_hash_entry(struct wb_cache_entry *e)
 /* Public interface (see dm-cache-policy.h */
 static int wb_map(struct dm_cache_policy *pe, dm_oblock_t oblock,
 		  bool can_block, bool can_migrate, bool discarded_oblock,
-		  struct bio *bio, struct policy_result *result)
+		  struct bio *bio, struct policy_locker *locker,
+		  struct policy_result *result)
 {
 	struct policy *p = to_policy(pe);
 	struct wb_cache_entry *e;
@@ -434,7 +435,7 @@ static struct dm_cache_policy *wb_create(dm_cblock_t cache_size,
 static struct dm_cache_policy_type wb_policy_type = {
 	.name = "cleaner",
 	.version = {1, 0, 0},
-	.hint_size = 0,
+	.hint_size = 4,
 	.owner = THIS_MODULE,
 	.create = wb_create
 };
diff --git a/drivers/md/dm-cache-policy-internal.h b/drivers/md/dm-cache-policy-internal.h
index 2256a1f24f73e..c198e6defb9cd 100644
--- a/drivers/md/dm-cache-policy-internal.h
+++ b/drivers/md/dm-cache-policy-internal.h
@@ -16,9 +16,10 @@
  */
 static inline int policy_map(struct dm_cache_policy *p, dm_oblock_t oblock,
 			     bool can_block, bool can_migrate, bool discarded_oblock,
-			     struct bio *bio, struct policy_result *result)
+			     struct bio *bio, struct policy_locker *locker,
+			     struct policy_result *result)
 {
-	return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, result);
+	return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, locker, result);
 }
 
 static inline int policy_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 3ddd1162334df..515d44bf24d3d 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -693,9 +693,10 @@ static void requeue(struct mq_policy *mq, struct entry *e)
  * - set the hit count to a hard coded value other than 1, eg, is it better
  *   if it goes in at level 2?
  */
-static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
+static int demote_cblock(struct mq_policy *mq,
+			 struct policy_locker *locker, dm_oblock_t *oblock)
 {
-	struct entry *demoted = pop(mq, &mq->cache_clean);
+	struct entry *demoted = peek(&mq->cache_clean);
 
 	if (!demoted)
 		/*
@@ -707,6 +708,13 @@ static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
 		 */
 		return -ENOSPC;
 
+	if (locker->fn(locker, demoted->oblock))
+		/*
+		 * We couldn't lock the demoted block.
+		 */
+		return -EBUSY;
+
+	del(mq, demoted);
 	*oblock = demoted->oblock;
 	free_entry(&mq->cache_pool, demoted);
@@ -795,6 +803,7 @@ static int cache_entry_found(struct mq_policy *mq,
  * finding which cache block to use.
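The demote_cblock() change above peeks at the victim, asks the core through the policy_locker callback whether it can be locked, and only then removes it. A compilable sketch of that back-off protocol; the error values and names are illustrative:

    #include <stdio.h>

    typedef unsigned long oblock_t;

    struct policy_locker;
    typedef int (*policy_lock_fn)(struct policy_locker *l, oblock_t b);

    struct policy_locker {
            policy_lock_fn fn;
    };

    /* Demotion asks the core, via the locker callback, whether the
     * victim block can be locked; if not it backs off instead of
     * demoting. */
    static int demote(struct policy_locker *l, oblock_t victim, oblock_t *out)
    {
            if (l->fn(l, victim))
                    return -16;     /* like -EBUSY: couldn't lock victim */
            *out = victim;
            return 0;
    }

    static int fail_to_lock(struct policy_locker *l, oblock_t b)
    {
            (void)l; (void)b;
            return 1;       /* pretend the block is already locked */
    }

    int main(void)
    {
            struct policy_locker locker = { .fn = fail_to_lock };
            oblock_t victim;
            printf("demote: %d\n", demote(&locker, 123, &victim));
            return 0;
    }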
*/ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, + struct policy_locker *locker, struct policy_result *result) { int r; @@ -803,11 +812,12 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, /* Ensure there's a free cblock in the cache */ if (epool_empty(&mq->cache_pool)) { result->op = POLICY_REPLACE; - r = demote_cblock(mq, &result->old_oblock); + r = demote_cblock(mq, locker, &result->old_oblock); if (r) { result->op = POLICY_MISS; return 0; } + } else result->op = POLICY_NEW; @@ -829,7 +839,8 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e, bool can_migrate, bool discarded_oblock, - int data_dir, struct policy_result *result) + int data_dir, struct policy_locker *locker, + struct policy_result *result) { int r = 0; @@ -842,7 +853,7 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e, else { requeue(mq, e); - r = pre_cache_to_cache(mq, e, result); + r = pre_cache_to_cache(mq, e, locker, result); } return r; @@ -872,6 +883,7 @@ static void insert_in_pre_cache(struct mq_policy *mq, } static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, + struct policy_locker *locker, struct policy_result *result) { int r; @@ -879,7 +891,7 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, if (epool_empty(&mq->cache_pool)) { result->op = POLICY_REPLACE; - r = demote_cblock(mq, &result->old_oblock); + r = demote_cblock(mq, locker, &result->old_oblock); if (unlikely(r)) { result->op = POLICY_MISS; insert_in_pre_cache(mq, oblock); @@ -907,11 +919,12 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock, bool can_migrate, bool discarded_oblock, - int data_dir, struct policy_result *result) + int data_dir, struct policy_locker *locker, + struct policy_result *result) { if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) <= 1) { if (can_migrate) - insert_in_cache(mq, oblock, result); + insert_in_cache(mq, oblock, locker, result); else return -EWOULDBLOCK; } else { @@ -928,7 +941,8 @@ static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock, */ static int map(struct mq_policy *mq, dm_oblock_t oblock, bool can_migrate, bool discarded_oblock, - int data_dir, struct policy_result *result) + int data_dir, struct policy_locker *locker, + struct policy_result *result) { int r = 0; struct entry *e = hash_lookup(mq, oblock); @@ -942,11 +956,11 @@ static int map(struct mq_policy *mq, dm_oblock_t oblock, else if (e) r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock, - data_dir, result); + data_dir, locker, result); else r = no_entry_found(mq, oblock, can_migrate, discarded_oblock, - data_dir, result); + data_dir, locker, result); if (r == -EWOULDBLOCK) result->op = POLICY_MISS; @@ -1012,7 +1026,8 @@ static void copy_tick(struct mq_policy *mq) static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock, bool can_block, bool can_migrate, bool discarded_oblock, - struct bio *bio, struct policy_result *result) + struct bio *bio, struct policy_locker *locker, + struct policy_result *result) { int r; struct mq_policy *mq = to_mq_policy(p); @@ -1028,7 +1043,7 @@ static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock, iot_examine_bio(&mq->tracker, bio); r = map(mq, oblock, can_migrate, discarded_oblock, - bio_data_dir(bio), result); + bio_data_dir(bio), locker, result); mutex_unlock(&mq->lock); diff --git 
a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h index f50fe360c5462..5524e21e48365 100644 --- a/drivers/md/dm-cache-policy.h +++ b/drivers/md/dm-cache-policy.h @@ -69,6 +69,18 @@ enum policy_operation { POLICY_REPLACE }; +/* + * When issuing a POLICY_REPLACE the policy needs to make a callback to + * lock the block being demoted. This doesn't need to occur during a + * writeback operation since the block remains in the cache. + */ +struct policy_locker; +typedef int (*policy_lock_fn)(struct policy_locker *l, dm_oblock_t oblock); + +struct policy_locker { + policy_lock_fn fn; +}; + /* * This is the instruction passed back to the core target. */ @@ -122,7 +134,8 @@ struct dm_cache_policy { */ int (*map)(struct dm_cache_policy *p, dm_oblock_t oblock, bool can_block, bool can_migrate, bool discarded_oblock, - struct bio *bio, struct policy_result *result); + struct bio *bio, struct policy_locker *locker, + struct policy_result *result); /* * Sometimes we want to see if a block is in the cache, without diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 7755af3518676..e049becaaf2d2 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1445,16 +1445,43 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio) &cache->stats.read_miss : &cache->stats.write_miss); } +/*----------------------------------------------------------------*/ + +struct old_oblock_lock { + struct policy_locker locker; + struct cache *cache; + struct prealloc *structs; + struct dm_bio_prison_cell *cell; +}; + +static int null_locker(struct policy_locker *locker, dm_oblock_t b) +{ + /* This should never be called */ + BUG(); + return 0; +} + +static int cell_locker(struct policy_locker *locker, dm_oblock_t b) +{ + struct old_oblock_lock *l = container_of(locker, struct old_oblock_lock, locker); + struct dm_bio_prison_cell *cell_prealloc = prealloc_get_cell(l->structs); + + return bio_detain(l->cache, b, NULL, cell_prealloc, + (cell_free_fn) prealloc_put_cell, + l->structs, &l->cell); +} + static void process_bio(struct cache *cache, struct prealloc *structs, struct bio *bio) { int r; bool release_cell = true; dm_oblock_t block = get_bio_block(cache, bio); - struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; + struct dm_bio_prison_cell *cell_prealloc, *new_ocell; struct policy_result lookup_result; bool passthrough = passthrough_mode(&cache->features); bool discarded_block, can_migrate; + struct old_oblock_lock ool; /* * Check to see if that block is currently migrating. @@ -1469,8 +1496,12 @@ static void process_bio(struct cache *cache, struct prealloc *structs, discarded_block = is_discarded_oblock(cache, block); can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); + ool.locker.fn = cell_locker; + ool.cache = cache; + ool.structs = structs; + ool.cell = NULL; r = policy_map(cache->policy, block, true, can_migrate, discarded_block, - bio, &lookup_result); + bio, &ool.locker, &lookup_result); if (r == -EWOULDBLOCK) /* migration has been denied */ @@ -1527,27 +1558,11 @@ static void process_bio(struct cache *cache, struct prealloc *structs, break; case POLICY_REPLACE: - cell_prealloc = prealloc_get_cell(structs); - r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc, - (cell_free_fn) prealloc_put_cell, - structs, &old_ocell); - if (r > 0) { - /* - * We have to be careful to avoid lock inversion of - * the cells. So we back off, and wait for the - * old_ocell to become free. 
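cell_locker() above recovers its per-call state with container_of() from the embedded policy_locker. The pattern in self-contained form; the macro is spelled out since this sketch is userspace, and the fields are invented:

    #include <stdio.h>
    #include <stddef.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct locker { int (*fn)(struct locker *l, long b); };

    /* The callback receives only the embedded struct locker; the rest of
     * the state travels alongside it in the outer struct and is
     * recovered with container_of(). */
    struct old_oblock_lock {
            struct locker locker;
            int cells_taken;
    };

    static int cell_locker(struct locker *l, long b)
    {
            struct old_oblock_lock *ool =
                    container_of(l, struct old_oblock_lock, locker);
            (void)b;
            ool->cells_taken++;
            return 0;
    }

    int main(void)
    {
            struct old_oblock_lock ool = { .locker = { .fn = cell_locker } };
            ool.locker.fn(&ool.locker, 42);
            printf("cells taken: %d\n", ool.cells_taken);
            return 0;
    }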
- */ - policy_force_mapping(cache->policy, block, - lookup_result.old_oblock); - atomic_inc(&cache->stats.cache_cell_clash); - break; - } atomic_inc(&cache->stats.demotion); atomic_inc(&cache->stats.promotion); - demote_then_promote(cache, structs, lookup_result.old_oblock, block, lookup_result.cblock, - old_ocell, new_ocell); + ool.cell, new_ocell); release_cell = false; break; @@ -2595,6 +2610,9 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso bool discarded_block; struct policy_result lookup_result; struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); + struct old_oblock_lock ool; + + ool.locker.fn = null_locker; if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { /* @@ -2633,7 +2651,7 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso discarded_block = is_discarded_oblock(cache, block); r = policy_map(cache->policy, block, false, can_migrate, discarded_block, - bio, &lookup_result); + bio, &ool.locker, &lookup_result); if (r == -EWOULDBLOCK) { cell_defer(cache, *cell, true); return DM_MAPIO_SUBMITTED; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 5503e43e5f282..049282e6482ff 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -955,7 +955,8 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone); /* * Generate a new unfragmented bio with the given size - * This should never violate the device limitations + * This should never violate the device limitations (but only because + * max_segment_size is being constrained to PAGE_SIZE). * * This function may be called concurrently. If we allocate from the mempool * concurrently, there is a possibility of deadlock. For example, if we have @@ -2040,9 +2041,20 @@ static int crypt_iterate_devices(struct dm_target *ti, return fn(ti, cc->dev, cc->start, ti->len, data); } +static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) +{ + /* + * Unfortunate constraint that is required to avoid the potential + * for exceeding underlying device's max_segments limits -- due to + * crypt_alloc_buffer() possibly allocating pages for the encryption + * bio that are not as physically contiguous as the original bio. + */ + limits->max_segment_size = PAGE_SIZE; +} + static struct target_type crypt_target = { .name = "crypt", - .version = {1, 14, 0}, + .version = {1, 14, 1}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, @@ -2054,6 +2066,7 @@ static struct target_type crypt_target = { .message = crypt_message, .merge = crypt_merge, .iterate_devices = crypt_iterate_devices, + .io_hints = crypt_io_hints, }; static int __init dm_crypt_init(void) diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 0b2536247cf55..84e27708ad973 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -70,7 +70,7 @@ struct dm_exception_store_type { * Update the metadata with this exception. 
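This commit_exception() interface change threads a validity flag through the commit instead of short-circuiting in the caller. A minimal model of the resulting flow; the store and callback types are stand-ins:

    #include <stdio.h>

    struct store { int valid; };

    /* Folding the error into the commit: a !valid commit poisons the
     * store, and the completion callback is invoked with the same
     * verdict either way. */
    static void commit_exception(struct store *s, int valid,
                                 void (*cb)(void *, int), void *ctx)
    {
            if (!valid)
                    s->valid = 0;
            cb(ctx, valid);
    }

    static void done(void *ctx, int success)
    {
            (void)ctx;
            printf("pending_complete(success=%d)\n", success);
    }

    int main(void)
    {
            struct store s = { .valid = 1 };
            int copy_error = 1;     /* as if the kcopyd copy failed */

            commit_exception(&s, !copy_error, done, NULL);
            printf("store still valid: %d\n", s.valid);
            return 0;
    }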
*/ void (*commit_exception) (struct dm_exception_store *store, - struct dm_exception *e, + struct dm_exception *e, int valid, void (*callback) (void *, int success), void *callback_context); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 88e4c7f249864..2c1f2e13719e7 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -327,8 +327,7 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) */ if (min_region_size > (1 << 13)) { /* If not a power of 2, make it the next power of 2 */ - if (min_region_size & (min_region_size - 1)) - region_size = 1 << fls(region_size); + region_size = roundup_pow_of_two(min_region_size); DMINFO("Choosing default region size of %lu sectors", region_size); } else { diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 808b8419bc48d..9feb894e5565a 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -694,7 +694,7 @@ static int persistent_prepare_exception(struct dm_exception_store *store, } static void persistent_commit_exception(struct dm_exception_store *store, - struct dm_exception *e, + struct dm_exception *e, int valid, void (*callback) (void *, int success), void *callback_context) { @@ -703,6 +703,9 @@ static void persistent_commit_exception(struct dm_exception_store *store, struct core_exception ce; struct commit_callback *cb; + if (!valid) + ps->valid = 0; + ce.old_chunk = e->old_chunk; ce.new_chunk = e->new_chunk; write_exception(ps, ps->current_committed++, &ce); diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c index 1ce9a2586e413..31439d53cf7e8 100644 --- a/drivers/md/dm-snap-transient.c +++ b/drivers/md/dm-snap-transient.c @@ -52,12 +52,12 @@ static int transient_prepare_exception(struct dm_exception_store *store, } static void transient_commit_exception(struct dm_exception_store *store, - struct dm_exception *e, + struct dm_exception *e, int valid, void (*callback) (void *, int success), void *callback_context) { /* Just succeed */ - callback(callback_context, 1); + callback(callback_context, valid); } static void transient_usage(struct dm_exception_store *store, diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index f83a0f3fc3656..11ec9d2a27df2 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1428,8 +1428,9 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err) dm_table_event(s->ti->table); } -static void pending_complete(struct dm_snap_pending_exception *pe, int success) +static void pending_complete(void *context, int success) { + struct dm_snap_pending_exception *pe = context; struct dm_exception *e; struct dm_snapshot *s = pe->snap; struct bio *origin_bios = NULL; @@ -1500,24 +1501,13 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) free_pending_exception(pe); } -static void commit_callback(void *context, int success) -{ - struct dm_snap_pending_exception *pe = context; - - pending_complete(pe, success); -} - static void complete_exception(struct dm_snap_pending_exception *pe) { struct dm_snapshot *s = pe->snap; - if (unlikely(pe->copy_error)) - pending_complete(pe, 0); - - else - /* Update the metadata if we are persistent */ - s->store->type->commit_exception(s->store, &pe->e, - commit_callback, pe); + /* Update the metadata if we are persistent */ + s->store->type->commit_exception(s->store, &pe->e, !pe->copy_error, + pending_complete, pe); } /* diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 
f478a4c96d2f5..419bdd4fc8b8e 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -795,6 +795,8 @@ static int message_stats_create(struct mapped_device *md, return -EINVAL; if (sscanf(argv[2], "/%u%c", &divisor, &dummy) == 1) { + if (!divisor) + return -EINVAL; step = end - start; if (do_div(step, divisor)) step++; diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 79f694120ddf0..cde1d6749017c 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1295,8 +1295,8 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) return r; disk_super = dm_block_data(copy); - dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->data_mapping_root)); - dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->device_details_root)); + dm_btree_del(&pmd->info, le64_to_cpu(disk_super->data_mapping_root)); + dm_btree_del(&pmd->details_info, le64_to_cpu(disk_super->device_details_root)); dm_sm_dec_block(pmd->metadata_sm, held_root); return dm_tm_unlock(pmd->tm, copy); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 921aafd12aee6..cb58bb3187824 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -260,7 +261,7 @@ struct pool { process_mapping_fn process_prepared_mapping; process_mapping_fn process_prepared_discard; - struct dm_bio_prison_cell *cell_sort_array[CELL_SORT_ARRAY_SIZE]; + struct dm_bio_prison_cell **cell_sort_array; }; static enum pool_mode get_pool_mode(struct pool *pool); @@ -2499,6 +2500,7 @@ static void __pool_destroy(struct pool *pool) { __pool_table_remove(pool); + vfree(pool->cell_sort_array); if (dm_pool_metadata_close(pool->pmd) < 0) DMWARN("%s: dm_pool_metadata_close() failed.", __func__); @@ -2611,6 +2613,13 @@ static struct pool *pool_create(struct mapped_device *pool_md, goto bad_mapping_pool; } + pool->cell_sort_array = vmalloc(sizeof(*pool->cell_sort_array) * CELL_SORT_ARRAY_SIZE); + if (!pool->cell_sort_array) { + *error = "Error allocating cell sort array"; + err_p = ERR_PTR(-ENOMEM); + goto bad_sort_array; + } + pool->ref_count = 1; pool->last_commit_jiffies = jiffies; pool->pool_md = pool_md; @@ -2619,6 +2628,8 @@ static struct pool *pool_create(struct mapped_device *pool_md, return pool; +bad_sort_array: + mempool_destroy(pool->mapping_pool); bad_mapping_pool: dm_deferred_set_destroy(pool->all_io_ds); bad_all_io_ds: @@ -2948,7 +2959,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) metadata_low_callback, pool); if (r) - goto out_free_pt; + goto out_flags_changed; pt->callbacks.congested_fn = pool_is_congested; dm_table_add_target_callbacks(ti->table, &pt->callbacks); @@ -3199,8 +3210,8 @@ static void pool_postsuspend(struct dm_target *ti) struct pool_c *pt = ti->private; struct pool *pool = pt->pool; - cancel_delayed_work(&pool->waker); - cancel_delayed_work(&pool->no_space_timeout); + cancel_delayed_work_sync(&pool->waker); + cancel_delayed_work_sync(&pool->no_space_timeout); flush_workqueue(pool->wq); (void) commit(pool); } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 2caf492890d64..62610aafaac77 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1053,13 +1053,10 @@ static struct dm_rq_target_io *tio_from_request(struct request *rq) */ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) { - int nr_requests_pending; - atomic_dec(&md->pending[rw]); /* nudge anyone waiting on suspend queue */ - nr_requests_pending = md_in_flight(md); - if 
(!nr_requests_pending) + if (!md_in_flight(md)) wake_up(&md->wait); /* @@ -1071,8 +1068,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) if (run_queue) { if (md->queue->mq_ops) blk_mq_run_hw_queues(md->queue, true); - else if (!nr_requests_pending || - (nr_requests_pending >= md->queue->nr_congestion_on)) + else blk_run_queue_async(md->queue); } @@ -1150,6 +1146,8 @@ static void dm_unprep_request(struct request *rq) if (clone) free_rq_clone(clone); + else if (!tio->md->queue->mq_ops) + free_rq_tio(tio); } /* @@ -1723,7 +1721,8 @@ static int dm_merge_bvec(struct request_queue *q, struct mapped_device *md = q->queuedata; struct dm_table *map = dm_get_live_table_fast(md); struct dm_target *ti; - sector_t max_sectors, max_size = 0; + sector_t max_sectors; + int max_size = 0; if (unlikely(!map)) goto out; @@ -1736,18 +1735,10 @@ static int dm_merge_bvec(struct request_queue *q, * Find maximum amount of I/O that won't need splitting */ max_sectors = min(max_io_len(bvm->bi_sector, ti), - (sector_t) queue_max_sectors(q)); + (sector_t) BIO_MAX_SECTORS); max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; - - /* - * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t - * to the targets' merge function since it holds sectors not bytes). - * Just doing this as an interim fix for stable@ because the more - * comprehensive cleanup of switching to sector_t will impact every - * DM target that implements a ->merge hook. - */ - if (max_size > INT_MAX) - max_size = INT_MAX; + if (max_size < 0) + max_size = 0; /* * merge_bvec_fn() returns number of bytes @@ -1755,13 +1746,13 @@ static int dm_merge_bvec(struct request_queue *q, * max is precomputed maximal io size */ if (max_size && ti->type->merge) - max_size = ti->type->merge(ti, bvm, biovec, (int) max_size); + max_size = ti->type->merge(ti, bvm, biovec, max_size); /* * If the target doesn't support merge method and some of the devices - * provided their merge_bvec method (we know this by looking for the - * max_hw_sectors that dm_set_device_limits may set), then we can't - * allow bios with multiple vector entries. So always set max_size - * to 0, and the code below allows just one page. + * provided their merge_bvec method (we know this by looking at + * queue_max_hw_sectors), then we can't allow bios with multiple vector + * entries. So always set max_size to 0, and the code below allows + * just one page. */ else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) max_size = 0; @@ -2936,8 +2927,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait) might_sleep(); - map = dm_get_live_table(md, &srcu_idx); - spin_lock(&_minor_lock); idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags); @@ -2951,14 +2940,14 @@ static void __dm_destroy(struct mapped_device *md, bool wait) * do not race with internal suspend. 
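The __dm_destroy() reordering here takes the live table only after suspend_lock is held and returns it before the lock is dropped. A compact model of that acquire/release nesting, using a plain pthread mutex in place of the kernel primitives:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t suspend_lock = PTHREAD_MUTEX_INITIALIZER;
    static int table_refs;

    static void get_live_table(void) { table_refs++; }
    static void put_live_table(void) { table_refs--; }

    /* Teardown order modeled on the fix: acquire the outer lock first,
     * then take the table reference, and drop the reference before the
     * lock is released, so no user of the table outlives the critical
     * section. */
    static void destroy(void)
    {
            pthread_mutex_lock(&suspend_lock);
            get_live_table();
            /* ... presuspend/postsuspend work against the table ... */
            put_live_table();
            pthread_mutex_unlock(&suspend_lock);
    }

    int main(void)
    {
            destroy();
            printf("leaked table refs: %d\n", table_refs);
            return 0;
    }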
*/ mutex_lock(&md->suspend_lock); + map = dm_get_live_table(md, &srcu_idx); if (!dm_suspended_md(md)) { dm_table_presuspend_targets(map); dm_table_postsuspend_targets(map); } - mutex_unlock(&md->suspend_lock); - /* dm_put_live_table must be before msleep, otherwise deadlock is possible */ dm_put_live_table(md, srcu_idx); + mutex_unlock(&md->suspend_lock); /* * Rare, but there may be I/O requests still going to complete, diff --git a/drivers/md/md.c b/drivers/md/md.c index 4dbed4a67aaf4..78c1f77e79035 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4005,8 +4005,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) else rdev = md_import_device(dev, -1, -1); - if (IS_ERR(rdev)) + if (IS_ERR(rdev)) { + mddev_unlock(mddev); return PTR_ERR(rdev); + } err = bind_rdev_to_array(rdev, mddev); out: if (err) @@ -5159,6 +5161,7 @@ int md_run(struct mddev *mddev) mddev_detach(mddev); if (mddev->private) pers->free(mddev, mddev->private); + mddev->private = NULL; module_put(pers->owner); bitmap_destroy(mddev); return err; @@ -5294,6 +5297,7 @@ static void md_clean(struct mddev *mddev) mddev->changed = 0; mddev->degraded = 0; mddev->safemode = 0; + mddev->private = NULL; mddev->merge_check_needed = 0; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = 0; @@ -5361,11 +5365,14 @@ static void __md_stop(struct mddev *mddev) { struct md_personality *pers = mddev->pers; mddev_detach(mddev); + /* Ensure ->event_work is done */ + flush_workqueue(md_misc_wq); spin_lock(&mddev->lock); mddev->ready = 0; mddev->pers = NULL; spin_unlock(&mddev->lock); pers->free(mddev, mddev->private); + mddev->private = NULL; if (pers->sync_request && mddev->to_remove == NULL) mddev->to_remove = &md_redundancy_group; module_put(pers->owner); @@ -5735,7 +5742,7 @@ static int get_bitmap_file(struct mddev *mddev, void __user * arg) char *ptr; int err; - file = kmalloc(sizeof(*file), GFP_NOIO); + file = kzalloc(sizeof(*file), GFP_NOIO); if (!file) return -ENOMEM; @@ -6375,7 +6382,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->ctime != info->ctime || mddev->level != info->level || /* mddev->layout != info->layout || */ - !mddev->persistent != info->not_persistent|| + mddev->persistent != !info->not_persistent || mddev->chunk_sectors != info->chunk_size >> 9 || /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */ ((state^info->state) & 0xfffffe00) @@ -8006,8 +8013,7 @@ static int remove_and_add_spares(struct mddev *mddev, !test_bit(Bitmap_sync, &rdev->flags))) continue; - if (rdev->saved_raid_disk < 0) - rdev->recovery_offset = 0; + rdev->recovery_offset = 0; if (mddev->pers-> hot_add_disk(mddev, rdev) == 0) { if (sysfs_link_rdev(mddev, rdev)) diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index bf2b80d5c4707..8731b6ea026bd 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -138,4 +138,10 @@ int lower_bound(struct btree_node *n, uint64_t key); extern struct dm_block_validator btree_node_validator; +/* + * Value type for upper levels of multi-level btrees. 
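init_le64_type(), declared here and implemented in the spine code below, packages an __le64 value type whose inc/dec hooks forward to the transaction manager, with memcpy used because node storage may be unaligned. A userspace sketch of the same vtable idea, with the types simplified and dec elided:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* A value type as a small vtable: a context plus inc/dec hooks, so
     * interior-node values (block numbers) can be reference counted. */
    struct value_type {
            void *context;
            size_t size;
            void (*inc)(void *context, const void *value_le);
            void (*dec)(void *context, const void *value_le);
    };

    struct tm { int refs; };    /* stand-in transaction manager */

    static void le64_inc(void *context, const void *value_le)
    {
            uint64_t v;

            /* memcpy: node storage may not be naturally aligned */
            memcpy(&v, value_le, sizeof(v));
            ((struct tm *)context)->refs++;
    }

    static void init_le64_type(struct tm *tm, struct value_type *vt)
    {
            vt->context = tm;
            vt->size = sizeof(uint64_t);
            vt->inc = le64_inc;
            vt->dec = NULL;     /* elided in this sketch */
    }

    int main(void)
    {
            struct tm tm = { 0 };
            struct value_type vt;
            uint64_t block = 1234;

            init_le64_type(&tm, &vt);
            vt.inc(vt.context, &block);
            printf("refs after inc: %d\n", tm.refs);
            return 0;
    }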
+ */ +extern void init_le64_type(struct dm_transaction_manager *tm, + struct dm_btree_value_type *vt); + #endif /* DM_BTREE_INTERNAL_H */ diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index b88757cd0d1d9..92cd09f3c69b5 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -301,35 +301,40 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, { int s; uint32_t max_entries = le32_to_cpu(left->header.max_entries); - unsigned target = (nr_left + nr_center + nr_right) / 3; - BUG_ON(target > max_entries); + unsigned total = nr_left + nr_center + nr_right; + unsigned target_right = total / 3; + unsigned remainder = (target_right * 3) != total; + unsigned target_left = target_right + remainder; + + BUG_ON(target_left > max_entries); + BUG_ON(target_right > max_entries); if (nr_left < nr_right) { - s = nr_left - target; + s = nr_left - target_left; if (s < 0 && nr_center < -s) { /* not enough in central node */ - shift(left, center, nr_center); - s = nr_center - target; + shift(left, center, -nr_center); + s += nr_center; shift(left, right, s); nr_right += s; } else shift(left, center, s); - shift(center, right, target - nr_right); + shift(center, right, target_right - nr_right); } else { - s = target - nr_right; + s = target_right - nr_right; if (s > 0 && nr_center < s) { /* not enough in central node */ shift(center, right, nr_center); - s = target - nr_center; + s -= nr_center; shift(left, right, s); nr_left -= s; } else shift(center, right, s); - shift(left, center, nr_left - target); + shift(left, center, nr_left - target_left); } *key_ptr(parent, c->index) = center->keys[0]; @@ -544,14 +549,6 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, return r; } -static struct dm_btree_value_type le64_type = { - .context = NULL, - .size = sizeof(__le64), - .inc = NULL, - .dec = NULL, - .equal = NULL -}; - int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, uint64_t *keys, dm_block_t *new_root) { @@ -559,12 +556,14 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, int index = 0, r = 0; struct shadow_spine spine; struct btree_node *n; + struct dm_btree_value_type le64_vt; + init_le64_type(info->tm, &le64_vt); init_shadow_spine(&spine, info); for (level = 0; level < info->levels; level++) { r = remove_raw(&spine, info, (level == last_level ? 
- &info->value_type : &le64_type), + &info->value_type : &le64_vt), root, keys[level], (unsigned *)&index); if (r < 0) break; diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index 1b5e13ec7f96a..0dee514ba4c5f 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -249,3 +249,40 @@ int shadow_root(struct shadow_spine *s) { return s->root; } + +static void le64_inc(void *context, const void *value_le) +{ + struct dm_transaction_manager *tm = context; + __le64 v_le; + + memcpy(&v_le, value_le, sizeof(v_le)); + dm_tm_inc(tm, le64_to_cpu(v_le)); +} + +static void le64_dec(void *context, const void *value_le) +{ + struct dm_transaction_manager *tm = context; + __le64 v_le; + + memcpy(&v_le, value_le, sizeof(v_le)); + dm_tm_dec(tm, le64_to_cpu(v_le)); +} + +static int le64_equal(void *context, const void *value1_le, const void *value2_le) +{ + __le64 v1_le, v2_le; + + memcpy(&v1_le, value1_le, sizeof(v1_le)); + memcpy(&v2_le, value2_le, sizeof(v2_le)); + return v1_le == v2_le; +} + +void init_le64_type(struct dm_transaction_manager *tm, + struct dm_btree_value_type *vt) +{ + vt->context = tm; + vt->size = sizeof(__le64); + vt->inc = le64_inc; + vt->dec = le64_dec; + vt->equal = le64_equal; +} diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 200ac12a1d407..d6e47033b5e0d 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -255,7 +255,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) int r; struct del_stack *s; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = kmalloc(sizeof(*s), GFP_NOIO); if (!s) return -ENOMEM; s->info = info; @@ -523,7 +523,7 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) r = new_block(s->info, &right); if (r < 0) { - /* FIXME: put left */ + unlock_block(s->info, left); return r; } @@ -667,12 +667,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root, struct btree_node *n; struct dm_btree_value_type le64_type; - le64_type.context = NULL; - le64_type.size = sizeof(__le64); - le64_type.inc = NULL; - le64_type.dec = NULL; - le64_type.equal = NULL; - + init_le64_type(info->tm, &le64_type); init_shadow_spine(&spine, info); for (level = 0; level < (info->levels - 1); level++) { diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index e8a9042988871..53091295fce9b 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -204,6 +204,27 @@ static void in(struct sm_metadata *smm) smm->recursion_count++; } +static int apply_bops(struct sm_metadata *smm) +{ + int r = 0; + + while (!brb_empty(&smm->uncommitted)) { + struct block_op bop; + + r = brb_pop(&smm->uncommitted, &bop); + if (r) { + DMERR("bug in bop ring buffer"); + break; + } + + r = commit_bop(smm, &bop); + if (r) + break; + } + + return r; +} + static int out(struct sm_metadata *smm) { int r = 0; @@ -216,21 +237,8 @@ static int out(struct sm_metadata *smm) return -ENOMEM; } - if (smm->recursion_count == 1) { - while (!brb_empty(&smm->uncommitted)) { - struct block_op bop; - - r = brb_pop(&smm->uncommitted, &bop); - if (r) { - DMERR("bug in bop ring buffer"); - break; - } - - r = commit_bop(smm, &bop); - if (r) - break; - } - } + if (smm->recursion_count == 1) + apply_bops(smm); smm->recursion_count--; @@ -704,6 +712,12 @@ static int 
sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) } old_len = smm->begin; + r = apply_bops(smm); + if (r) { + DMERR("%s: apply_bops failed", __func__); + goto out; + } + r = sm_ll_commit(&smm->ll); if (r) goto out; @@ -773,6 +787,12 @@ int dm_sm_metadata_create(struct dm_space_map *sm, if (r) return r; + r = apply_bops(smm); + if (r) { + DMERR("%s: apply_bops failed", __func__); + return r; + } + return sm_metadata_commit(sm); } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index efb654eb53992..0875e5e7e09ab 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -83,7 +83,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) char b[BDEVNAME_SIZE]; char b2[BDEVNAME_SIZE]; struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL); - bool discard_supported = false; + unsigned short blksize = 512; if (!conf) return -ENOMEM; @@ -98,6 +98,9 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) sector_div(sectors, mddev->chunk_sectors); rdev1->sectors = sectors * mddev->chunk_sectors; + blksize = max(blksize, queue_logical_block_size( + rdev1->bdev->bd_disk->queue)); + rdev_for_each(rdev2, mddev) { pr_debug("md/raid0:%s: comparing %s(%llu)" " with %s(%llu)\n", @@ -134,6 +137,18 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) } pr_debug("md/raid0:%s: FINAL %d zones\n", mdname(mddev), conf->nr_strip_zones); + /* + * now since we have the hard sector sizes, we can make sure + * chunk size is a multiple of that sector size + */ + if ((mddev->chunk_sectors << 9) % blksize) { + printk(KERN_ERR "md/raid0:%s: chunk_size of %d not multiple of block size %d\n", + mdname(mddev), + mddev->chunk_sectors << 9, blksize); + err = -EINVAL; + goto abort; + } + err = -ENOMEM; conf->strip_zone = kzalloc(sizeof(struct strip_zone)* conf->nr_strip_zones, GFP_KERNEL); @@ -188,19 +203,12 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) } dev[j] = rdev1; - if (mddev->queue) - disk_stack_limits(mddev->gendisk, rdev1->bdev, - rdev1->data_offset << 9); - if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) conf->has_merge_bvec = 1; if (!smallest || (rdev1->sectors < smallest->sectors)) smallest = rdev1; cnt++; - - if (blk_queue_discard(bdev_get_queue(rdev1->bdev))) - discard_supported = true; } if (cnt != mddev->raid_disks) { printk(KERN_ERR "md/raid0:%s: too few disks (%d of %d) - " @@ -261,28 +269,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) (unsigned long long)smallest->sectors); } - /* - * now since we have the hard sector sizes, we can make sure - * chunk size is a multiple of that sector size - */ - if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) { - printk(KERN_ERR "md/raid0:%s: chunk_size of %d not valid\n", - mdname(mddev), - mddev->chunk_sectors << 9); - goto abort; - } - - if (mddev->queue) { - blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); - blk_queue_io_opt(mddev->queue, - (mddev->chunk_sectors << 9) * mddev->raid_disks); - - if (!discard_supported) - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); - else - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); - } - pr_debug("md/raid0:%s: done.\n", mdname(mddev)); *private_conf = conf; @@ -433,12 +419,6 @@ static int raid0_run(struct mddev *mddev) if (md_check_no_bitmap(mddev)) return -EINVAL; - if (mddev->queue) { - blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); - 
blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors); - blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors); - } - /* if private is not null, we are here after takeover */ if (mddev->private == NULL) { ret = create_strip_zones(mddev, &conf); @@ -447,6 +427,29 @@ static int raid0_run(struct mddev *mddev) mddev->private = conf; } conf = mddev->private; + if (mddev->queue) { + struct md_rdev *rdev; + bool discard_supported = false; + + blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); + blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors); + blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors); + + blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); + blk_queue_io_opt(mddev->queue, + (mddev->chunk_sectors << 9) * mddev->raid_disks); + + rdev_for_each(rdev, mddev) { + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); + if (blk_queue_discard(bdev_get_queue(rdev->bdev))) + discard_supported = true; + } + if (!discard_supported) + queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + else + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + } /* calculate array device size */ md_set_array_sectors(mddev, raid0_size(mddev, 0, 0)); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 9157a29c8dbf1..bff6c1c7fecba 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -336,7 +336,7 @@ static void raid1_end_read_request(struct bio *bio, int error) spin_lock_irqsave(&conf->device_lock, flags); if (r1_bio->mddev->degraded == conf->raid_disks || (r1_bio->mddev->degraded == conf->raid_disks-1 && - !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))) + test_bit(In_sync, &conf->mirrors[mirror].rdev->flags))) uptodate = 1; spin_unlock_irqrestore(&conf->device_lock, flags); } @@ -1475,6 +1475,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) { char b[BDEVNAME_SIZE]; struct r1conf *conf = mddev->private; + unsigned long flags; /* * If it is not operational, then we have already marked it as dead @@ -1494,14 +1495,13 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) return; } set_bit(Blocked, &rdev->flags); + spin_lock_irqsave(&conf->device_lock, flags); if (test_and_clear_bit(In_sync, &rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded++; set_bit(Faulty, &rdev->flags); - spin_unlock_irqrestore(&conf->device_lock, flags); } else set_bit(Faulty, &rdev->flags); + spin_unlock_irqrestore(&conf->device_lock, flags); /* * if recovery is running, make sure it aborts. */ @@ -1567,7 +1567,10 @@ static int raid1_spare_active(struct mddev *mddev) * Find all failed disks within the RAID1 configuration * and mark them readable. * Called under mddev lock, so rcu protection not needed. + * device_lock used to avoid races with raid1_end_read_request + * which expects 'In_sync' flags and ->degraded to be consistent. 
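+ * Without the lock a reader could see ->degraded already decremented
+ * while the matching rdev's In_sync bit is still clear, or the reverse.
+ * A simplified sketch of the reader side, adapted from
+ * raid1_end_read_request():
+ *
+ *	spin_lock_irqsave(&conf->device_lock, flags);
+ *	if (mddev->degraded == conf->raid_disks - 1 &&
+ *	    test_bit(In_sync, &rdev->flags))
+ *		uptodate = 1;
+ *	spin_unlock_irqrestore(&conf->device_lock, flags);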
*/ + spin_lock_irqsave(&conf->device_lock, flags); for (i = 0; i < conf->raid_disks; i++) { struct md_rdev *rdev = conf->mirrors[i].rdev; struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev; @@ -1598,7 +1601,6 @@ static int raid1_spare_active(struct mddev *mddev) sysfs_notify_dirent_safe(rdev->sysfs_state); } } - spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded -= count; spin_unlock_irqrestore(&conf->device_lock, flags); @@ -2246,7 +2248,7 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) bio_trim(wbio, sector - r1_bio->sector, sectors); wbio->bi_iter.bi_sector += rdev->data_offset; wbio->bi_bdev = rdev->bdev; - if (submit_bio_wait(WRITE, wbio) == 0) + if (submit_bio_wait(WRITE, wbio) < 0) /* failure! */ ok = rdev_set_badblocks(rdev, sector, sectors, 0) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f55c3f35b7463..adfc83a0f023c 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2590,7 +2590,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) choose_data_offset(r10_bio, rdev) + (sector - r10_bio->sector)); wbio->bi_bdev = rdev->bdev; - if (submit_bio_wait(WRITE, wbio) == 0) + if (submit_bio_wait(WRITE, wbio) < 0) /* Failure! */ ok = rdev_set_badblocks(rdev, sector, sectors, 0) @@ -3566,6 +3566,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) /* far_copies must be 1 */ conf->prev.stride = conf->dev_sectors; } + conf->reshape_safe = conf->reshape_progress; spin_lock_init(&conf->device_lock); INIT_LIST_HEAD(&conf->retry_list); @@ -3770,7 +3771,6 @@ static int run(struct mddev *mddev) } conf->offset_diff = min_offset_diff; - conf->reshape_safe = conf->reshape_progress; clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); @@ -4113,6 +4113,7 @@ static int raid10_start_reshape(struct mddev *mddev) conf->reshape_progress = size; } else conf->reshape_progress = 0; + conf->reshape_safe = conf->reshape_progress; spin_unlock_irq(&conf->device_lock); if (mddev->delta_disks && mddev->bitmap) { @@ -4180,6 +4181,7 @@ static int raid10_start_reshape(struct mddev *mddev) rdev->new_data_offset = rdev->data_offset; smp_wmb(); conf->reshape_progress = MaxSector; + conf->reshape_safe = MaxSector; mddev->reshape_position = MaxSector; spin_unlock_irq(&conf->device_lock); return ret; @@ -4534,6 +4536,7 @@ static void end_reshape(struct r10conf *conf) md_finish_reshape(conf->mddev); smp_wmb(); conf->reshape_progress = MaxSector; + conf->reshape_safe = MaxSector; spin_unlock_irq(&conf->device_lock); /* read-ahead size must cover two whole stripes, which is diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b6793d2e051f3..0d767e31f455d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2151,6 +2151,9 @@ static int resize_stripes(struct r5conf *conf, int newsize) if (!sc) return -ENOMEM; + /* Need to ensure auto-resizing doesn't interfere */ + mutex_lock(&conf->cache_size_mutex); + for (i = conf->max_nr_stripes; i; i--) { nsh = alloc_stripe(sc, GFP_KERNEL); if (!nsh) @@ -2167,6 +2170,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) kmem_cache_free(sc, nsh); } kmem_cache_destroy(sc); + mutex_unlock(&conf->cache_size_mutex); return -ENOMEM; } /* Step 2 - Must use GFP_NOIO now. 
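/*
 * conf->cache_size_mutex serializes every path that changes the stripe
 * cache size; callers that can run in memory-reclaim context use
 * mutex_trylock() so they cannot deadlock against a grower that is
 * itself allocating memory. A simplified sketch of the pattern (names
 * from the surrounding hunks; a sketch, not the literal kernel code):
 */
static void cache_size_pattern(struct r5conf *conf, int size, int nr_to_scan)
{
	/* process context (sysfs resize): may sleep on the mutex */
	mutex_lock(&conf->cache_size_mutex);
	while (size > conf->max_nr_stripes)
		if (!grow_one_stripe(conf, GFP_KERNEL))
			break;
	mutex_unlock(&conf->cache_size_mutex);

	/* reclaim context (shrinker, raid5d): only ever try-lock */
	if (mutex_trylock(&conf->cache_size_mutex)) {
		while (nr_to_scan-- > 0 && drop_one_stripe(conf))
			;
		mutex_unlock(&conf->cache_size_mutex);
	}
}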
@@ -2213,6 +2217,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) } else err = -ENOMEM; + mutex_unlock(&conf->cache_size_mutex); /* Step 4, return new stripes to service */ while(!list_empty(&newstripes)) { nsh = list_entry(newstripes.next, struct stripe_head, lru); @@ -2240,7 +2245,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) static int drop_one_stripe(struct r5conf *conf) { struct stripe_head *sh; - int hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS; + int hash = (conf->max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK; spin_lock_irq(conf->hash_locks + hash); sh = get_free_stripe(conf, hash); @@ -3489,6 +3494,7 @@ static void handle_stripe_clean_event(struct r5conf *conf, } if (!discard_pending && test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { + int hash; clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); if (sh->qd_idx >= 0) { @@ -3502,16 +3508,17 @@ static void handle_stripe_clean_event(struct r5conf *conf, * no updated data, so remove it from hash list and the stripe * will be reinitialized */ - spin_lock_irq(&conf->device_lock); unhash: + hash = sh->hash_lock_index; + spin_lock_irq(conf->hash_locks + hash); remove_hash(sh); + spin_unlock_irq(conf->hash_locks + hash); if (head_sh->batch_head) { sh = list_first_entry(&sh->batch_list, struct stripe_head, batch_list); if (sh != head_sh) goto unhash; } - spin_unlock_irq(&conf->device_lock); sh = head_sh; if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) @@ -5846,12 +5853,14 @@ static void raid5d(struct md_thread *thread) pr_debug("%d stripes handled\n", handled); spin_unlock_irq(&conf->device_lock); - if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) { + if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) && + mutex_trylock(&conf->cache_size_mutex)) { grow_one_stripe(conf, __GFP_NOWARN); /* Set flag even if allocation failed. 
This helps * slow down allocation requests when mem is short */ set_bit(R5_DID_ALLOC, &conf->cache_state); + mutex_unlock(&conf->cache_size_mutex); } async_tx_issue_pending_all(); @@ -5883,18 +5892,22 @@ raid5_set_cache_size(struct mddev *mddev, int size) return -EINVAL; conf->min_nr_stripes = size; + mutex_lock(&conf->cache_size_mutex); while (size < conf->max_nr_stripes && drop_one_stripe(conf)) ; + mutex_unlock(&conf->cache_size_mutex); err = md_allow_write(mddev); if (err) return err; + mutex_lock(&conf->cache_size_mutex); while (size > conf->max_nr_stripes) if (!grow_one_stripe(conf, GFP_KERNEL)) break; + mutex_unlock(&conf->cache_size_mutex); return 0; } @@ -6360,11 +6373,19 @@ static unsigned long raid5_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); - int ret = 0; - while (ret < sc->nr_to_scan) { - if (drop_one_stripe(conf) == 0) - return SHRINK_STOP; - ret++; + unsigned long ret = SHRINK_STOP; + + if (mutex_trylock(&conf->cache_size_mutex)) { + ret= 0; + while (ret < sc->nr_to_scan && + conf->max_nr_stripes > conf->min_nr_stripes) { + if (drop_one_stripe(conf) == 0) { + ret = SHRINK_STOP; + break; + } + ret++; + } + mutex_unlock(&conf->cache_size_mutex); } return ret; } @@ -6433,6 +6454,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) goto abort; spin_lock_init(&conf->device_lock); seqcount_init(&conf->gen_lock); + mutex_init(&conf->cache_size_mutex); init_waitqueue_head(&conf->wait_for_stripe); init_waitqueue_head(&conf->wait_for_overlap); INIT_LIST_HEAD(&conf->handle_list); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 896d603ad0da9..03472fbbd8823 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -482,7 +482,8 @@ struct r5conf { */ int active_name; char cache_name[2][32]; - struct kmem_cache *slab_cache; /* for allocating stripes */ + struct kmem_cache *slab_cache; /* for allocating stripes */ + struct mutex cache_size_mutex; /* Protect changes to cache size */ int seq_flush, seq_write; int quiesce; diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index 882ca417f328a..3ab874703d119 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -2333,9 +2333,9 @@ static int dvb_frontend_ioctl_legacy(struct file *file, dev_dbg(fe->dvb->device, "%s: current delivery system on cache: %d, V3 type: %d\n", __func__, c->delivery_system, fe->ops.info.type); - /* Force the CAN_INVERSION_AUTO bit on. If the frontend doesn't - * do it, it is done for it. 
*/ - info->caps |= FE_CAN_INVERSION_AUTO; + /* Set CAN_INVERSION_AUTO bit on in other than oneshot mode */ + if (!(fepriv->tune_mode_flags & FE_TUNE_MODE_ONESHOT)) + info->caps |= FE_CAN_INVERSION_AUTO; err = 0; break; } diff --git a/drivers/media/dvb-frontends/af9013.c b/drivers/media/dvb-frontends/af9013.c index 8001690d7576c..ba6c8f6c42a1c 100644 --- a/drivers/media/dvb-frontends/af9013.c +++ b/drivers/media/dvb-frontends/af9013.c @@ -605,6 +605,10 @@ static int af9013_set_frontend(struct dvb_frontend *fe) } } + /* Return an error if can't find bandwidth or the right clock */ + if (i == ARRAY_SIZE(coeff_lut)) + return -EINVAL; + ret = af9013_wr_regs(state, 0xae00, coeff_lut[i].val, sizeof(coeff_lut[i].val)); } diff --git a/drivers/media/dvb-frontends/cx24116.c b/drivers/media/dvb-frontends/cx24116.c index 2916d7c74a1da..7bc68b355c0b9 100644 --- a/drivers/media/dvb-frontends/cx24116.c +++ b/drivers/media/dvb-frontends/cx24116.c @@ -963,6 +963,10 @@ static int cx24116_send_diseqc_msg(struct dvb_frontend *fe, struct cx24116_state *state = fe->demodulator_priv; int i, ret; + /* Validate length */ + if (d->msg_len > sizeof(d->msg)) + return -EINVAL; + /* Dump DiSEqC message */ if (debug) { printk(KERN_INFO "cx24116: %s(", __func__); @@ -974,10 +978,6 @@ static int cx24116_send_diseqc_msg(struct dvb_frontend *fe, printk(") toneburst=%d\n", toneburst); } - /* Validate length */ - if (d->msg_len > (CX24116_ARGLEN - CX24116_DISEQC_MSGOFS)) - return -EINVAL; - /* DiSEqC message */ for (i = 0; i < d->msg_len; i++) state->dsec_cmd.args[CX24116_DISEQC_MSGOFS + i] = d->msg[i]; diff --git a/drivers/media/dvb-frontends/cx24117.c b/drivers/media/dvb-frontends/cx24117.c index acb965ce0358b..af6363573efd6 100644 --- a/drivers/media/dvb-frontends/cx24117.c +++ b/drivers/media/dvb-frontends/cx24117.c @@ -1043,7 +1043,7 @@ static int cx24117_send_diseqc_msg(struct dvb_frontend *fe, dev_dbg(&state->priv->i2c->dev, ")\n"); /* Validate length */ - if (d->msg_len > 15) + if (d->msg_len > sizeof(d->msg)) return -EINVAL; /* DiSEqC message */ diff --git a/drivers/media/dvb-frontends/s5h1420.c b/drivers/media/dvb-frontends/s5h1420.c index 93eeaf7118fd0..0b4f8fe6bf990 100644 --- a/drivers/media/dvb-frontends/s5h1420.c +++ b/drivers/media/dvb-frontends/s5h1420.c @@ -180,7 +180,7 @@ static int s5h1420_send_master_cmd (struct dvb_frontend* fe, int result = 0; dprintk("enter %s\n", __func__); - if (cmd->msg_len > 8) + if (cmd->msg_len > sizeof(cmd->msg)) return -EINVAL; /* setup for DISEQC */ diff --git a/drivers/media/dvb-frontends/si2168.c b/drivers/media/dvb-frontends/si2168.c index 5db588ebfc24e..391e98395b411 100644 --- a/drivers/media/dvb-frontends/si2168.c +++ b/drivers/media/dvb-frontends/si2168.c @@ -457,6 +457,10 @@ static int si2168_init(struct dvb_frontend *fe) /* firmware is in the new format */ for (remaining = fw->size; remaining > 0; remaining -= 17) { len = fw->data[fw->size - remaining]; + if (len > SI2168_ARGLEN) { + ret = -EINVAL; + break; + } memcpy(cmd.args, &fw->data[(fw->size - remaining) + 1], len); cmd.wlen = len; cmd.rlen = 1; diff --git a/drivers/media/dvb-frontends/tda1004x.c b/drivers/media/dvb-frontends/tda1004x.c index a2631be7ffac9..08e0f0dd8728b 100644 --- a/drivers/media/dvb-frontends/tda1004x.c +++ b/drivers/media/dvb-frontends/tda1004x.c @@ -903,9 +903,18 @@ static int tda1004x_get_fe(struct dvb_frontend *fe) { struct dtv_frontend_properties *fe_params = &fe->dtv_property_cache; struct tda1004x_state* state = fe->demodulator_priv; + int status; dprintk("%s\n", __func__); + status = 
tda1004x_read_byte(state, TDA1004X_STATUS_CD); + if (status == -1) + return -EIO; + + /* Only update the properties cache if device is locked */ + if (!(status & 8)) + return 0; + // inversion status fe_params->inversion = INVERSION_OFF; if (tda1004x_read_byte(state, TDA1004X_CONFC1) & 0x20) diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c index 60ffcf098befa..5f92ec23bb075 100644 --- a/drivers/media/i2c/adv7604.c +++ b/drivers/media/i2c/adv7604.c @@ -1911,10 +1911,9 @@ static int adv76xx_isr(struct v4l2_subdev *sd, u32 status, bool *handled) } /* tx 5v detect */ - tx_5v = io_read(sd, 0x70) & info->cable_det_mask; + tx_5v = irq_reg_0x70 & info->cable_det_mask; if (tx_5v) { v4l2_dbg(1, debug, sd, "%s: tx_5v: 0x%x\n", __func__, tx_5v); - io_write(sd, 0x71, tx_5v); adv76xx_s_detect_tx_5v_ctrl(sd); if (handled) *handled = true; diff --git a/drivers/media/pci/cx18/cx18-streams.c b/drivers/media/pci/cx18/cx18-streams.c index c82d25d533416..c9860845264fa 100644 --- a/drivers/media/pci/cx18/cx18-streams.c +++ b/drivers/media/pci/cx18/cx18-streams.c @@ -90,6 +90,7 @@ static struct { "encoder PCM audio", VFL_TYPE_GRABBER, CX18_V4L2_ENC_PCM_OFFSET, PCI_DMA_FROMDEVICE, + V4L2_CAP_TUNER | V4L2_CAP_AUDIO | V4L2_CAP_READWRITE, }, { /* CX18_ENC_STREAM_TYPE_IDX */ "encoder IDX", diff --git a/drivers/media/pci/saa7134/saa7134-alsa.c b/drivers/media/pci/saa7134/saa7134-alsa.c index ac3cd74e824e0..067db727e685f 100644 --- a/drivers/media/pci/saa7134/saa7134-alsa.c +++ b/drivers/media/pci/saa7134/saa7134-alsa.c @@ -1218,6 +1218,8 @@ static int alsa_device_init(struct saa7134_dev *dev) static int alsa_device_exit(struct saa7134_dev *dev) { + if (!snd_saa7134_cards[dev->nr]) + return 1; snd_card_free(snd_saa7134_cards[dev->nr]); snd_saa7134_cards[dev->nr] = NULL; @@ -1267,7 +1269,8 @@ static void saa7134_alsa_exit(void) int idx; for (idx = 0; idx < SNDRV_CARDS; idx++) { - snd_card_free(snd_saa7134_cards[idx]); + if (snd_saa7134_cards[idx]) + snd_card_free(snd_saa7134_cards[idx]); } saa7134_dmasound_init = NULL; diff --git a/drivers/media/pci/saa7164/saa7164-encoder.c b/drivers/media/pci/saa7164/saa7164-encoder.c index 9266965412c34..7a0a65146723c 100644 --- a/drivers/media/pci/saa7164/saa7164-encoder.c +++ b/drivers/media/pci/saa7164/saa7164-encoder.c @@ -721,13 +721,14 @@ static int vidioc_querycap(struct file *file, void *priv, sizeof(cap->card)); sprintf(cap->bus_info, "PCI:%s", pci_name(dev->pci)); - cap->capabilities = + cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | - V4L2_CAP_READWRITE | - 0; + V4L2_CAP_READWRITE | + V4L2_CAP_TUNER; - cap->capabilities |= V4L2_CAP_TUNER; - cap->version = 0; + cap->capabilities = cap->device_caps | + V4L2_CAP_VBI_CAPTURE | + V4L2_CAP_DEVICE_CAPS; return 0; } diff --git a/drivers/media/pci/saa7164/saa7164-vbi.c b/drivers/media/pci/saa7164/saa7164-vbi.c index 6e025fea25422..06117e6c0596b 100644 --- a/drivers/media/pci/saa7164/saa7164-vbi.c +++ b/drivers/media/pci/saa7164/saa7164-vbi.c @@ -660,13 +660,14 @@ static int vidioc_querycap(struct file *file, void *priv, sizeof(cap->card)); sprintf(cap->bus_info, "PCI:%s", pci_name(dev->pci)); - cap->capabilities = + cap->device_caps = V4L2_CAP_VBI_CAPTURE | - V4L2_CAP_READWRITE | - 0; + V4L2_CAP_READWRITE | + V4L2_CAP_TUNER; - cap->capabilities |= V4L2_CAP_TUNER; - cap->version = 0; + cap->capabilities = cap->device_caps | + V4L2_CAP_VIDEO_CAPTURE | + V4L2_CAP_DEVICE_CAPS; return 0; } diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c index 
a30cc2f7e4f12..ddf59ee5ca400 100644 --- a/drivers/media/platform/am437x/am437x-vpfe.c +++ b/drivers/media/platform/am437x/am437x-vpfe.c @@ -1185,14 +1185,24 @@ static int vpfe_initialize_device(struct vpfe_device *vpfe) static int vpfe_release(struct file *file) { struct vpfe_device *vpfe = video_drvdata(file); + bool fh_singular; int ret; mutex_lock(&vpfe->lock); - if (v4l2_fh_is_singular_file(file)) - vpfe_ccdc_close(&vpfe->ccdc, vpfe->pdev); + /* Save the singular status before we call the clean-up helper */ + fh_singular = v4l2_fh_is_singular_file(file); + + /* the release helper will cleanup any on-going streaming */ ret = _vb2_fop_release(file, NULL); + /* + * If this was the last open file. + * Then de-initialize hw module. + */ + if (fh_singular) + vpfe_ccdc_close(&vpfe->ccdc, vpfe->pdev); + mutex_unlock(&vpfe->lock); return ret; @@ -1577,7 +1587,7 @@ static int vpfe_s_fmt(struct file *file, void *priv, return -EBUSY; } - ret = vpfe_try_fmt(file, priv, fmt); + ret = vpfe_try_fmt(file, priv, &format); if (ret) return ret; diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 18d0a871747fe..947d8be7b2451 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -829,14 +829,14 @@ static int isp_pipeline_link_notify(struct media_link *link, u32 flags, int ret; if (notification == MEDIA_DEV_NOTIFY_POST_LINK_CH && - !(link->flags & MEDIA_LNK_FL_ENABLED)) { + !(flags & MEDIA_LNK_FL_ENABLED)) { /* Powering off entities is assumed to never fail. */ isp_pipeline_pm_power(source, -sink_use); isp_pipeline_pm_power(sink, -source_use); return 0; } - if (notification == MEDIA_DEV_NOTIFY_POST_LINK_CH && + if (notification == MEDIA_DEV_NOTIFY_PRE_LINK_CH && (flags & MEDIA_LNK_FL_ENABLED)) { ret = isp_pipeline_pm_power(source, sink_use); diff --git a/drivers/media/platform/vivid/vivid-osd.c b/drivers/media/platform/vivid/vivid-osd.c index 084d346fb4c4f..e15eef6a94e55 100644 --- a/drivers/media/platform/vivid/vivid-osd.c +++ b/drivers/media/platform/vivid/vivid-osd.c @@ -85,6 +85,7 @@ static int vivid_fb_ioctl(struct fb_info *info, unsigned cmd, unsigned long arg) case FBIOGET_VBLANK: { struct fb_vblank vblank; + memset(&vblank, 0, sizeof(vblank)); vblank.flags = FB_VBLANK_HAVE_COUNT | FB_VBLANK_HAVE_VCOUNT | FB_VBLANK_HAVE_VSYNC; vblank.count = 0; diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index f8c5e47a30aa7..0aba9ff921026 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -1191,9 +1191,6 @@ static int rc_dev_uevent(struct device *device, struct kobj_uevent_env *env) { struct rc_dev *dev = to_rc_dev(device); - if (!dev || !dev->input_dev) - return -ENODEV; - if (dev->rc_map.name) ADD_HOTPLUG_VAR("NAME=%s", dev->rc_map.name); if (dev->driver_name) diff --git a/drivers/media/rc/sunxi-cir.c b/drivers/media/rc/sunxi-cir.c index 7830aef3db459..40f77685cc4a2 100644 --- a/drivers/media/rc/sunxi-cir.c +++ b/drivers/media/rc/sunxi-cir.c @@ -153,6 +153,8 @@ static int sunxi_ir_probe(struct platform_device *pdev) if (!ir) return -ENOMEM; + spin_lock_init(&ir->ir_lock); + if (of_device_is_compatible(dn, "allwinner,sun5i-a13-ir")) ir->fifo_size = 64; else diff --git a/drivers/media/tuners/si2157.c b/drivers/media/tuners/si2157.c index d74ae26621ca5..c5dbba5b5bc94 100644 --- a/drivers/media/tuners/si2157.c +++ b/drivers/media/tuners/si2157.c @@ -165,6 +165,10 @@ static int si2157_init(struct dvb_frontend *fe) for (remaining = fw->size; remaining > 0; remaining -= 17) { len = 
fw->data[fw->size - remaining]; + if (len > SI2157_ARGLEN) { + dev_err(&client->dev, "Bad firmware length\n"); + goto err_release_firmware; + } memcpy(cmd.args, &fw->data[(fw->size - remaining) + 1], len); cmd.wlen = len; cmd.rlen = 1; diff --git a/drivers/media/usb/airspy/airspy.c b/drivers/media/usb/airspy/airspy.c index 4069234abed54..a50750ce511dd 100644 --- a/drivers/media/usb/airspy/airspy.c +++ b/drivers/media/usb/airspy/airspy.c @@ -132,7 +132,7 @@ struct airspy { int urbs_submitted; /* USB control message buffer */ - #define BUF_SIZE 24 + #define BUF_SIZE 128 u8 buf[BUF_SIZE]; /* Current configuration */ diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c index 895441fe90f7c..e862554952c12 100644 --- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c +++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c @@ -34,6 +34,14 @@ static int rtl28xxu_ctrl_msg(struct dvb_usb_device *d, struct rtl28xxu_req *req) unsigned int pipe; u8 requesttype; + mutex_lock(&d->usb_mutex); + + if (req->size > sizeof(dev->buf)) { + dev_err(&d->intf->dev, "too large message %u\n", req->size); + ret = -EINVAL; + goto err_mutex_unlock; + } + if (req->index & CMD_WR_FLAG) { /* write */ memcpy(dev->buf, req->data, req->size); @@ -50,14 +58,17 @@ static int rtl28xxu_ctrl_msg(struct dvb_usb_device *d, struct rtl28xxu_req *req) dvb_usb_dbg_usb_control_msg(d->udev, 0, requesttype, req->value, req->index, dev->buf, req->size); if (ret < 0) - goto err; + goto err_mutex_unlock; /* read request, copy returned data to return buf */ if (requesttype == (USB_TYPE_VENDOR | USB_DIR_IN)) memcpy(req->data, dev->buf, req->size); + mutex_unlock(&d->usb_mutex); + return 0; -err: +err_mutex_unlock: + mutex_unlock(&d->usb_mutex); dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.h b/drivers/media/usb/dvb-usb-v2/rtl28xxu.h index 1b5d7ffb685e0..1bdeda05d3325 100644 --- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.h +++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.h @@ -69,7 +69,7 @@ struct rtl28xxu_dev { - u8 buf[28]; + u8 buf[128]; u8 chip_id; u8 tuner; char *tuner_name; diff --git a/drivers/media/usb/dvb-usb/dib0700_core.c b/drivers/media/usb/dvb-usb/dib0700_core.c index 2b40393836ffa..0d248ce02a9b2 100644 --- a/drivers/media/usb/dvb-usb/dib0700_core.c +++ b/drivers/media/usb/dvb-usb/dib0700_core.c @@ -655,10 +655,20 @@ int dib0700_change_protocol(struct rc_dev *rc, u64 *rc_type) struct dib0700_rc_response { u8 report_id; u8 data_state; - u8 system; - u8 not_system; - u8 data; - u8 not_data; + union { + struct { + u8 system; + u8 not_system; + u8 data; + u8 not_data; + } nec; + struct { + u8 not_used; + u8 system; + u8 data; + u8 not_data; + } rc5; + }; }; #define RC_MSG_SIZE_V1_20 6 @@ -694,8 +704,8 @@ static void dib0700_rc_urb_completion(struct urb *purb) deb_data("IR ID = %02X state = %02X System = %02X %02X Cmd = %02X %02X (len %d)\n", poll_reply->report_id, poll_reply->data_state, - poll_reply->system, poll_reply->not_system, - poll_reply->data, poll_reply->not_data, + poll_reply->nec.system, poll_reply->nec.not_system, + poll_reply->nec.data, poll_reply->nec.not_data, purb->actual_length); switch (d->props.rc.core.protocol) { @@ -704,30 +714,30 @@ static void dib0700_rc_urb_completion(struct urb *purb) toggle = 0; /* NEC protocol sends repeat code as 0 0 0 FF */ - if (poll_reply->system == 0x00 && - poll_reply->not_system == 0x00 && - poll_reply->data == 0x00 && - poll_reply->not_data == 0xff) { + if (poll_reply->nec.system == 0x00 && + 
poll_reply->nec.not_system == 0x00 && + poll_reply->nec.data == 0x00 && + poll_reply->nec.not_data == 0xff) { poll_reply->data_state = 2; break; } - if ((poll_reply->data ^ poll_reply->not_data) != 0xff) { + if ((poll_reply->nec.data ^ poll_reply->nec.not_data) != 0xff) { deb_data("NEC32 protocol\n"); - keycode = RC_SCANCODE_NEC32(poll_reply->system << 24 | - poll_reply->not_system << 16 | - poll_reply->data << 8 | - poll_reply->not_data); - } else if ((poll_reply->system ^ poll_reply->not_system) != 0xff) { + keycode = RC_SCANCODE_NEC32(poll_reply->nec.system << 24 | + poll_reply->nec.not_system << 16 | + poll_reply->nec.data << 8 | + poll_reply->nec.not_data); + } else if ((poll_reply->nec.system ^ poll_reply->nec.not_system) != 0xff) { deb_data("NEC extended protocol\n"); - keycode = RC_SCANCODE_NECX(poll_reply->system << 8 | - poll_reply->not_system, - poll_reply->data); + keycode = RC_SCANCODE_NECX(poll_reply->nec.system << 8 | + poll_reply->nec.not_system, + poll_reply->nec.data); } else { deb_data("NEC normal protocol\n"); - keycode = RC_SCANCODE_NEC(poll_reply->system, - poll_reply->data); + keycode = RC_SCANCODE_NEC(poll_reply->nec.system, + poll_reply->nec.data); } break; @@ -735,19 +745,19 @@ static void dib0700_rc_urb_completion(struct urb *purb) deb_data("RC5 protocol\n"); protocol = RC_TYPE_RC5; toggle = poll_reply->report_id; - keycode = RC_SCANCODE_RC5(poll_reply->system, poll_reply->data); + keycode = RC_SCANCODE_RC5(poll_reply->rc5.system, poll_reply->rc5.data); + + if ((poll_reply->rc5.data ^ poll_reply->rc5.not_data) != 0xff) { + /* Key failed integrity check */ + err("key failed integrity check: %02x %02x %02x %02x", + poll_reply->rc5.not_used, poll_reply->rc5.system, + poll_reply->rc5.data, poll_reply->rc5.not_data); + goto resubmit; + } break; } - if ((poll_reply->data + poll_reply->not_data) != 0xff) { - /* Key failed integrity check */ - err("key failed integrity check: %02x %02x %02x %02x", - poll_reply->system, poll_reply->not_system, - poll_reply->data, poll_reply->not_data); - goto resubmit; - } - rc_keydown(d->rc_dev, protocol, keycode, toggle); resubmit: diff --git a/drivers/media/usb/dvb-usb/dib0700_devices.c b/drivers/media/usb/dvb-usb/dib0700_devices.c index d7d55a20e9590..c170523226aad 100644 --- a/drivers/media/usb/dvb-usb/dib0700_devices.c +++ b/drivers/media/usb/dvb-usb/dib0700_devices.c @@ -3944,6 +3944,8 @@ struct dvb_usb_device_properties dib0700_devices[] = { DIB0700_DEFAULT_STREAMING_CONFIG(0x02), }}, + .size_of_priv = sizeof(struct + dib0700_adapter_state), }, { .num_frontends = 1, .fe = {{ @@ -3956,6 +3958,8 @@ struct dvb_usb_device_properties dib0700_devices[] = { DIB0700_DEFAULT_STREAMING_CONFIG(0x03), }}, + .size_of_priv = sizeof(struct + dib0700_adapter_state), } }, @@ -4009,6 +4013,8 @@ struct dvb_usb_device_properties dib0700_devices[] = { DIB0700_DEFAULT_STREAMING_CONFIG(0x02), }}, + .size_of_priv = sizeof(struct + dib0700_adapter_state), }, }, diff --git a/drivers/media/usb/gspca/ov534.c b/drivers/media/usb/gspca/ov534.c index 146071b8e1161..bfff1d1c70ab0 100644 --- a/drivers/media/usb/gspca/ov534.c +++ b/drivers/media/usb/gspca/ov534.c @@ -1491,8 +1491,13 @@ static void sd_set_streamparm(struct gspca_dev *gspca_dev, struct v4l2_fract *tpf = &cp->timeperframe; struct sd *sd = (struct sd *) gspca_dev; - /* Set requested framerate */ - sd->frame_rate = tpf->denominator / tpf->numerator; + if (tpf->numerator == 0 || tpf->denominator == 0) + /* Set default framerate */ + sd->frame_rate = 30; + else + /* Set requested framerate */ + 
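+		/* tpf comes from userspace via VIDIOC_S_PARM; zero values
+		 * conventionally mean "use the default", so the guard above
+		 * falls back to 30 fps instead of dividing by zero here. */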
sd->frame_rate = tpf->denominator / tpf->numerator; + if (gspca_dev->streaming) set_frame_rate(gspca_dev); diff --git a/drivers/media/usb/gspca/topro.c b/drivers/media/usb/gspca/topro.c index c70ff406b07ac..c028a5c2438ed 100644 --- a/drivers/media/usb/gspca/topro.c +++ b/drivers/media/usb/gspca/topro.c @@ -4802,7 +4802,11 @@ static void sd_set_streamparm(struct gspca_dev *gspca_dev, struct v4l2_fract *tpf = &cp->timeperframe; int fr, i; - sd->framerate = tpf->denominator / tpf->numerator; + if (tpf->numerator == 0 || tpf->denominator == 0) + sd->framerate = 30; + else + sd->framerate = tpf->denominator / tpf->numerator; + if (gspca_dev->streaming) setframerate(gspca_dev, v4l2_ctrl_g_ctrl(gspca_dev->exposure)); diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index 66ada01c796ca..472eaad6fb78d 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -1237,6 +1237,23 @@ void vb2_discard_done(struct vb2_queue *q) } EXPORT_SYMBOL_GPL(vb2_discard_done); +static void vb2_warn_zero_bytesused(struct vb2_buffer *vb) +{ + static bool __check_once __read_mostly; + + if (__check_once) + return; + + __check_once = true; + __WARN(); + + pr_warn_once("use of bytesused == 0 is deprecated and will be removed in the future,\n"); + if (vb->vb2_queue->allow_zero_bytesused) + pr_warn_once("use VIDIOC_DECODER_CMD(V4L2_DEC_CMD_STOP) instead.\n"); + else + pr_warn_once("use the actual size instead.\n"); +} + /** * __fill_vb2_buffer() - fill a vb2_buffer with information provided in a * v4l2_buffer by the userspace. The caller has already verified that struct @@ -1247,16 +1264,6 @@ static void __fill_vb2_buffer(struct vb2_buffer *vb, const struct v4l2_buffer *b { unsigned int plane; - if (V4L2_TYPE_IS_OUTPUT(b->type)) { - if (WARN_ON_ONCE(b->bytesused == 0)) { - pr_warn_once("use of bytesused == 0 is deprecated and will be removed in the future,\n"); - if (vb->vb2_queue->allow_zero_bytesused) - pr_warn_once("use VIDIOC_DECODER_CMD(V4L2_DEC_CMD_STOP) instead.\n"); - else - pr_warn_once("use the actual size instead.\n"); - } - } - if (V4L2_TYPE_IS_MULTIPLANAR(b->type)) { if (b->memory == V4L2_MEMORY_USERPTR) { for (plane = 0; plane < vb->num_planes; ++plane) { @@ -1297,6 +1304,9 @@ static void __fill_vb2_buffer(struct vb2_buffer *vb, const struct v4l2_buffer *b struct v4l2_plane *pdst = &v4l2_planes[plane]; struct v4l2_plane *psrc = &b->m.planes[plane]; + if (psrc->bytesused == 0) + vb2_warn_zero_bytesused(vb); + if (vb->vb2_queue->allow_zero_bytesused) pdst->bytesused = psrc->bytesused; else @@ -1331,6 +1341,9 @@ static void __fill_vb2_buffer(struct vb2_buffer *vb, const struct v4l2_buffer *b } if (V4L2_TYPE_IS_OUTPUT(b->type)) { + if (b->bytesused == 0) + vb2_warn_zero_bytesused(vb); + if (vb->vb2_queue->allow_zero_bytesused) v4l2_planes[0].bytesused = b->bytesused; else @@ -2649,10 +2662,10 @@ unsigned int vb2_poll(struct vb2_queue *q, struct file *file, poll_table *wait) return res | POLLERR; /* - * For output streams you can write as long as there are fewer buffers - * queued than there are buffers available. + * For output streams you can call write() as long as there are fewer + * buffers queued than there are buffers available. 
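+ * This only matters on the file-I/O emulation path: q->fileio is
+ * non-NULL once read()/write() mode has been set up. Applications
+ * that drive the queue themselves with VIDIOC_QBUF/DQBUF get POLLOUT
+ * from the done-buffer check further down instead.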
*/ - if (V4L2_TYPE_IS_OUTPUT(q->type) && q->queued_count < q->num_buffers) + if (V4L2_TYPE_IS_OUTPUT(q->type) && q->fileio && q->queued_count < q->num_buffers) return res | POLLOUT | POLLWRNORM; if (list_empty(&q->done_list)) diff --git a/drivers/memory/tegra/tegra114.c b/drivers/memory/tegra/tegra114.c index 511e9a25c151c..16c4d26f51e72 100644 --- a/drivers/memory/tegra/tegra114.c +++ b/drivers/memory/tegra/tegra114.c @@ -935,6 +935,7 @@ static const struct tegra_smmu_soc tegra114_smmu_soc = { .num_swgroups = ARRAY_SIZE(tegra114_swgroups), .supports_round_robin_arbitration = false, .supports_request_limit = false, + .num_tlb_lines = 32, .num_asids = 4, .ops = &tegra114_smmu_ops, }; diff --git a/drivers/memory/tegra/tegra124.c b/drivers/memory/tegra/tegra124.c index 278d40b854c15..b153d0b732cf6 100644 --- a/drivers/memory/tegra/tegra124.c +++ b/drivers/memory/tegra/tegra124.c @@ -981,6 +981,7 @@ static const struct tegra_smmu_soc tegra124_smmu_soc = { .num_swgroups = ARRAY_SIZE(tegra124_swgroups), .supports_round_robin_arbitration = true, .supports_request_limit = true, + .num_tlb_lines = 32, .num_asids = 128, .ops = &tegra124_smmu_ops, }; diff --git a/drivers/memory/tegra/tegra30.c b/drivers/memory/tegra/tegra30.c index 71fe9376fe533..f422b18f45f30 100644 --- a/drivers/memory/tegra/tegra30.c +++ b/drivers/memory/tegra/tegra30.c @@ -957,6 +957,7 @@ static const struct tegra_smmu_soc tegra30_smmu_soc = { .num_swgroups = ARRAY_SIZE(tegra30_swgroups), .supports_round_robin_arbitration = false, .supports_request_limit = false, + .num_tlb_lines = 16, .num_asids = 4, .ops = &tegra30_smmu_ops, }; diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c index 6ca6dfab50ebf..6523903e15fe5 100644 --- a/drivers/mfd/arizona-core.c +++ b/drivers/mfd/arizona-core.c @@ -912,10 +912,6 @@ int arizona_dev_init(struct arizona *arizona) arizona->pdata.gpio_defaults[i]); } - pm_runtime_set_autosuspend_delay(arizona->dev, 100); - pm_runtime_use_autosuspend(arizona->dev); - pm_runtime_enable(arizona->dev); - /* Chip default */ if (!arizona->pdata.clk32k_src) arizona->pdata.clk32k_src = ARIZONA_32KZ_MCLK2; @@ -1012,11 +1008,17 @@ int arizona_dev_init(struct arizona *arizona) arizona->pdata.spk_fmt[i]); } + pm_runtime_set_active(arizona->dev); + pm_runtime_enable(arizona->dev); + /* Set up for interrupts */ ret = arizona_irq_init(arizona); if (ret != 0) goto err_reset; + pm_runtime_set_autosuspend_delay(arizona->dev, 100); + pm_runtime_use_autosuspend(arizona->dev); + arizona_request_irq(arizona, ARIZONA_IRQ_CLKGEN_ERR, "CLKGEN error", arizona_clkgen_err, arizona); arizona_request_irq(arizona, ARIZONA_IRQ_OVERCLOCKED, "Overclocked", @@ -1045,10 +1047,6 @@ int arizona_dev_init(struct arizona *arizona) goto err_irq; } -#ifdef CONFIG_PM - regulator_disable(arizona->dcvdd); -#endif - return 0; err_irq: diff --git a/drivers/mfd/max77843.c b/drivers/mfd/max77843.c index a354ac677ec70..1074a0d68680a 100644 --- a/drivers/mfd/max77843.c +++ b/drivers/mfd/max77843.c @@ -79,7 +79,7 @@ static int max77843_chg_init(struct max77843 *max77843) if (!max77843->i2c_chg) { dev_err(&max77843->i2c->dev, "Cannot allocate I2C device for Charger\n"); - return PTR_ERR(max77843->i2c_chg); + return -ENODEV; } i2c_set_clientdata(max77843->i2c_chg, max77843); diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c index c5265c1262c50..6aacd205a774b 100644 --- a/drivers/mfd/twl6040.c +++ b/drivers/mfd/twl6040.c @@ -647,6 +647,8 @@ static int twl6040_probe(struct i2c_client *client, twl6040->clk32k = devm_clk_get(&client->dev, 
"clk32k"); if (IS_ERR(twl6040->clk32k)) { + if (PTR_ERR(twl6040->clk32k) == -EPROBE_DEFER) + return -EPROBE_DEFER; dev_info(&client->dev, "clk32k is not handled\n"); twl6040->clk32k = NULL; } diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index d1b55fe62817d..e4dc8cdf67a34 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -113,11 +113,11 @@ static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { area = ctx->afu->psn_phys; - if (offset > ctx->afu->adapter->ps_size) + if (offset >= ctx->afu->adapter->ps_size) return VM_FAULT_SIGBUS; } else { area = ctx->psn_phys; - if (offset > ctx->psn_size) + if (offset >= ctx->psn_size) return VM_FAULT_SIGBUS; } diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 8ccddceead667..de350dd46218d 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -73,7 +73,7 @@ static inline void cxl_slbia_core(struct mm_struct *mm) spin_lock(&adapter->afu_list_lock); for (slice = 0; slice < adapter->slices; slice++) { afu = adapter->afu[slice]; - if (!afu->enabled) + if (!afu || !afu->enabled) continue; rcu_read_lock(); idr_for_each_entry(&afu->contexts_idr, ctx, id) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 1ef01647265f9..4f1b0bdb9cf84 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -778,14 +778,9 @@ int cxl_reset(struct cxl *adapter) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); int rc; - int i; - u32 val; dev_info(&dev->dev, "CXL reset\n"); - for (i = 0; i < adapter->slices; i++) - cxl_remove_afu(adapter->afu[i]); - /* pcie_warm_reset requests a fundamental pci reset which includes a * PERST assert/deassert. PERST triggers a loading of the image * if "user" or "factory" is selected in sysfs */ @@ -794,20 +789,6 @@ int cxl_reset(struct cxl *adapter) return rc; } - /* the PERST done above fences the PHB. So, reset depends on EEH - * to unbind the driver, tell Sapphire to reinit the PHB, and rebind - * the driver. Do an mmio read explictly to ensure EEH notices the - * fenced PHB. Retry for a few seconds before giving up. 
*/ - i = 0; - while (((val = mmio_read32be(adapter->p1_mmio)) != 0xffffffff) && - (i < 5)) { - msleep(500); - i++; - } - - if (val != 0xffffffff) - dev_err(&dev->dev, "cxl: PERST failed to trigger EEH\n"); - return rc; } @@ -1062,8 +1043,6 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) int slice; int rc; - pci_dev_get(dev); - if (cxl_verbose) dump_cxl_config_space(dev); diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 1e99ef6a54a2b..b2b9f4382d771 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -699,7 +699,7 @@ void mei_host_client_init(struct work_struct *work) bool mei_hbuf_acquire(struct mei_device *dev) { if (mei_pg_state(dev) == MEI_PG_ON || - dev->pg_event == MEI_PG_EVENT_WAIT) { + mei_pg_in_transition(dev)) { dev_dbg(dev->dev, "device is in pg\n"); return false; } diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 6fb75e62a764c..43d7101ff9933 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -663,17 +663,46 @@ int mei_me_pg_exit_sync(struct mei_device *dev) mutex_lock(&dev->device_lock); reply: - if (dev->pg_event == MEI_PG_EVENT_RECEIVED) - ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_EXIT_RES_CMD); + if (dev->pg_event != MEI_PG_EVENT_RECEIVED) { + ret = -ETIME; + goto out; + } + + dev->pg_event = MEI_PG_EVENT_INTR_WAIT; + ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_EXIT_RES_CMD); + if (ret) + return ret; + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, timeout); + mutex_lock(&dev->device_lock); + + if (dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED) + ret = 0; else ret = -ETIME; +out: dev->pg_event = MEI_PG_EVENT_IDLE; hw->pg_state = MEI_PG_OFF; return ret; } +/** + * mei_me_pg_in_transition - is device now in pg transition + * + * @dev: the device structure + * + * Return: true if in pg transition, false otherwise + */ +static bool mei_me_pg_in_transition(struct mei_device *dev) +{ + return dev->pg_event >= MEI_PG_EVENT_WAIT && + dev->pg_event <= MEI_PG_EVENT_INTR_WAIT; +} + /** * mei_me_pg_is_enabled - detect if PG is supported by HW * @@ -704,6 +733,24 @@ static bool mei_me_pg_is_enabled(struct mei_device *dev) return false; } +/** + * mei_me_pg_intr - perform pg processing in interrupt thread handler + * + * @dev: the device structure + */ +static void mei_me_pg_intr(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (dev->pg_event != MEI_PG_EVENT_INTR_WAIT) + return; + + dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED; + hw->pg_state = MEI_PG_OFF; + if (waitqueue_active(&dev->wait_pg)) + wake_up(&dev->wait_pg); +} + /** * mei_me_irq_quick_handler - The ISR of the MEI device * @@ -761,6 +808,8 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) goto end; } + mei_me_pg_intr(dev); + /* check if we need to start the dev */ if (!mei_host_is_ready(dev)) { if (mei_hw_is_ready(dev)) { @@ -797,9 +846,10 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) /* * During PG handshake only allowed write is the replay to the * PG exit message, so block calling write function - * if the pg state is not idle + * if the pg event is in PG handshake */ - if (dev->pg_event == MEI_PG_EVENT_IDLE) { + if (dev->pg_event != MEI_PG_EVENT_WAIT && + dev->pg_event != MEI_PG_EVENT_RECEIVED) { rets = mei_irq_write_handler(dev, &complete_list); dev->hbuf_is_ready = mei_hbuf_is_ready(dev); } @@ -824,6 +874,7 @@ static const struct mei_hw_ops mei_me_hw_ops = { .hw_config = mei_me_hw_config, 
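	/* ->pg_in_transition (wired up below) lets mei_hbuf_acquire()
	 * refuse writes throughout the power-gating handshake, covering
	 * the RECEIVED and INTR_WAIT phases as well as WAIT. */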
.hw_start = mei_me_hw_start, + .pg_in_transition = mei_me_pg_in_transition, .pg_is_enabled = mei_me_pg_is_enabled, .intr_clear = mei_me_intr_clear, diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c index 7abafe7d120d8..bae680c648ffc 100644 --- a/drivers/misc/mei/hw-txe.c +++ b/drivers/misc/mei/hw-txe.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -218,26 +219,25 @@ static u32 mei_txe_aliveness_get(struct mei_device *dev) * * Polls for HICR_HOST_ALIVENESS_RESP.ALIVENESS_RESP to be set * - * Return: > 0 if the expected value was received, -ETIME otherwise + * Return: 0 if the expected value was received, -ETIME otherwise */ static int mei_txe_aliveness_poll(struct mei_device *dev, u32 expected) { struct mei_txe_hw *hw = to_txe_hw(dev); - int t = 0; + ktime_t stop, start; + start = ktime_get(); + stop = ktime_add(start, ms_to_ktime(SEC_ALIVENESS_WAIT_TIMEOUT)); do { hw->aliveness = mei_txe_aliveness_get(dev); if (hw->aliveness == expected) { dev->pg_event = MEI_PG_EVENT_IDLE; - dev_dbg(dev->dev, - "aliveness settled after %d msecs\n", t); - return t; + dev_dbg(dev->dev, "aliveness settled after %lld usecs\n", + ktime_to_us(ktime_sub(ktime_get(), start))); + return 0; } - mutex_unlock(&dev->device_lock); - msleep(MSEC_PER_SEC / 5); - mutex_lock(&dev->device_lock); - t += MSEC_PER_SEC / 5; - } while (t < SEC_ALIVENESS_WAIT_TIMEOUT); + usleep_range(20, 50); + } while (ktime_before(ktime_get(), stop)); dev->pg_event = MEI_PG_EVENT_IDLE; dev_err(dev->dev, "aliveness timed out\n"); @@ -301,6 +301,18 @@ int mei_txe_aliveness_set_sync(struct mei_device *dev, u32 req) return 0; } +/** + * mei_txe_pg_in_transition - is device now in pg transition + * + * @dev: the device structure + * + * Return: true if in pg transition, false otherwise + */ +static bool mei_txe_pg_in_transition(struct mei_device *dev) +{ + return dev->pg_event == MEI_PG_EVENT_WAIT; +} + /** * mei_txe_pg_is_enabled - detect if PG is supported by HW * @@ -1138,6 +1150,7 @@ static const struct mei_hw_ops mei_txe_hw_ops = { .hw_config = mei_txe_hw_config, .hw_start = mei_txe_hw_start, + .pg_in_transition = mei_txe_pg_in_transition, .pg_is_enabled = mei_txe_pg_is_enabled, .intr_clear = mei_txe_intr_clear, diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 3e29681595064..e40bcd03bd47a 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -685,7 +685,7 @@ int mei_register(struct mei_device *dev, struct device *parent) /* Fill in the data structures */ devno = MKDEV(MAJOR(mei_devt), dev->minor); cdev_init(&dev->cdev, &mei_fops); - dev->cdev.owner = mei_fops.owner; + dev->cdev.owner = parent->driver->owner; /* Add the device */ ret = cdev_add(&dev->cdev, devno, 1); diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index f066ecd719393..f84c39ee28a8c 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -271,6 +271,7 @@ struct mei_cl { * @fw_status : get fw status registers * @pg_state : power gating state of the device + * @pg_in_transition : is device now in pg transition * @pg_is_enabled : is power gating enabled * @intr_clear : clear pending interrupts @@ -300,6 +301,7 @@ struct mei_hw_ops { int (*fw_status)(struct mei_device *dev, struct mei_fw_status *fw_sts); enum mei_pg_state (*pg_state)(struct mei_device *dev); + bool (*pg_in_transition)(struct mei_device *dev); bool (*pg_is_enabled)(struct mei_device *dev); void (*intr_clear)(struct mei_device *dev); @@ -398,11 +400,15 @@ struct mei_cl_device { * 
@MEI_PG_EVENT_IDLE: the driver is not in power gating transition * @MEI_PG_EVENT_WAIT: the driver is waiting for a pg event to complete * @MEI_PG_EVENT_RECEIVED: the driver received pg event + * @MEI_PG_EVENT_INTR_WAIT: the driver is waiting for a pg event interrupt + * @MEI_PG_EVENT_INTR_RECEIVED: the driver received pg event interrupt */ enum mei_pg_event { MEI_PG_EVENT_IDLE, MEI_PG_EVENT_WAIT, MEI_PG_EVENT_RECEIVED, + MEI_PG_EVENT_INTR_WAIT, + MEI_PG_EVENT_INTR_RECEIVED, }; /** @@ -717,6 +723,11 @@ static inline enum mei_pg_state mei_pg_state(struct mei_device *dev) return dev->ops->pg_state(dev); } +static inline bool mei_pg_in_transition(struct mei_device *dev) +{ + return dev->ops->pg_in_transition(dev); +} + static inline bool mei_pg_is_enabled(struct mei_device *dev) { return dev->ops->pg_is_enabled(dev); diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 60f7141a6b02e..31d2627d9d4d6 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -208,6 +208,8 @@ static ssize_t power_ro_lock_show(struct device *dev, ret = snprintf(buf, PAGE_SIZE, "%d\n", locked); + mmc_blk_put(md); + return ret; } @@ -1910,9 +1912,11 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) break; case MMC_BLK_CMD_ERR: ret = mmc_blk_cmd_err(md, card, brq, req, ret); - if (!mmc_blk_reset(md, card->host, type)) - break; - goto cmd_abort; + if (mmc_blk_reset(md, card->host, type)) + goto cmd_abort; + if (!ret) + goto start_new_req; + break; case MMC_BLK_RETRY: if (retry++ < 5) break; diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 92e7671426ebc..588fb7908642a 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -330,8 +330,10 @@ EXPORT_SYMBOL(mmc_start_bkops); */ static void mmc_wait_data_done(struct mmc_request *mrq) { - mrq->host->context_info.is_done_rcv = true; - wake_up_interruptible(&mrq->host->context_info.wait); + struct mmc_context_info *context_info = &mrq->host->context_info; + + context_info->is_done_rcv = true; + wake_up_interruptible(&context_info->wait); } static void mmc_wait_done(struct mmc_request *mrq) diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 8be0df758e682..a0b1b460377de 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -373,7 +373,7 @@ int mmc_of_parse(struct mmc_host *host) 0, &cd_gpio_invert); if (!ret) dev_info(host->parent, "Got CD GPIO\n"); - else if (ret != -ENOENT) + else if (ret != -ENOENT && ret != -ENOSYS) return ret; /* @@ -397,7 +397,7 @@ int mmc_of_parse(struct mmc_host *host) ret = mmc_gpiod_request_ro(host, "wp", 0, false, 0, &ro_gpio_invert); if (!ret) dev_info(host->parent, "Got WP GPIO\n"); - else if (ret != -ENOENT) + else if (ret != -ENOENT && ret != -ENOSYS) return ret; /* See the comment on CD inversion above */ diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 31a9ef256d065..ce3044883a428 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -661,9 +661,25 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card) * SDR104 mode SD-cards. Note that tuning is mandatory for SDR104. 
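 * Tuning is keyed off the timing the host actually negotiated
 * (ios.timing) rather than the card's advertised bus speed, since the
 * host may settle on a lower mode than the card supports. DDR50
 * qualifies as well: per the v3.01 spec note inside the branch below,
 * CMD19 tuning is also available in that mode, though a failure there
 * is only warned about rather than treated as fatal.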
*/ if (!mmc_host_is_spi(card->host) && - (card->sd_bus_speed == UHS_SDR50_BUS_SPEED || - card->sd_bus_speed == UHS_SDR104_BUS_SPEED)) + (card->host->ios.timing == MMC_TIMING_UHS_SDR50 || + card->host->ios.timing == MMC_TIMING_UHS_DDR50 || + card->host->ios.timing == MMC_TIMING_UHS_SDR104)) { err = mmc_execute_tuning(card); + + /* + * As SD Specifications Part1 Physical Layer Specification + * Version 3.01 says, CMD19 tuning is available for unlocked + * cards in transfer state of 1.8V signaling mode. The small + * difference between v3.00 and 3.01 spec means that CMD19 + * tuning is also available for DDR50 mode. + */ + if (err && card->host->ios.timing == MMC_TIMING_UHS_DDR50) { + pr_warn("%s: ddr50 tuning failed\n", + mmc_hostname(card->host)); + err = 0; + } + } + out: kfree(status); diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 5bc6c7dbbd608..941beb3b5fa27 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -566,8 +566,8 @@ static int mmc_sdio_init_uhs_card(struct mmc_card *card) * SDR104 mode SD-cards. Note that tuning is mandatory for SDR104. */ if (!mmc_host_is_spi(card->host) && - ((card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR50) || - (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104))) + ((card->host->ios.timing == MMC_TIMING_UHS_SDR50) || + (card->host->ios.timing == MMC_TIMING_UHS_SDR104))) err = mmc_execute_tuning(card); out: return err; @@ -661,7 +661,7 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, */ if (!powered_resume && (rocr & ocr & R4_18V_PRESENT)) { err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180, - ocr); + ocr_card); if (err == -EAGAIN) { sdio_reset(host); mmc_go_idle(host); diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 5f5adafb253af..b354c8bffb9e1 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -99,6 +99,9 @@ struct idmac_desc { __le32 des3; /* buffer 2 physical address */ }; + +/* Each descriptor can transfer up to 4KB of data in chained mode */ +#define DW_MCI_DESC_DATA_LENGTH 0x1000 #endif /* CONFIG_MMC_DW_IDMAC */ static bool dw_mci_reset(struct dw_mci *host); @@ -462,66 +465,96 @@ static void dw_mci_idmac_complete_dma(struct dw_mci *host) static void dw_mci_translate_sglist(struct dw_mci *host, struct mmc_data *data, unsigned int sg_len) { + unsigned int desc_len; int i; if (host->dma_64bit_address == 1) { - struct idmac_desc_64addr *desc = host->sg_cpu; + struct idmac_desc_64addr *desc_first, *desc_last, *desc; + + desc_first = desc_last = desc = host->sg_cpu; - for (i = 0; i < sg_len; i++, desc++) { + for (i = 0; i < sg_len; i++) { unsigned int length = sg_dma_len(&data->sg[i]); u64 mem_addr = sg_dma_address(&data->sg[i]); - /* - * Set the OWN bit and disable interrupts for this - * descriptor - */ - desc->des0 = IDMAC_DES0_OWN | IDMAC_DES0_DIC | - IDMAC_DES0_CH; - /* Buffer length */ - IDMAC_64ADDR_SET_BUFFER1_SIZE(desc, length); - - /* Physical address to DMA to/from */ - desc->des4 = mem_addr & 0xffffffff; - desc->des5 = mem_addr >> 32; + for ( ; length ; desc++) { + desc_len = (length <= DW_MCI_DESC_DATA_LENGTH) ? 
+ length : DW_MCI_DESC_DATA_LENGTH; + + length -= desc_len; + + /* + * Set the OWN bit and disable interrupts + * for this descriptor + */ + desc->des0 = IDMAC_DES0_OWN | IDMAC_DES0_DIC | + IDMAC_DES0_CH; + + /* Buffer length */ + IDMAC_64ADDR_SET_BUFFER1_SIZE(desc, desc_len); + + /* Physical address to DMA to/from */ + desc->des4 = mem_addr & 0xffffffff; + desc->des5 = mem_addr >> 32; + + /* Update physical address for the next desc */ + mem_addr += desc_len; + + /* Save pointer to the last descriptor */ + desc_last = desc; + } } /* Set first descriptor */ - desc = host->sg_cpu; - desc->des0 |= IDMAC_DES0_FD; + desc_first->des0 |= IDMAC_DES0_FD; /* Set last descriptor */ - desc = host->sg_cpu + (i - 1) * - sizeof(struct idmac_desc_64addr); - desc->des0 &= ~(IDMAC_DES0_CH | IDMAC_DES0_DIC); - desc->des0 |= IDMAC_DES0_LD; + desc_last->des0 &= ~(IDMAC_DES0_CH | IDMAC_DES0_DIC); + desc_last->des0 |= IDMAC_DES0_LD; } else { - struct idmac_desc *desc = host->sg_cpu; + struct idmac_desc *desc_first, *desc_last, *desc; + + desc_first = desc_last = desc = host->sg_cpu; - for (i = 0; i < sg_len; i++, desc++) { + for (i = 0; i < sg_len; i++) { unsigned int length = sg_dma_len(&data->sg[i]); u32 mem_addr = sg_dma_address(&data->sg[i]); - /* - * Set the OWN bit and disable interrupts for this - * descriptor - */ - desc->des0 = cpu_to_le32(IDMAC_DES0_OWN | - IDMAC_DES0_DIC | IDMAC_DES0_CH); - /* Buffer length */ - IDMAC_SET_BUFFER1_SIZE(desc, length); + for ( ; length ; desc++) { + desc_len = (length <= DW_MCI_DESC_DATA_LENGTH) ? + length : DW_MCI_DESC_DATA_LENGTH; + + length -= desc_len; + + /* + * Set the OWN bit and disable interrupts + * for this descriptor + */ + desc->des0 = cpu_to_le32(IDMAC_DES0_OWN | + IDMAC_DES0_DIC | + IDMAC_DES0_CH); + + /* Buffer length */ + IDMAC_SET_BUFFER1_SIZE(desc, desc_len); - /* Physical address to DMA to/from */ - desc->des2 = cpu_to_le32(mem_addr); + /* Physical address to DMA to/from */ + desc->des2 = cpu_to_le32(mem_addr); + + /* Update physical address for the next desc */ + mem_addr += desc_len; + + /* Save pointer to the last descriptor */ + desc_last = desc; + } } /* Set first descriptor */ - desc = host->sg_cpu; - desc->des0 |= cpu_to_le32(IDMAC_DES0_FD); + desc_first->des0 |= cpu_to_le32(IDMAC_DES0_FD); /* Set last descriptor */ - desc = host->sg_cpu + (i - 1) * sizeof(struct idmac_desc); - desc->des0 &= cpu_to_le32(~(IDMAC_DES0_CH | IDMAC_DES0_DIC)); - desc->des0 |= cpu_to_le32(IDMAC_DES0_LD); + desc_last->des0 &= cpu_to_le32(~(IDMAC_DES0_CH | + IDMAC_DES0_DIC)); + desc_last->des0 |= cpu_to_le32(IDMAC_DES0_LD); } wmb(); @@ -2406,7 +2439,7 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id) #ifdef CONFIG_MMC_DW_IDMAC mmc->max_segs = host->ring_size; mmc->max_blk_size = 65536; - mmc->max_seg_size = 0x1000; + mmc->max_seg_size = DW_MCI_DESC_DATA_LENGTH; mmc->max_req_size = mmc->max_seg_size * host->ring_size; mmc->max_blk_count = mmc->max_req_size / 512; #else diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index fb266745f8240..acece3299756e 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -1886,7 +1886,7 @@ static struct amba_id mmci_ids[] = { { .id = 0x00280180, .mask = 0x00ffffff, - .data = &variant_u300, + .data = &variant_nomadik, }, { .id = 0x00480180, diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 9df2b6801f767..d0abdffb0d7c2 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -1062,6 +1062,10 @@ static void omap_hsmmc_do_irq(struct 
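The dw_mci_translate_sglist() rework in the dw_mmc hunk above no longer assumes one IDMAC descriptor per scatterlist entry: each segment is split into DW_MCI_DESC_DATA_LENGTH (4 KB) chunks, and desc_first/desc_last are tracked so the first-descriptor and last-descriptor bits land on the right entries even after splitting. A standalone sketch of the chunking loop, with the descriptor reduced to an address/length pair (hypothetical types, not the driver's):

    #include <stdio.h>
    #include <stdint.h>

    #define DESC_DATA_LENGTH 0x1000   /* 4 KB per chained descriptor */

    struct desc { uint32_t len; uint64_t addr; };

    /* Split one DMA segment into 4 KB descriptors; returns the count used. */
    static int fill_descs(struct desc *d, uint64_t addr, unsigned int length)
    {
        int n = 0;

        for ( ; length; n++) {
            unsigned int chunk = length <= DESC_DATA_LENGTH ?
                                 length : DESC_DATA_LENGTH;

            d[n].len = chunk;
            d[n].addr = addr;
            addr += chunk;              /* next descriptor continues here */
            length -= chunk;
        }
        return n;
    }

    int main(void)
    {
        struct desc d[4];
        int n = fill_descs(d, 0x80000000ULL, 0x2800);   /* 10 KB segment */

        for (int i = 0; i < n; i++)     /* expect 4 KB + 4 KB + 2 KB */
            printf("desc %d: addr=0x%llx len=0x%x\n", i,
                   (unsigned long long)d[i].addr, (unsigned)d[i].len);
        return 0;
    }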
omap_hsmmc_host *host, int status) if (status & (CTO_EN | CCRC_EN)) end_cmd = 1; + if (host->data || host->response_busy) { + end_trans = !end_cmd; + host->response_busy = 0; + } if (status & (CTO_EN | DTO_EN)) hsmmc_command_incomplete(host, -ETIMEDOUT, end_cmd); else if (status & (CCRC_EN | DCRC_EN)) @@ -1081,10 +1085,6 @@ static void omap_hsmmc_do_irq(struct omap_hsmmc_host *host, int status) } dev_dbg(mmc_dev(host->mmc), "AC12 err: 0x%x\n", ac12); } - if (host->data || host->response_busy) { - end_trans = !end_cmd; - host->response_busy = 0; - } } OMAP_HSMMC_WRITE(host->base, STAT, status); diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 82f512d87cb89..461698b038f75 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -868,6 +868,7 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev, struct esdhc_platform_data *boarddata) { struct device_node *np = pdev->dev.of_node; + int ret; if (!np) return -ENODEV; @@ -903,6 +904,14 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev, mmc_of_parse_voltage(np, &host->ocr_mask); + /* call to generic mmc_of_parse to support additional capabilities */ + ret = mmc_of_parse(host->mmc); + if (ret) + return ret; + + if (!IS_ERR_VALUE(mmc_gpio_get_cd(host->mmc))) + host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION; + return 0; } #else @@ -924,6 +933,7 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) struct esdhc_platform_data *boarddata; int err; struct pltfm_imx_data *imx_data; + bool dt = true; host = sdhci_pltfm_init(pdev, &sdhci_esdhc_imx_pdata, 0); if (IS_ERR(host)) @@ -1011,11 +1021,44 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) } imx_data->boarddata = *((struct esdhc_platform_data *) host->mmc->parent->platform_data); + dt = false; + } + /* write_protect */ + if (boarddata->wp_type == ESDHC_WP_GPIO && !dt) { + err = mmc_gpio_request_ro(host->mmc, boarddata->wp_gpio); + if (err) { + dev_err(mmc_dev(host->mmc), + "failed to request write-protect gpio!\n"); + goto disable_clk; + } + host->mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; } /* card_detect */ - if (boarddata->cd_type == ESDHC_CD_CONTROLLER) + switch (boarddata->cd_type) { + case ESDHC_CD_GPIO: + if (dt) + break; + err = mmc_gpio_request_cd(host->mmc, boarddata->cd_gpio, 0); + if (err) { + dev_err(mmc_dev(host->mmc), + "failed to request card-detect gpio!\n"); + goto disable_clk; + } + /* fall through */ + + case ESDHC_CD_CONTROLLER: + /* we have a working card_detect back */ host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION; + break; + + case ESDHC_CD_PERMANENT: + host->mmc->caps |= MMC_CAP_NONREMOVABLE; + break; + + case ESDHC_CD_NONE: + break; + } switch (boarddata->max_bus_width) { case 8: @@ -1048,11 +1091,6 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V; } - /* call to generic mmc_of_parse to support additional capabilities */ - err = mmc_of_parse(host->mmc); - if (err) - goto disable_clk; - err = sdhci_add_host(host); if (err) goto disable_clk; diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h index 3497cfaf683c5..a870c42731d7a 100644 --- a/drivers/mmc/host/sdhci-esdhc.h +++ b/drivers/mmc/host/sdhci-esdhc.h @@ -45,6 +45,6 @@ #define ESDHC_DMA_SYSCTL 0x40c #define ESDHC_DMA_SNOOP 0x00000040 -#define ESDHC_HOST_CONTROL_RES 0x05 +#define ESDHC_HOST_CONTROL_RES 0x01 #endif /* _DRIVERS_MMC_SDHCI_ESDHC_H */ diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 
7a3fc16d0a6c6..53cfc7cedefec 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -549,6 +549,7 @@ static int jmicron_resume(struct sdhci_pci_chip *chip) static const struct sdhci_pci_fixes sdhci_o2 = { .probe = sdhci_pci_o2_probe, .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .quirks2 = SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD, .probe_slot = sdhci_pci_o2_probe_slot, .resume = sdhci_pci_o2_resume, }; diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c index b5103a247bc1b..065dc70caa1d1 100644 --- a/drivers/mmc/host/sdhci-pxav3.c +++ b/drivers/mmc/host/sdhci-pxav3.c @@ -411,6 +411,7 @@ static int sdhci_pxav3_probe(struct platform_device *pdev) goto err_of_parse; sdhci_get_of_property(pdev); pdata = pxav3_get_mmc_pdata(dev); + pdev->dev.platform_data = pdata; } else if (pdata) { /* on-chip device */ if (pdata->flags & PXA_FLAG_CARD_PERMANENT) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index c80287a027356..f47c4a8370bea 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -55,8 +55,7 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode); static void sdhci_tuning_timer(unsigned long data); static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable); static int sdhci_pre_dma_transfer(struct sdhci_host *host, - struct mmc_data *data, - struct sdhci_host_next *next); + struct mmc_data *data); static int sdhci_do_get_cd(struct sdhci_host *host); #ifdef CONFIG_PM @@ -510,7 +509,7 @@ static int sdhci_adma_table_pre(struct sdhci_host *host, goto fail; BUG_ON(host->align_addr & host->align_mask); - host->sg_count = sdhci_pre_dma_transfer(host, data, NULL); + host->sg_count = sdhci_pre_dma_transfer(host, data); if (host->sg_count < 0) goto unmap_align; @@ -556,9 +555,12 @@ static int sdhci_adma_table_pre(struct sdhci_host *host, BUG_ON(len > 65536); - /* tran, valid */ - sdhci_adma_write_desc(host, desc, addr, len, ADMA2_TRAN_VALID); - desc += host->desc_sz; + if (len) { + /* tran, valid */ + sdhci_adma_write_desc(host, desc, addr, len, + ADMA2_TRAN_VALID); + desc += host->desc_sz; + } /* * If this triggers then we have a calculation bug @@ -649,9 +651,11 @@ static void sdhci_adma_table_post(struct sdhci_host *host, } } - if (!data->host_cookie) + if (data->host_cookie == COOKIE_MAPPED) { dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, direction); + data->host_cookie = COOKIE_UNMAPPED; + } } static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd) @@ -847,8 +851,8 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd) } else { int sg_cnt; - sg_cnt = sdhci_pre_dma_transfer(host, data, NULL); - if (sg_cnt == 0) { + sg_cnt = sdhci_pre_dma_transfer(host, data); + if (sg_cnt <= 0) { /* * This only happens when someone fed * us an invalid request. @@ -963,11 +967,13 @@ static void sdhci_finish_data(struct sdhci_host *host) if (host->flags & SDHCI_USE_ADMA) sdhci_adma_table_post(host, data); else { - if (!data->host_cookie) + if (data->host_cookie == COOKIE_MAPPED) { dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, (data->flags & MMC_DATA_READ) ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE); + data->host_cookie = COOKIE_UNMAPPED; + } } } @@ -1146,6 +1152,7 @@ static u16 sdhci_get_preset_value(struct sdhci_host *host) preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR104); break; case MMC_TIMING_UHS_DDR50: + case MMC_TIMING_MMC_DDR52: preset = sdhci_readw(host, SDHCI_PRESET_FOR_DDR50); break; case MMC_TIMING_MMC_HS400: @@ -1598,7 +1605,8 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) (ios->timing == MMC_TIMING_UHS_SDR25) || (ios->timing == MMC_TIMING_UHS_SDR50) || (ios->timing == MMC_TIMING_UHS_SDR104) || - (ios->timing == MMC_TIMING_UHS_DDR50))) { + (ios->timing == MMC_TIMING_UHS_DDR50) || + (ios->timing == MMC_TIMING_MMC_DDR52))) { u16 preset; sdhci_enable_preset_value(host, true); @@ -2129,49 +2137,36 @@ static void sdhci_post_req(struct mmc_host *mmc, struct mmc_request *mrq, struct mmc_data *data = mrq->data; if (host->flags & SDHCI_REQ_USE_DMA) { - if (data->host_cookie) + if (data->host_cookie == COOKIE_GIVEN || + data->host_cookie == COOKIE_MAPPED) dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - mrq->data->host_cookie = 0; + data->host_cookie = COOKIE_UNMAPPED; } } static int sdhci_pre_dma_transfer(struct sdhci_host *host, - struct mmc_data *data, - struct sdhci_host_next *next) + struct mmc_data *data) { int sg_count; - if (!next && data->host_cookie && - data->host_cookie != host->next_data.cookie) { - pr_debug(DRIVER_NAME "[%s] invalid cookie: %d, next-cookie %d\n", - __func__, data->host_cookie, host->next_data.cookie); - data->host_cookie = 0; + if (data->host_cookie == COOKIE_MAPPED) { + data->host_cookie = COOKIE_GIVEN; + return data->sg_count; } - /* Check if next job is already prepared */ - if (next || - (!next && data->host_cookie != host->next_data.cookie)) { - sg_count = dma_map_sg(mmc_dev(host->mmc), data->sg, - data->sg_len, - data->flags & MMC_DATA_WRITE ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - - } else { - sg_count = host->next_data.sg_count; - host->next_data.sg_count = 0; - } + WARN_ON(data->host_cookie == COOKIE_GIVEN); + sg_count = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + data->flags & MMC_DATA_WRITE ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); if (sg_count == 0) - return -EINVAL; + return -ENOSPC; - if (next) { - next->sg_count = sg_count; - data->host_cookie = ++next->cookie < 0 ? 
1 : next->cookie; - } else - host->sg_count = sg_count; + data->sg_count = sg_count; + data->host_cookie = COOKIE_MAPPED; return sg_count; } @@ -2181,16 +2176,10 @@ static void sdhci_pre_req(struct mmc_host *mmc, struct mmc_request *mrq, { struct sdhci_host *host = mmc_priv(mmc); - if (mrq->data->host_cookie) { - mrq->data->host_cookie = 0; - return; - } + mrq->data->host_cookie = COOKIE_UNMAPPED; if (host->flags & SDHCI_REQ_USE_DMA) - if (sdhci_pre_dma_transfer(host, - mrq->data, - &host->next_data) < 0) - mrq->data->host_cookie = 0; + sdhci_pre_dma_transfer(host, mrq->data); } static void sdhci_card_event(struct mmc_host *mmc) @@ -2804,7 +2793,7 @@ static int sdhci_runtime_pm_put(struct sdhci_host *host) static void sdhci_runtime_pm_bus_on(struct sdhci_host *host) { - if (host->runtime_suspended || host->bus_on) + if (host->bus_on) return; host->bus_on = true; pm_runtime_get_noresume(host->mmc->parent); @@ -2812,7 +2801,7 @@ static void sdhci_runtime_pm_bus_on(struct sdhci_host *host) static void sdhci_runtime_pm_bus_off(struct sdhci_host *host) { - if (host->runtime_suspended || !host->bus_on) + if (!host->bus_on) return; host->bus_on = false; pm_runtime_put_noidle(host->mmc->parent); @@ -3037,8 +3026,11 @@ int sdhci_add_host(struct sdhci_host *host) GFP_KERNEL); host->align_buffer = kmalloc(host->align_buffer_sz, GFP_KERNEL); if (!host->adma_table || !host->align_buffer) { - dma_free_coherent(mmc_dev(mmc), host->adma_table_sz, - host->adma_table, host->adma_addr); + if (host->adma_table) + dma_free_coherent(mmc_dev(mmc), + host->adma_table_sz, + host->adma_table, + host->adma_addr); kfree(host->align_buffer); pr_warn("%s: Unable to allocate ADMA buffers - falling back to standard DMA\n", mmc_hostname(mmc)); @@ -3085,7 +3077,6 @@ int sdhci_add_host(struct sdhci_host *host) host->max_clk = host->ops->get_max_clock(host); } - host->next_data.cookie = 1; /* * In case of Host Controller v3.00, find out whether clock * multiplier is supported. @@ -3315,13 +3306,14 @@ int sdhci_add_host(struct sdhci_host *host) SDHCI_MAX_CURRENT_MULTIPLIER; } - /* If OCR set by external regulators, use it instead */ + /* If OCR set by host, use it instead. */ + if (host->ocr_mask) + ocr_avail = host->ocr_mask; + + /* If OCR set by external regulators, give it highest prio. */ if (mmc->ocr_avail) ocr_avail = mmc->ocr_avail; - if (host->ocr_mask) - ocr_avail &= host->ocr_mask; - mmc->ocr_avail = ocr_avail; mmc->ocr_avail_sdio = ocr_avail; if (host->ocr_avail_sdio) diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index e639b7f435e56..eea23f62356ac 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -309,9 +309,10 @@ struct sdhci_adma2_64_desc { */ #define SDHCI_MAX_SEGS 128 -struct sdhci_host_next { - unsigned int sg_count; - s32 cookie; +enum sdhci_cookie { + COOKIE_UNMAPPED, + COOKIE_MAPPED, + COOKIE_GIVEN, }; struct sdhci_host { @@ -506,7 +507,6 @@ struct sdhci_host { #define SDHCI_TUNING_MODE_1 0 struct timer_list tuning_timer; /* Timer for tuning */ - struct sdhci_host_next next_data; unsigned long private[0] ____cacheline_aligned; }; diff --git a/drivers/mtd/maps/dc21285.c b/drivers/mtd/maps/dc21285.c index f8a7dd14cee0c..70a3db3ab856f 100644 --- a/drivers/mtd/maps/dc21285.c +++ b/drivers/mtd/maps/dc21285.c @@ -38,9 +38,9 @@ static void nw_en_write(void) * we want to write a bit pattern XXX1 to Xilinx to enable * the write gate, which will be open for about the next 2ms. 
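The sdhci changes above replace the fragile next_data cookie counter with an explicit per-request tag, enum sdhci_cookie: COOKIE_UNMAPPED (no DMA mapping), COOKIE_MAPPED (mapped by pre_req but not yet consumed), and COOKIE_GIVEN (mapping handed to the active request), so the pre_req, request, and post_req paths always agree on who unmaps. A self-contained sketch of that lifecycle, with stub map_sg()/unmap_sg() calls standing in for dma_map_sg()/dma_unmap_sg():

    #include <assert.h>
    #include <stdio.h>

    enum cookie { COOKIE_UNMAPPED, COOKIE_MAPPED, COOKIE_GIVEN };

    struct data { enum cookie cookie; int sg_count; };

    static int map_sg(struct data *d)    { (void)d; return 4; } /* dma_map_sg() stub */
    static void unmap_sg(struct data *d) { (void)d; }           /* dma_unmap_sg() stub */

    /* pre_req: optional asynchronous pre-mapping from the upper layer */
    static void pre_req(struct data *d)
    {
        d->sg_count = map_sg(d);
        d->cookie = COOKIE_MAPPED;
    }

    /* request path: consume a pre-mapped buffer, or map on demand */
    static int pre_dma_transfer(struct data *d)
    {
        if (d->cookie == COOKIE_MAPPED) {
            d->cookie = COOKIE_GIVEN;       /* ownership moves to this request */
            return d->sg_count;
        }
        assert(d->cookie != COOKIE_GIVEN);  /* double submit would be a bug */
        d->sg_count = map_sg(d);
        d->cookie = COOKIE_MAPPED;
        return d->sg_count;
    }

    /* post_req: unmap exactly once, whichever path created the mapping */
    static void post_req(struct data *d)
    {
        if (d->cookie != COOKIE_UNMAPPED) {
            unmap_sg(d);
            d->cookie = COOKIE_UNMAPPED;
        }
    }

    int main(void)
    {
        struct data d = { COOKIE_UNMAPPED, 0 };

        pre_req(&d);
        printf("sg_count=%d\n", pre_dma_transfer(&d));
        post_req(&d);           /* single unmap, state back to UNMAPPED */
        return 0;
    }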
*/ - spin_lock_irqsave(&nw_gpio_lock, flags); + raw_spin_lock_irqsave(&nw_gpio_lock, flags); nw_cpld_modify(CPLD_FLASH_WR_ENABLE, CPLD_FLASH_WR_ENABLE); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + raw_spin_unlock_irqrestore(&nw_gpio_lock, flags); /* * let the ISA bus to catch on... diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 2b0c528709997..df7c6c70757a6 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -197,6 +197,7 @@ static int blktrans_open(struct block_device *bdev, fmode_t mode) return -ERESTARTSYS; /* FIXME: busy loop! -arnd*/ mutex_lock(&dev->lock); + mutex_lock(&mtd_table_mutex); if (dev->open) goto unlock; @@ -220,6 +221,7 @@ static int blktrans_open(struct block_device *bdev, fmode_t mode) unlock: dev->open++; + mutex_unlock(&mtd_table_mutex); mutex_unlock(&dev->lock); blktrans_dev_put(dev); return ret; @@ -230,6 +232,7 @@ static int blktrans_open(struct block_device *bdev, fmode_t mode) error_put: module_put(dev->tr->owner); kref_put(&dev->ref, blktrans_dev_release); + mutex_unlock(&mtd_table_mutex); mutex_unlock(&dev->lock); blktrans_dev_put(dev); return ret; @@ -243,6 +246,7 @@ static void blktrans_release(struct gendisk *disk, fmode_t mode) return; mutex_lock(&dev->lock); + mutex_lock(&mtd_table_mutex); if (--dev->open) goto unlock; @@ -256,6 +260,7 @@ static void blktrans_release(struct gendisk *disk, fmode_t mode) __put_mtd_device(dev->mtd); } unlock: + mutex_unlock(&mtd_table_mutex); mutex_unlock(&dev->lock); blktrans_dev_put(dev); } diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index a4615fcc3d001..94a357d93baba 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -1475,6 +1475,9 @@ static int pxa3xx_nand_scan(struct mtd_info *mtd) if (pdata->keep_config && !pxa3xx_nand_detect_config(info)) goto KEEP_CONFIG; + /* Set a default chunk size */ + info->chunk_size = 512; + ret = pxa3xx_nand_sensing(info); if (ret) { dev_info(&info->pdev->dev, "There is no chip on cs %d!\n", diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index 6f93b2990d250..499b8e433d3da 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -138,6 +138,10 @@ #define NFC_ECC_MODE GENMASK(15, 12) #define NFC_RANDOM_SEED GENMASK(30, 16) +/* NFC_USER_DATA helper macros */ +#define NFC_BUF_TO_USER_DATA(buf) ((buf)[0] | ((buf)[1] << 8) | \ + ((buf)[2] << 16) | ((buf)[3] << 24)) + #define NFC_DEFAULT_TIMEOUT_MS 1000 #define NFC_SRAM_SIZE 1024 @@ -632,15 +636,9 @@ static int sunxi_nfc_hw_ecc_write_page(struct mtd_info *mtd, offset = layout->eccpos[i * ecc->bytes] - 4 + mtd->writesize; /* Fill OOB data in */ - if (oob_required) { - tmp = 0xffffffff; - memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, &tmp, - 4); - } else { - memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, - chip->oob_poi + offset - mtd->writesize, - 4); - } + writel(NFC_BUF_TO_USER_DATA(chip->oob_poi + + layout->oobfree[i].offset), + nfc->regs + NFC_REG_USER_DATA_BASE); chip->cmdfunc(mtd, NAND_CMD_RNDIN, offset, -1); @@ -770,14 +768,8 @@ static int sunxi_nfc_hw_syndrome_ecc_write_page(struct mtd_info *mtd, offset += ecc->size; /* Fill OOB data in */ - if (oob_required) { - tmp = 0xffffffff; - memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, &tmp, - 4); - } else { - memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, oob, - 4); - } + writel(NFC_BUF_TO_USER_DATA(oob), + nfc->regs + NFC_REG_USER_DATA_BASE); tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | NFC_ACCESS_DIR | (1 << 30); @@ 
-1312,6 +1304,7 @@ static void sunxi_nand_chips_cleanup(struct sunxi_nfc *nfc) node); nand_release(&chip->mtd); sunxi_nand_ecc_cleanup(&chip->nand.ecc); + list_del(&chip->node); } } diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 5bbd1f094f4e3..1fc23e48fe8e4 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -926,6 +926,11 @@ static int validate_vid_hdr(const struct ubi_device *ubi, goto bad; } + if (data_size > ubi->leb_size) { + ubi_err(ubi, "bad data_size"); + goto bad; + } + if (vol_type == UBI_VID_STATIC) { /* * Although from high-level point of view static volumes may diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c index 2a1b6e037e1a1..0134ba32a0578 100644 --- a/drivers/mtd/ubi/upd.c +++ b/drivers/mtd/ubi/upd.c @@ -193,7 +193,7 @@ int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, vol->changing_leb = 1; vol->ch_lnum = req->lnum; - vol->upd_buf = vmalloc(req->bytes); + vol->upd_buf = vmalloc(ALIGN((int)req->bytes, ubi->min_io_size)); if (!vol->upd_buf) return -ENOMEM; diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 68c9c5ea676f7..bf2f916df4e2e 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -646,6 +646,7 @@ static int init_volumes(struct ubi_device *ubi, if (ubi->corr_peb_count) ubi_err(ubi, "%d PEBs are corrupted and not used", ubi->corr_peb_count); + return -ENOSPC; } ubi->rsvd_pebs += reserved_pebs; ubi->avail_pebs -= reserved_pebs; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 16214d3d57a4d..18fef94542f89 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1601,6 +1601,7 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) if (ubi->corr_peb_count) ubi_err(ubi, "%d PEBs are corrupted and not used", ubi->corr_peb_count); + err = -ENOSPC; goto out_free; } ubi->avail_pebs -= reserved_pebs; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d5fe5d5f490f3..bd744e31c434f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -214,6 +214,8 @@ static void bond_uninit(struct net_device *bond_dev); static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 *stats); static void bond_slave_arr_handler(struct work_struct *work); +static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, + int mod); /*---------------------------- General routines -----------------------------*/ @@ -625,6 +627,23 @@ static void bond_set_dev_addr(struct net_device *bond_dev, call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev); } +static struct slave *bond_get_old_active(struct bonding *bond, + struct slave *new_active) +{ + struct slave *slave; + struct list_head *iter; + + bond_for_each_slave(bond, slave, iter) { + if (slave == new_active) + continue; + + if (ether_addr_equal(bond->dev->dev_addr, slave->dev->dev_addr)) + return slave; + } + + return NULL; +} + /* bond_do_fail_over_mac * * Perform special MAC address swapping for fail_over_mac settings @@ -652,6 +671,9 @@ static void bond_do_fail_over_mac(struct bonding *bond, if (!new_active) return; + if (!old_active) + old_active = bond_get_old_active(bond, new_active); + if (old_active) { ether_addr_copy(tmp_mac, new_active->dev->dev_addr); ether_addr_copy(saddr.sa_data, @@ -1174,7 +1196,6 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev, err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave); if (err) return err; - slave_dev->flags |= IFF_SLAVE; 
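bond_get_old_active(), added in the bonding hunk above, covers the fail_over_mac case where the driver no longer knows which slave was active: it scans for the slave that still carries the bond's MAC address so bond_do_fail_over_mac() has a partner for the swap. A reduced sketch of the scan over a plain array (list iteration and ether_addr_equal() are stubbed; names are illustrative):

    #include <stdio.h>
    #include <string.h>

    #define ETH_ALEN 6

    struct slave { unsigned char mac[ETH_ALEN]; };

    static int mac_equal(const unsigned char *a, const unsigned char *b)
    {
        return memcmp(a, b, ETH_ALEN) == 0;   /* ether_addr_equal() stand-in */
    }

    /* Find the slave still holding the bond's MAC, skipping the slave
     * that is about to become active. */
    static struct slave *get_old_active(const unsigned char *bond_mac,
                                        struct slave *slaves, int n,
                                        const struct slave *new_active)
    {
        for (int i = 0; i < n; i++) {
            if (&slaves[i] == new_active)
                continue;
            if (mac_equal(bond_mac, slaves[i].mac))
                return &slaves[i];
        }
        return NULL;
    }

    int main(void)
    {
        unsigned char bond_mac[ETH_ALEN] = { 0x02, 0, 0, 0, 0, 0x01 };
        struct slave slaves[2] = {
            { { 0x02, 0, 0, 0, 0, 0x01 } },   /* still carries the bond MAC */
            { { 0x02, 0, 0, 0, 0, 0x02 } },
        };
        struct slave *old = get_old_active(bond_mac, slaves, 2, &slaves[1]);

        printf("old active: slave %ld\n", old ? (long)(old - slaves) : -1L);
        return 0;
    }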
rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); return 0; } @@ -1432,6 +1453,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } } + /* set slave flag before open to prevent IPv6 addrconf */ + slave_dev->flags |= IFF_SLAVE; + /* open the slave since the application closed it */ res = dev_open(slave_dev); if (res) { @@ -1692,6 +1716,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) dev_close(slave_dev); err_restore_mac: + slave_dev->flags &= ~IFF_SLAVE; if (!bond->params.fail_over_mac || BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { /* XXX TODO - fom follow mode needs to change master's @@ -1902,6 +1927,7 @@ static int bond_release_and_destroy(struct net_device *bond_dev, bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; netdev_info(bond_dev, "Destroying bond %s\n", bond_dev->name); + bond_remove_proc_entry(bond); unregister_netdevice(bond_dev); } return ret; @@ -2373,7 +2399,7 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave) { struct arphdr *arp = (struct arphdr *)skb->data; - struct slave *curr_active_slave; + struct slave *curr_active_slave, *curr_arp_slave; unsigned char *arp_ptr; __be32 sip, tip; int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); @@ -2420,26 +2446,41 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, &sip, &tip); curr_active_slave = rcu_dereference(bond->curr_active_slave); + curr_arp_slave = rcu_dereference(bond->current_arp_slave); - /* Backup slaves won't see the ARP reply, but do come through - * here for each ARP probe (so we swap the sip/tip to validate - * the probe). In a "redundant switch, common router" type of - * configuration, the ARP probe will (hopefully) travel from - * the active, through one switch, the router, then the other - * switch before reaching the backup. + /* We 'trust' the received ARP enough to validate it if: * - * We 'trust' the arp requests if there is an active slave and - * it received valid arp reply(s) after it became active. This - * is done to avoid endless looping when we can't reach the + * (a) the slave receiving the ARP is active (which includes the + * current ARP slave, if any), or + * + * (b) the receiving slave isn't active, but there is a currently + * active slave and it received valid arp reply(s) after it became + * the currently active slave, or + * + * (c) there is an ARP slave that sent an ARP during the prior ARP + * interval, and we receive an ARP reply on any slave. We accept + * these because switch FDB update delays may deliver the ARP + * reply to a slave other than the sender of the ARP request. + * + * Note: for (b), backup slaves are receiving the broadcast ARP + * request, not a reply. This request passes from the sending + * slave through the L2 switch(es) to the receiving slave. Since + * this is checking the request, sip/tip are swapped for + * validation. + * + * This is done to avoid endless looping when we can't reach the * arp_ip_target and fool ourselves with our own arp requests. 
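The rewritten comment above distills to a small decision: validate sip/tip when the receiving slave is active (a), or when a recent probe makes a switch-FDB detour plausible and the frame is a reply (c); validate the swapped tip/sip on a backup slave that sees the broadcast request while a current active slave has proven itself (b). A sketch of that predicate with the slave-state and timing checks collapsed to booleans (purely illustrative, not the driver's types):

    #include <stdbool.h>
    #include <stdio.h>

    enum validate { VALIDATE_NONE, VALIDATE_SIP_TIP, VALIDATE_TIP_SIP };

    static enum validate arp_trust(bool slave_is_active,
                                   bool curr_active_saw_arp_since_up,
                                   bool arp_slave_probed_recently,
                                   bool is_reply)
    {
        if (slave_is_active)
            return VALIDATE_SIP_TIP;                 /* rule (a) */
        if (curr_active_saw_arp_since_up)
            return VALIDATE_TIP_SIP;                 /* rule (b), swapped */
        if (arp_slave_probed_recently && is_reply)
            return VALIDATE_SIP_TIP;                 /* rule (c) */
        return VALIDATE_NONE;
    }

    int main(void)
    {
        /* backup slave, reply arrives after a recent probe: rule (c) */
        printf("%d\n", arp_trust(false, false, true, true));
        return 0;
    }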
*/ - if (bond_is_active_slave(slave)) bond_validate_arp(bond, slave, sip, tip); else if (curr_active_slave && time_after(slave_last_rx(bond, curr_active_slave), curr_active_slave->last_link_up)) bond_validate_arp(bond, slave, tip, sip); + else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) && + bond_time_in_interval(bond, + dev_trans_start(curr_arp_slave->dev), 1)) + bond_validate_arp(bond, slave, sip, tip); out_unlock: if (arp != (struct arphdr *)skb->data) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 041525d2595ce..5d214d1353320 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -592,6 +592,7 @@ static int c_can_start(struct net_device *dev) { struct c_can_priv *priv = netdev_priv(dev); int err; + struct pinctrl *p; /* basic c_can configuration */ err = c_can_chip_config(dev); @@ -604,8 +605,13 @@ static int c_can_start(struct net_device *dev) priv->can.state = CAN_STATE_ERROR_ACTIVE; - /* activate pins */ - pinctrl_pm_select_default_state(dev->dev.parent); + /* Attempt to use "active" if available else use "default" */ + p = pinctrl_get_select(priv->device, "active"); + if (!IS_ERR(p)) + pinctrl_put(p); + else + pinctrl_pm_select_default_state(priv->device); + return 0; } diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index b0f69248cb71c..141c2a42d7eda 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -585,6 +585,7 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); memset(*cf, 0, sizeof(struct can_frame)); @@ -613,6 +614,7 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; *cfd = (struct canfd_frame *)skb_put(skb, sizeof(struct canfd_frame)); memset(*cfd, 0, sizeof(struct canfd_frame)); @@ -913,7 +915,7 @@ static int can_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put(skb, IFLA_CAN_BITTIMING_CONST, sizeof(*priv->bittiming_const), priv->bittiming_const)) || - nla_put(skb, IFLA_CAN_CLOCK, sizeof(cm), &priv->clock) || + nla_put(skb, IFLA_CAN_CLOCK, sizeof(priv->clock), &priv->clock) || nla_put_u32(skb, IFLA_CAN_STATE, state) || nla_put(skb, IFLA_CAN_CTRLMODE, sizeof(cm), &cm) || nla_put_u32(skb, IFLA_CAN_RESTART_MS, priv->restart_ms) || diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 7deb80dcbe8c0..2f9ebad4ff567 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -526,7 +526,7 @@ static int rcar_can_open(struct net_device *ndev) napi_enable(&priv->napi); err = request_irq(ndev->irq, rcar_can_interrupt, 0, ndev->name, ndev); if (err) { - netdev_err(ndev, "error requesting interrupt %x\n", ndev->irq); + netdev_err(ndev, "error requesting interrupt %d\n", ndev->irq); goto out_close; } can_led_event(ndev, CAN_LED_EVENT_OPEN); @@ -758,8 +758,9 @@ static int rcar_can_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (!irq) { + if (irq < 0) { dev_err(&pdev->dev, "No IRQ resource\n"); + err = irq; goto fail; } @@ -823,7 +824,7 @@ static int rcar_can_probe(struct platform_device *pdev) devm_can_led_init(ndev); - dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%u)\n", + dev_info(&pdev->dev, "device registered (regs @ %p, IRQ%d)\n", priv->regs, ndev->irq); return 0; diff --git 
a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 32bd7f451aa42..0c048e261ee63 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -218,6 +218,9 @@ static void sja1000_start(struct net_device *dev) priv->write_reg(priv, SJA1000_RXERR, 0x0); priv->read_reg(priv, SJA1000_ECC); + /* clear interrupt flags */ + priv->read_reg(priv, SJA1000_IR); + /* leave reset mode */ set_normal_mode(dev); } diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index c837eb91d43e3..a23a7af8eb9a0 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -214,6 +214,7 @@ static void slc_bump(struct slcan *sl) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = sl->dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; memcpy(skb_put(skb, sizeof(struct can_frame)), &cf, sizeof(struct can_frame)); diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index bf63fee4e743a..34c625ea28017 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -1221,17 +1221,16 @@ static int __maybe_unused mcp251x_can_resume(struct device *dev) struct spi_device *spi = to_spi_device(dev); struct mcp251x_priv *priv = spi_get_drvdata(spi); - if (priv->after_suspend & AFTER_SUSPEND_POWER) { + if (priv->after_suspend & AFTER_SUSPEND_POWER) mcp251x_power_enable(priv->power, 1); + + if (priv->after_suspend & AFTER_SUSPEND_UP) { + mcp251x_power_enable(priv->transceiver, 1); queue_work(priv->wq, &priv->restart_work); } else { - if (priv->after_suspend & AFTER_SUSPEND_UP) { - mcp251x_power_enable(priv->transceiver, 1); - queue_work(priv->wq, &priv->restart_work); - } else { - priv->after_suspend = 0; - } + priv->after_suspend = 0; } + priv->force_quit = 0; enable_irq(spi->irq); return 0; diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 866bac0ae7e96..339b0c5ce60c3 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -117,6 +117,9 @@ MODULE_LICENSE("GPL v2"); */ #define EMS_USB_ARM7_CLOCK 8000000 +#define CPC_TX_QUEUE_TRIGGER_LOW 25 +#define CPC_TX_QUEUE_TRIGGER_HIGH 35 + /* * CAN-Message representation in a CPC_MSG. 
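The mcp251x_can_resume() rework above unnests two independent decisions: restore the power regulator if it was cut (AFTER_SUSPEND_POWER), and re-enable the transceiver plus queue restart_work if the interface was up (AFTER_SUSPEND_UP). Previously the UP handling was reachable only when POWER was not set, so a powered-down-but-closed device queued a needless restart and a device that was both powered down and up never re-enabled its transceiver. A compact sketch of the corrected flow (flag values and helper names are made up for illustration):

    #include <stdio.h>

    #define AFTER_SUSPEND_UP    0x1   /* illustrative values */
    #define AFTER_SUSPEND_POWER 0x2

    static void power_enable(const char *what) { printf("enable %s\n", what); }
    static void queue_restart(void)            { printf("queue restart work\n"); }

    static void can_resume(unsigned int *after_suspend)
    {
        /* power restore and interface restart are independent */
        if (*after_suspend & AFTER_SUSPEND_POWER)
            power_enable("power regulator");

        if (*after_suspend & AFTER_SUSPEND_UP) {
            power_enable("transceiver");
            queue_restart();          /* restart work clears the flags */
        } else {
            *after_suspend = 0;
        }
    }

    int main(void)
    {
        unsigned int flags = AFTER_SUSPEND_POWER;  /* was powered off, not up */

        can_resume(&flags);   /* restores power only; no spurious restart */
        return 0;
    }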
Message object type is * CPC_MSG_TYPE_CAN_FRAME or CPC_MSG_TYPE_RTR_FRAME or @@ -278,6 +281,11 @@ static void ems_usb_read_interrupt_callback(struct urb *urb) switch (urb->status) { case 0: dev->free_slots = dev->intr_in_buffer[1]; + if(dev->free_slots > CPC_TX_QUEUE_TRIGGER_HIGH){ + if (netif_queue_stopped(netdev)){ + netif_wake_queue(netdev); + } + } break; case -ECONNRESET: /* unlink */ @@ -529,8 +537,6 @@ static void ems_usb_write_bulk_callback(struct urb *urb) /* Release context */ context->echo_index = MAX_TX_URBS; - if (netif_queue_stopped(netdev)) - netif_wake_queue(netdev); } /* @@ -590,7 +596,7 @@ static int ems_usb_start(struct ems_usb *dev) int err, i; dev->intr_in_buffer[0] = 0; - dev->free_slots = 15; /* initial size */ + dev->free_slots = 50; /* initial size */ for (i = 0; i < MAX_RX_URBS; i++) { struct urb *urb = NULL; @@ -838,7 +844,7 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne /* Slow down tx path */ if (atomic_read(&dev->active_tx_urbs) >= MAX_TX_URBS || - dev->free_slots < 5) { + dev->free_slots < CPC_TX_QUEUE_TRIGGER_LOW) { netif_stop_queue(netdev); } } diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 72427f21edffa..edfec540c8939 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -855,6 +855,18 @@ static int pcan_usb_probe(struct usb_interface *intf) /* * describe the PCAN-USB adapter */ +static const struct can_bittiming_const pcan_usb_const = { + .name = "pcan_usb", + .tseg1_min = 1, + .tseg1_max = 16, + .tseg2_min = 1, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 64, + .brp_inc = 1, +}; + const struct peak_usb_adapter pcan_usb = { .name = "PCAN-USB", .device_id = PCAN_USB_PRODUCT_ID, @@ -863,17 +875,7 @@ const struct peak_usb_adapter pcan_usb = { .clock = { .freq = PCAN_USB_CRYSTAL_HZ / 2 , }, - .bittiming_const = { - .name = "pcan_usb", - .tseg1_min = 1, - .tseg1_max = 16, - .tseg2_min = 1, - .tseg2_max = 8, - .sjw_max = 4, - .brp_min = 1, - .brp_max = 64, - .brp_inc = 1, - }, + .bittiming_const = &pcan_usb_const, /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb), diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 7921cff93a63b..5a2e341a6d1ea 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -792,9 +792,9 @@ static int peak_usb_create_dev(const struct peak_usb_adapter *peak_usb_adapter, dev->ep_msg_out = peak_usb_adapter->ep_msg_out[ctrl_idx]; dev->can.clock = peak_usb_adapter->clock; - dev->can.bittiming_const = &peak_usb_adapter->bittiming_const; + dev->can.bittiming_const = peak_usb_adapter->bittiming_const; dev->can.do_set_bittiming = peak_usb_set_bittiming; - dev->can.data_bittiming_const = &peak_usb_adapter->data_bittiming_const; + dev->can.data_bittiming_const = peak_usb_adapter->data_bittiming_const; dev->can.do_set_data_bittiming = peak_usb_set_data_bittiming; dev->can.do_set_mode = peak_usb_set_mode; dev->can.do_get_berr_counter = peak_usb_adapter->do_get_berr_counter; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h index 9e624f05ad4d9..506fe506c9d37 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h @@ -48,8 +48,8 @@ struct peak_usb_adapter { u32 device_id; u32 ctrlmode_supported; struct can_clock clock; - const struct 
can_bittiming_const bittiming_const; - const struct can_bittiming_const data_bittiming_const; + const struct can_bittiming_const * const bittiming_const; + const struct can_bittiming_const * const data_bittiming_const; unsigned int ctrl_count; int (*intf_probe)(struct usb_interface *intf); diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 09d14e70abd74..ce44a033f63bb 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -990,6 +990,30 @@ static void pcan_usb_fd_free(struct peak_usb_device *dev) } /* describes the PCAN-USB FD adapter */ +static const struct can_bittiming_const pcan_usb_fd_const = { + .name = "pcan_usb_fd", + .tseg1_min = 1, + .tseg1_max = 64, + .tseg2_min = 1, + .tseg2_max = 16, + .sjw_max = 16, + .brp_min = 1, + .brp_max = 1024, + .brp_inc = 1, +}; + +static const struct can_bittiming_const pcan_usb_fd_data_const = { + .name = "pcan_usb_fd", + .tseg1_min = 1, + .tseg1_max = 16, + .tseg2_min = 1, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 1024, + .brp_inc = 1, +}; + const struct peak_usb_adapter pcan_usb_fd = { .name = "PCAN-USB FD", .device_id = PCAN_USBFD_PRODUCT_ID, @@ -999,28 +1023,8 @@ const struct peak_usb_adapter pcan_usb_fd = { .clock = { .freq = PCAN_UFD_CRYSTAL_HZ, }, - .bittiming_const = { - .name = "pcan_usb_fd", - .tseg1_min = 1, - .tseg1_max = 64, - .tseg2_min = 1, - .tseg2_max = 16, - .sjw_max = 16, - .brp_min = 1, - .brp_max = 1024, - .brp_inc = 1, - }, - .data_bittiming_const = { - .name = "pcan_usb_fd", - .tseg1_min = 1, - .tseg1_max = 16, - .tseg2_min = 1, - .tseg2_max = 8, - .sjw_max = 4, - .brp_min = 1, - .brp_max = 1024, - .brp_inc = 1, - }, + .bittiming_const = &pcan_usb_fd_const, + .data_bittiming_const = &pcan_usb_fd_data_const, /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb_fd_device), @@ -1058,6 +1062,30 @@ const struct peak_usb_adapter pcan_usb_fd = { }; /* describes the PCAN-USB Pro FD adapter */ +static const struct can_bittiming_const pcan_usb_pro_fd_const = { + .name = "pcan_usb_pro_fd", + .tseg1_min = 1, + .tseg1_max = 64, + .tseg2_min = 1, + .tseg2_max = 16, + .sjw_max = 16, + .brp_min = 1, + .brp_max = 1024, + .brp_inc = 1, +}; + +static const struct can_bittiming_const pcan_usb_pro_fd_data_const = { + .name = "pcan_usb_pro_fd", + .tseg1_min = 1, + .tseg1_max = 16, + .tseg2_min = 1, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 1024, + .brp_inc = 1, +}; + const struct peak_usb_adapter pcan_usb_pro_fd = { .name = "PCAN-USB Pro FD", .device_id = PCAN_USBPROFD_PRODUCT_ID, @@ -1067,28 +1095,8 @@ const struct peak_usb_adapter pcan_usb_pro_fd = { .clock = { .freq = PCAN_UFD_CRYSTAL_HZ, }, - .bittiming_const = { - .name = "pcan_usb_pro_fd", - .tseg1_min = 1, - .tseg1_max = 64, - .tseg2_min = 1, - .tseg2_max = 16, - .sjw_max = 16, - .brp_min = 1, - .brp_max = 1024, - .brp_inc = 1, - }, - .data_bittiming_const = { - .name = "pcan_usb_pro_fd", - .tseg1_min = 1, - .tseg1_max = 16, - .tseg2_min = 1, - .tseg2_max = 8, - .sjw_max = 4, - .brp_min = 1, - .brp_max = 1024, - .brp_inc = 1, - }, + .bittiming_const = &pcan_usb_pro_fd_const, + .data_bittiming_const = &pcan_usb_pro_fd_data_const, /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb_fd_device), diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c index dec51717635e9..a5ad2e6aa73a4 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c +++ 
b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c @@ -1004,6 +1004,18 @@ int pcan_usb_pro_probe(struct usb_interface *intf) /* * describe the PCAN-USB Pro adapter */ +static const struct can_bittiming_const pcan_usb_pro_const = { + .name = "pcan_usb_pro", + .tseg1_min = 1, + .tseg1_max = 16, + .tseg2_min = 1, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 1024, + .brp_inc = 1, +}; + const struct peak_usb_adapter pcan_usb_pro = { .name = "PCAN-USB Pro", .device_id = PCAN_USBPRO_PRODUCT_ID, @@ -1012,17 +1024,7 @@ const struct peak_usb_adapter pcan_usb_pro = { .clock = { .freq = PCAN_USBPRO_CRYSTAL_HZ, }, - .bittiming_const = { - .name = "pcan_usb_pro", - .tseg1_min = 1, - .tseg1_max = 16, - .tseg2_min = 1, - .tseg2_max = 8, - .sjw_max = 4, - .brp_min = 1, - .brp_max = 1024, - .brp_inc = 1, - }, + .bittiming_const = &pcan_usb_pro_const, /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb_pro_device), diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index cedb572bf25af..db9ebbc1a7325 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -417,7 +417,7 @@ static int bcm_sf2_sw_fast_age_port(struct dsa_switch *ds, int port) core_writel(priv, port, CORE_FAST_AGE_PORT); reg = core_readl(priv, CORE_FAST_AGE_CTRL); - reg |= EN_AGE_PORT | FAST_AGE_STR_DONE; + reg |= EN_AGE_PORT | EN_AGE_DYNAMIC | FAST_AGE_STR_DONE; core_writel(priv, reg, CORE_FAST_AGE_CTRL); do { @@ -431,6 +431,8 @@ static int bcm_sf2_sw_fast_age_port(struct dsa_switch *ds, int port) if (!timeout) return -ETIMEDOUT; + core_writel(priv, 0, CORE_FAST_AGE_CTRL); + return 0; } @@ -506,7 +508,7 @@ static int bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port, u32 reg; reg = core_readl(priv, CORE_G_PCTL_PORT(port)); - cur_hw_state = reg >> G_MISTP_STATE_SHIFT; + cur_hw_state = reg & (G_MISTP_STATE_MASK << G_MISTP_STATE_SHIFT); switch (state) { case BR_STATE_DISABLED: @@ -530,10 +532,12 @@ static int bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port, } /* Fast-age ARL entries if we are moving a port from Learning or - * Forwarding state to Disabled, Blocking or Listening state + * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening + * state (hw_state) */ if (cur_hw_state != hw_state) { - if (cur_hw_state & 4 && !(hw_state & 4)) { + if (cur_hw_state >= G_MISTP_LEARN_STATE && + hw_state <= G_MISTP_LISTEN_STATE) { ret = bcm_sf2_sw_fast_age_port(ds, port); if (ret) { pr_err("%s: fast-ageing failed\n", __func__); @@ -889,15 +893,11 @@ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, struct fixed_phy_status *status) { struct bcm_sf2_priv *priv = ds_to_priv(ds); - u32 duplex, pause, speed; + u32 duplex, pause; u32 reg; duplex = core_readl(priv, CORE_DUPSTS); pause = core_readl(priv, CORE_PAUSESTS); - speed = core_readl(priv, CORE_SPDSTS); - - speed >>= (port * SPDSTS_SHIFT); - speed &= SPDSTS_MASK; status->link = 0; @@ -925,18 +925,6 @@ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, reg &= ~LINK_STS; core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(port)); - switch (speed) { - case SPDSTS_10: - status->speed = SPEED_10; - break; - case SPDSTS_100: - status->speed = SPEED_100; - break; - case SPDSTS_1000: - status->speed = SPEED_1000; - break; - } - if ((pause & (1 << port)) && (pause & (1 << (port + PAUSESTS_TX_PAUSE_SHIFT)))) { status->asym_pause = 1; diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index 22e2ebf313332..789d7b7737da4 100644 --- 
a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -112,8 +112,8 @@ static inline u64 name##_readq(struct bcm_sf2_priv *priv, u32 off) \ spin_unlock(&priv->indir_lock); \ return (u64)indir << 32 | dir; \ } \ -static inline void name##_writeq(struct bcm_sf2_priv *priv, u32 off, \ - u64 val) \ +static inline void name##_writeq(struct bcm_sf2_priv *priv, u64 val, \ + u32 off) \ { \ spin_lock(&priv->indir_lock); \ reg_writel(priv, upper_32_bits(val), REG_DIR_DATA_WRITE); \ diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index da48e66377b5f..8207877d62376 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -511,8 +511,7 @@ static int tse_poll(struct napi_struct *napi, int budget) if (rxcomplete < budget) { - napi_gro_flush(napi, false); - __napi_complete(napi); + napi_complete(napi); netdev_dbg(priv->dev, "NAPI Complete, did %d packets with budget %d\n", diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index d81fc6bd47590..5c92fb71b37ed 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -263,7 +263,7 @@ static int xgbe_alloc_pages(struct xgbe_prv_data *pdata, int ret; /* Try to obtain pages, decreasing order if necessary */ - gfp |= __GFP_COLD | __GFP_COMP; + gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN; while (order >= 0) { pages = alloc_pages(gfp, order); if (pages) diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 932bd1862f7ad..6e9036a06515f 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -1014,13 +1014,12 @@ static int atl1c_setup_ring_resources(struct atl1c_adapter *adapter) sizeof(struct atl1c_recv_ret_status) * rx_desc_count + 8 * 4; - ring_header->desc = pci_alloc_consistent(pdev, ring_header->size, - &ring_header->dma); + ring_header->desc = dma_zalloc_coherent(&pdev->dev, ring_header->size, + &ring_header->dma, GFP_KERNEL); if (unlikely(!ring_header->desc)) { - dev_err(&pdev->dev, "pci_alloc_consistend failed\n"); + dev_err(&pdev->dev, "could not get memory for DMA buffer\n"); goto err_nomem; } - memset(ring_header->desc, 0, ring_header->size); /* init TPD ring */ tpd_ring[0].dma = roundup(ring_header->dma, 8); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 33501bcddc48e..8a97d28f3d650 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9323,7 +9323,8 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link) * function stop ramrod is sent, since as part of this ramrod FW access * PTP registers. 
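The bcm_sf2.h fix above is a pure signature-order bug: the macro-generated name##_writeq() declared (priv, u32 off, u64 val) while every caller passed (priv, val, off), and since both parameters are integers the compiler converted silently, sending the value where the offset belonged. Declaring the parameters in caller order fixes it. A reduced illustration against a fake register file (names invented for the example):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t regs[16];         /* fake register file, 8-byte registers */

    /* The generated accessor used to be declared (u32 off, u64 val);
     * callers passed (val, off) and integer conversions hid the swap.
     * Declaring value-first matches the call sites: */
    static void core_writeq(uint64_t val, uint32_t off)
    {
        regs[off / 8] = val;
    }

    int main(void)
    {
        core_writeq(0x00000001deadbeefULL, 8);   /* (value, offset) */
        printf("regs[1] = 0x%016llx\n", (unsigned long long)regs[1]);
        return 0;
    }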
*/ - bnx2x_stop_ptp(bp); + if (bp->flags & PTP_SUPPORTED) + bnx2x_stop_ptp(bp); /* Disable HW interrupts, NAPI */ bnx2x_netif_stop(bp, 1); diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 069952fa5d644..d5415205779fb 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7833,6 +7833,14 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi, return ret; } +static bool tg3_tso_bug_gso_check(struct tg3_napi *tnapi, struct sk_buff *skb) +{ + /* Check if we will never have enough descriptors, + * as gso_segs can be more than current ring size + */ + return skb_shinfo(skb)->gso_segs < tnapi->tx_pending / 3; +} + static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *); /* Use GSO to workaround all TSO packets that meet HW bug conditions @@ -7936,14 +7944,19 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) * vlan encapsulated. */ if (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) - return tg3_tso_bug(tp, tnapi, txq, skb); + skb->protocol == htons(ETH_P_8021AD)) { + if (tg3_tso_bug_gso_check(tnapi, skb)) + return tg3_tso_bug(tp, tnapi, txq, skb); + goto drop; + } if (!skb_is_gso_v6(skb)) { if (unlikely((ETH_HLEN + hdr_len) > 80) && - tg3_flag(tp, TSO_BUG)) - return tg3_tso_bug(tp, tnapi, txq, skb); - + tg3_flag(tp, TSO_BUG)) { + if (tg3_tso_bug_gso_check(tnapi, skb)) + return tg3_tso_bug(tp, tnapi, txq, skb); + goto drop; + } ip_csum = iph->check; ip_tot_len = iph->tot_len; iph->check = 0; @@ -8075,7 +8088,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) if (would_hit_hwbug) { tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i); - if (mss) { + if (mss && tg3_tso_bug_gso_check(tnapi, skb)) { /* If it's a TSO packet, do GSO instead of * allocating and copying to a large linear SKB */ @@ -10757,7 +10770,7 @@ static ssize_t tg3_show_temp(struct device *dev, tg3_ape_scratchpad_read(tp, &temperature, attr->index, sizeof(temperature)); spin_unlock_bh(&tp->lock); - return sprintf(buf, "%u\n", temperature); + return sprintf(buf, "%u\n", temperature * 1000); } diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index caae6cb2bc1a4..a1c30ee60888e 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -675,6 +675,7 @@ bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget) if (!next_cmpl->valid) break; } + packets++; /* TODO: BNA_CQ_EF_LOCAL ? */ if (unlikely(flags & (BNA_CQ_EF_MAC_ERROR | @@ -691,7 +692,6 @@ bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget) else bnad_cq_setup_skb_frags(rcb, skb, sop_ci, nvecs, len); - packets++; rcb->rxq->rx_packets++; rcb->rxq->rx_bytes += totlen; ccb->bytes_per_intr += totlen; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 66d47e448e4d1..570390b5cd42f 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1396,6 +1396,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) if ((status & BD_ENET_RX_LAST) == 0) netdev_err(ndev, "rcv is not +last\n"); + writel(FEC_ENET_RXF, fep->hwp + FEC_IEVENT); /* Check for errors. 
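tg3_tso_bug_gso_check(), added above, gates the software GSO fallback: tg3_tso_bug() expands a TSO packet into individual segments on the same TX ring, so if gso_segs is not well below the ring size the expansion can never complete and the queue would stall; such packets are now dropped instead. The heuristic in isolation:

    #include <stdbool.h>
    #include <stdio.h>

    /* The segmented packet must need fewer descriptors than a third of
     * the TX ring, or the software fallback could never finish. */
    static bool tso_bug_gso_check(unsigned int gso_segs, unsigned int tx_pending)
    {
        return gso_segs < tx_pending / 3;
    }

    int main(void)
    {
        printf("%d\n", tso_bug_gso_check(40, 511));    /* 1: use GSO fallback */
        printf("%d\n", tso_bug_gso_check(200, 511));   /* 0: drop the packet */
        return 0;
    }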
*/ if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h index 28df37420da96..ac02c675c59c4 100644 --- a/drivers/net/ethernet/ibm/emac/core.h +++ b/drivers/net/ethernet/ibm/emac/core.h @@ -460,8 +460,8 @@ struct emac_ethtool_regs_subhdr { u32 index; }; -#define EMAC_ETHTOOL_REGS_VER 0 -#define EMAC4_ETHTOOL_REGS_VER 1 -#define EMAC4SYNC_ETHTOOL_REGS_VER 2 +#define EMAC_ETHTOOL_REGS_VER 3 +#define EMAC4_ETHTOOL_REGS_VER 4 +#define EMAC4SYNC_ETHTOOL_REGS_VER 5 #endif /* __IBM_NEWEMAC_CORE_H */ diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index dc79ed85030b7..32e77755a9c61 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -2010,7 +2010,7 @@ const struct e1000_info e1000_82573_info = { .flags2 = FLAG2_DISABLE_ASPM_L1 | FLAG2_DISABLE_ASPM_L0S, .pba = 20, - .max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN, + .max_hw_frame_size = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN, .get_variants = e1000_get_variants_82571, .mac_ops = &e82571_mac_ops, .phy_ops = &e82_phy_ops_m88, diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 9d81c03174334..e2498dbf3c3b6 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1563,7 +1563,7 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) ((adapter->hw.mac.type >= e1000_pch2lan) && (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LSECCK)))) { adapter->flags &= ~FLAG_HAS_JUMBO_FRAMES; - adapter->max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN; + adapter->max_hw_frame_size = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN; hw->mac.ops.blink_led = NULL; } @@ -5681,7 +5681,7 @@ const struct e1000_info e1000_ich8_info = { | FLAG_HAS_FLASH | FLAG_APME_IN_WUC, .pba = 8, - .max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN, + .max_hw_frame_size = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, @@ -5754,7 +5754,7 @@ const struct e1000_info e1000_pch2_info = { .flags2 = FLAG2_HAS_PHY_STATS | FLAG2_HAS_EEE, .pba = 26, - .max_hw_frame_size = 9018, + .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, @@ -5774,7 +5774,7 @@ const struct e1000_info e1000_pch_lpt_info = { .flags2 = FLAG2_HAS_PHY_STATS | FLAG2_HAS_EEE, .pba = 26, - .max_hw_frame_size = 9018, + .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, @@ -5794,7 +5794,7 @@ const struct e1000_info e1000_pch_spt_info = { .flags2 = FLAG2_HAS_PHY_STATS | FLAG2_HAS_EEE, .pba = 26, - .max_hw_frame_size = 9018, + .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index c509a5c900f52..68913d1035421 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3807,7 +3807,7 @@ void e1000e_reset(struct e1000_adapter *adapter) /* reset Packet Buffer Allocation to default */ ew32(PBA, pba); - if (adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) { + if (adapter->max_frame_size > (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)) { /* To maintain wire speed transmits, the Tx FIFO should be * large enough to 
accommodate two full transmit packets, * rounded up to the next 1KB and expressed in KB. Likewise, @@ -4196,9 +4196,9 @@ static int e1000_sw_init(struct e1000_adapter *adapter) { struct net_device *netdev = adapter->netdev; - adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN; + adapter->rx_buffer_len = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN; adapter->rx_ps_bsize0 = 128; - adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + adapter->max_frame_size = netdev->mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; adapter->tx_ring_count = E1000_DEFAULT_TXD; adapter->rx_ring_count = E1000_DEFAULT_RXD; @@ -5781,17 +5781,17 @@ struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev, static int e1000_change_mtu(struct net_device *netdev, int new_mtu) { struct e1000_adapter *adapter = netdev_priv(netdev); - int max_frame = new_mtu + VLAN_HLEN + ETH_HLEN + ETH_FCS_LEN; + int max_frame = new_mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; /* Jumbo frame support */ - if ((max_frame > ETH_FRAME_LEN + ETH_FCS_LEN) && + if ((max_frame > (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)) && !(adapter->flags & FLAG_HAS_JUMBO_FRAMES)) { e_err("Jumbo Frames not supported.\n"); return -EINVAL; } /* Supported frame sizes */ - if ((new_mtu < ETH_ZLEN + ETH_FCS_LEN + VLAN_HLEN) || + if ((new_mtu < (VLAN_ETH_ZLEN + ETH_FCS_LEN)) || (max_frame > adapter->max_hw_frame_size)) { e_err("Unsupported MTU setting\n"); return -EINVAL; @@ -5831,10 +5831,8 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) adapter->rx_buffer_len = 4096; /* adjust allocation if LPE protects us, and we aren't using SBP */ - if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN) || - (max_frame == ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN)) - adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN - + ETH_FCS_LEN; + if (max_frame <= (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)) + adapter->rx_buffer_len = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN; if (netif_running(netdev)) e1000e_up(adapter); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index c754b2027281f..c9da1b5d4804d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -216,7 +216,7 @@ static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring, static inline bool fm10k_page_is_reserved(struct page *page) { - return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc; + return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer, diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index c2bd4f98a8376..212d668dabb38 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -540,6 +540,7 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, unsigned char *va, struct sk_buff *skb); int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); +void igb_set_flag_queue_pairs(struct igb_adapter *, const u32); #ifdef CONFIG_IGB_HWMON void igb_sysfs_exit(struct igb_adapter *adapter); int igb_sysfs_init(struct igb_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index d5673eb90c542..0afc0913e5b91 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2991,6 +2991,7 @@ 
static int igb_set_channels(struct net_device *netdev, { struct igb_adapter *adapter = netdev_priv(netdev); unsigned int count = ch->combined_count; + unsigned int max_combined = 0; /* Verify they are not requesting separate vectors */ if (!count || ch->rx_count || ch->tx_count) @@ -3001,11 +3002,13 @@ static int igb_set_channels(struct net_device *netdev, return -EINVAL; /* Verify the number of channels doesn't exceed hw limits */ - if (count > igb_max_channels(adapter)) + max_combined = igb_max_channels(adapter); + if (count > max_combined) return -EINVAL; if (count != adapter->rss_queues) { adapter->rss_queues = count; + igb_set_flag_queue_pairs(adapter, max_combined); /* Hardware has to reinitialize queues and interrupts to * match the new configuration. diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a0a9b1fcb5e8e..7dfbcde345093 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1205,10 +1205,14 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, /* allocate q_vector and rings */ q_vector = adapter->q_vector[v_idx]; - if (!q_vector) + if (!q_vector) { q_vector = kzalloc(size, GFP_KERNEL); - else + } else if (size > ksize(q_vector)) { + kfree_rcu(q_vector, rcu); + q_vector = kzalloc(size, GFP_KERNEL); + } else { memset(q_vector, 0, size); + } if (!q_vector) return -ENOMEM; @@ -2860,7 +2864,7 @@ static void igb_probe_vfs(struct igb_adapter *adapter) return; pci_sriov_set_totalvfs(pdev, 7); - igb_pci_enable_sriov(pdev, max_vfs); + igb_enable_sriov(pdev, max_vfs); #endif /* CONFIG_PCI_IOV */ } @@ -2901,6 +2905,14 @@ static void igb_init_queue_configuration(struct igb_adapter *adapter) adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); + igb_set_flag_queue_pairs(adapter, max_rss_queues); +} + +void igb_set_flag_queue_pairs(struct igb_adapter *adapter, + const u32 max_rss_queues) +{ + struct e1000_hw *hw = &adapter->hw; + /* Determine if we need to pair queues. 
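The igb_alloc_q_vector() hunk above makes vector reuse safe across channel-count changes: a recycled q_vector is cleared and reused only if the existing allocation is still large enough (ksize() reports the real slab block size); otherwise it is released with kfree_rcu() and reallocated. A generic userspace sketch of the reuse-or-grow pattern, tracking capacity explicitly since nothing like ksize() exists outside the kernel:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct buf { size_t cap; void *mem; };

    /* Reuse the old allocation when it is big enough, else grow it. */
    static int buf_fit(struct buf *b, size_t size)
    {
        if (!b->mem || size > b->cap) {   /* mirrors size > ksize(q_vector) */
            free(b->mem);                 /* the driver uses kfree_rcu() here */
            b->mem = malloc(size);
            if (!b->mem)
                return -1;
            b->cap = size;
        }
        memset(b->mem, 0, size);          /* reused vectors start zeroed */
        return 0;
    }

    int main(void)
    {
        struct buf b = { 0, NULL };

        buf_fit(&b, 128);    /* initial allocation */
        buf_fit(&b, 64);     /* smaller: reuse and clear in place */
        buf_fit(&b, 256);    /* larger: free and reallocate */
        printf("capacity now %zu\n", b.cap);
        free(b.mem);
        return 0;
    }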
*/ switch (hw->mac.type) { case e1000_82575: @@ -6584,7 +6596,7 @@ static void igb_reuse_rx_page(struct igb_ring *rx_ring, static inline bool igb_page_is_reserved(struct page *page) { - return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc; + return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5be12a00e1f44..463ff47200f1a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1829,7 +1829,7 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, static inline bool ixgbe_page_is_reserved(struct page *page) { - return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc; + return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } /** diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index e71cdde9cb017..1d7b00b038a2e 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -765,7 +765,7 @@ static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring, static inline bool ixgbevf_page_is_reserved(struct page *page) { - return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc; + return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } /** diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ce5f7f9cff060..e07afc673d7a8 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -310,6 +310,7 @@ struct mvneta_port { unsigned int link; unsigned int duplex; unsigned int speed; + unsigned int tx_csum_limit; int use_inband_status:1; }; @@ -948,7 +949,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp) /* Set CPU queue access map - all CPUs have access to all RX * queues and to all TX queues */ - for (cpu = 0; cpu < CONFIG_NR_CPUS; cpu++) + for_each_present_cpu(cpu) mvreg_write(pp, MVNETA_CPU_MAP(cpu), (MVNETA_CPU_RXQ_ACCESS_ALL_MASK | MVNETA_CPU_TXQ_ACCESS_ALL_MASK)); @@ -1013,6 +1014,12 @@ static void mvneta_defaults_set(struct mvneta_port *pp) val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); val |= MVNETA_GMAC_1MS_CLOCK_ENABLE; mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); + } else { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE | + MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_DUPLEX_EN); + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); } mvneta_set_ucast_table(pp, -1); @@ -2502,8 +2509,10 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) dev->mtu = mtu; - if (!netif_running(dev)) + if (!netif_running(dev)) { + netdev_update_features(dev); return 0; + } /* The interface is running, so we have to force a * reallocation of the queues @@ -2532,9 +2541,26 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) mvneta_start_dev(pp); mvneta_port_up(pp); + netdev_update_features(dev); + return 0; } +static netdev_features_t mvneta_fix_features(struct net_device *dev, + netdev_features_t features) +{ + struct mvneta_port *pp = netdev_priv(dev); + + if (pp->tx_csum_limit && dev->mtu > pp->tx_csum_limit) { + features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + netdev_info(dev, + "Disable IP checksum for MTU greater than %dB\n", + pp->tx_csum_limit); + } + + return features; +} + /* Get mac address */ 
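/* [Editor's aside, illustrative only] mvneta_fix_features() above only
 * takes effect because mvneta_change_mtu() now calls
 * netdev_update_features(), which re-runs every ndo_fix_features hook.
 * The resulting flow, reduced to a sketch (the 1600-byte tx_csum_limit
 * is set for "marvell,armada-370-neta" in the probe change below):
 */
static int sketch_change_mtu(struct net_device *dev, int mtu)
{
	dev->mtu = mtu;
	/* re-evaluates ndo_fix_features: NETIF_F_IP_CSUM and NETIF_F_TSO
	 * are dropped once dev->mtu exceeds tx_csum_limit */
	netdev_update_features(dev);
	return 0;
}
/* [end aside] */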
static void mvneta_get_mac_addr(struct mvneta_port *pp, unsigned char *addr) { @@ -2856,6 +2882,7 @@ static const struct net_device_ops mvneta_netdev_ops = { .ndo_set_rx_mode = mvneta_set_rx_mode, .ndo_set_mac_address = mvneta_set_mac_addr, .ndo_change_mtu = mvneta_change_mtu, + .ndo_fix_features = mvneta_fix_features, .ndo_get_stats64 = mvneta_get_stats64, .ndo_do_ioctl = mvneta_ioctl, }; @@ -3002,8 +3029,8 @@ static int mvneta_probe(struct platform_device *pdev) const char *dt_mac_addr; char hw_mac_addr[ETH_ALEN]; const char *mac_from; + const char *managed; int phy_mode; - int fixed_phy = 0; int err; /* Our multiqueue support is not complete, so for now, only @@ -3037,7 +3064,6 @@ static int mvneta_probe(struct platform_device *pdev) dev_err(&pdev->dev, "cannot register fixed PHY\n"); goto err_free_irq; } - fixed_phy = 1; /* In the case of a fixed PHY, the DT node associated * to the PHY is the Ethernet MAC DT node. @@ -3061,8 +3087,10 @@ static int mvneta_probe(struct platform_device *pdev) pp = netdev_priv(dev); pp->phy_node = phy_node; pp->phy_interface = phy_mode; - pp->use_inband_status = (phy_mode == PHY_INTERFACE_MODE_SGMII) && - fixed_phy; + + err = of_property_read_string(dn, "managed", &managed); + pp->use_inband_status = (err == 0 && + strcmp(managed, "in-band-status") == 0); pp->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pp->clk)) { @@ -3101,6 +3129,9 @@ static int mvneta_probe(struct platform_device *pdev) } } + if (of_device_is_compatible(dn, "marvell,armada-370-neta")) + pp->tx_csum_limit = 1600; + pp->tx_ring_size = MVNETA_MAX_TXD; pp->rx_ring_size = MVNETA_MAX_RXD; @@ -3179,6 +3210,7 @@ static int mvneta_remove(struct platform_device *pdev) static const struct of_device_id mvneta_match[] = { { .compatible = "marvell,armada-370-neta" }, + { .compatible = "marvell,armada-xp-neta" }, { } }; MODULE_DEVICE_TABLE(of, mvneta_match); diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 529ef0594b902..3756e45d8cec6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2382,7 +2382,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) } } - memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size); + memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe)); priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD; INIT_WORK(&priv->mfunc.master.comm_work, mlx4_master_comm_channel); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 8a083d73efdba..dae2ebb53af72 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -236,6 +236,24 @@ static const struct ptp_clock_info mlx4_en_ptp_clock_info = { .enable = mlx4_en_phc_enable, }; +#define MLX4_EN_WRAP_AROUND_SEC 10ULL + +/* This function calculates the max shift that enables the user range + * of MLX4_EN_WRAP_AROUND_SEC values in the cycles register. + */ +static u32 freq_to_shift(u16 freq) +{ + u32 freq_khz = freq * 1000; + u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; + u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? 
+ max_val_cycles : roundup_pow_of_two(max_val_cycles) - 1; + /* calculate max possible multiplier in order to fit in 64bit */ + u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); + + /* This comes from the reverse of clocksource_khz2mult */ + return ilog2(div_u64(max_mul * freq_khz, 1000000)); +} + void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) { struct mlx4_dev *dev = mdev->dev; @@ -247,12 +265,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) memset(&mdev->cycles, 0, sizeof(mdev->cycles)); mdev->cycles.read = mlx4_en_read_clock; mdev->cycles.mask = CLOCKSOURCE_MASK(48); - /* Using shift to make calculation more accurate. Since current HW - * clock frequency is 427 MHz, and cycles are given using a 48 bits - * register, the biggest shift when calculating using u64, is 14 - * (max_cycles * multiplier < 2^64) - */ - mdev->cycles.shift = 14; + mdev->cycles.shift = freq_to_shift(dev->caps.hca_core_clock); mdev->cycles.mult = clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift); mdev->nominal_c_mult = mdev->cycles.mult; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index cf467a9f6cc78..e9189597000de 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1973,10 +1973,6 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } - if (priv->base_tx_qpn) { - mlx4_qp_release_range(priv->mdev->dev, priv->base_tx_qpn, priv->tx_ring_num); - priv->base_tx_qpn = 0; - } } int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) @@ -2334,8 +2330,6 @@ static void mlx4_en_add_vxlan_offloads(struct work_struct *work) /* set offloads */ priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL; - priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - priv->dev->features |= NETIF_F_GSO_UDP_TUNNEL; } static void mlx4_en_del_vxlan_offloads(struct work_struct *work) @@ -2346,8 +2340,6 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work) /* unset offloads */ priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL); - priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL; - priv->dev->features &= ~NETIF_F_GSO_UDP_TUNNEL; ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 0); @@ -2944,6 +2936,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->rss_hash_fn = ETH_RSS_HASH_TOP; } + if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { + dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + dev->features |= NETIF_F_GSO_UDP_TUNNEL; + } + mdev->pndev[port] = dev; mdev->upper[port] = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 0a56f010c8468..760a8b3879128 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -223,11 +223,11 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) stats->collisions = 0; stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP); stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength); - stats->rx_over_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); + stats->rx_over_errors = 0; stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC); stats->rx_frame_errors = 0; stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); - stats->rx_missed_errors = 
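/* [Editor's aside] Worked example for freq_to_shift() above, assuming
 * the historical 427 MHz core clock: freq_khz = 427000, so
 * max_val_cycles = 427000 * 1000 * 10 ~= 4.27e9, which rounds up to
 * 2^32 - 1. Then max_mul = 2^64 / (2^32 - 1) ~= 2^32, and
 * shift = ilog2(2^32 * 427000 / 1000000) = ilog2(~1.83e9) = 30.
 * clocksource_khz2mult(427000, 30) then gives mult ~= 2.5e9, and
 * max_val_cycles_rounded * mult stays just below 2^64: the same
 * no-overflow bound the old hard-coded shift of 14 enforced for a full
 * 48-bit counter rather than for a 10-second wrap-around.
 */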
be32_to_cpu(mlx4_en_stats->RdropOvflw); + stats->rx_missed_errors = 0; stats->tx_aborted_errors = 0; stats->tx_carrier_errors = 0; stats->tx_fifo_errors = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 2a77a6b191216..80aac20104de2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -723,7 +723,7 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, } #endif static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, - int hwtstamp_rx_filter) + netdev_features_t dev_features) { __wsum hw_checksum = 0; @@ -731,14 +731,8 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, hw_checksum = csum_unfold((__force __sum16)cqe->checksum); - if (((struct ethhdr *)va)->h_proto == htons(ETH_P_8021Q) && - hwtstamp_rx_filter != HWTSTAMP_FILTER_NONE) { - /* next protocol non IPv4 or IPv6 */ - if (((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto - != htons(ETH_P_IP) && - ((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto - != htons(ETH_P_IPV6)) - return -1; + if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK) && + !(dev_features & NETIF_F_HW_VLAN_CTAG_RX)) { hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr); hdr += sizeof(struct vlan_hdr); } @@ -901,7 +895,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (ip_summed == CHECKSUM_COMPLETE) { void *va = skb_frag_address(skb_shinfo(gro_skb)->frags); - if (check_csum(cqe, gro_skb, va, ring->hwtstamp_rx_filter)) { + if (check_csum(cqe, gro_skb, va, + dev->features)) { ip_summed = CHECKSUM_NONE; ring->csum_none++; ring->csum_complete--; @@ -956,7 +951,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud } if (ip_summed == CHECKSUM_COMPLETE) { - if (check_csum(cqe, skb, skb->data, ring->hwtstamp_rx_filter)) { + if (check_csum(cqe, skb, skb->data, dev->features)) { ip_summed = CHECKSUM_NONE; ring->csum_complete--; ring->csum_none++; @@ -1261,8 +1256,6 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) rss_context->hash_fn = MLX4_RSS_HASH_TOP; memcpy(rss_context->rss_key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE); - netdev_rss_key_fill(rss_context->rss_key, - MLX4_EN_RSS_KEY_SIZE); } else { en_err(priv, "Unknown RSS hash function requested\n"); err = -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 7bed3a88579fa..c10d98f6ad967 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -66,6 +66,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->size = size; ring->size_mask = size - 1; ring->stride = stride; + ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS; tmp = size * sizeof(struct mlx4_en_tx_info); ring->tx_info = kmalloc_node(tmp, GFP_KERNEL | __GFP_NOWARN, node); @@ -180,6 +181,7 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, mlx4_bf_free(mdev->dev, &ring->bf); mlx4_qp_remove(mdev->dev, &ring->qp); mlx4_qp_free(mdev->dev, &ring->qp); + mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1); mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); kfree(ring->bounce_buf); @@ -231,6 +233,11 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp); } +static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring) +{ + return ring->prod - ring->cons > 
ring->full_size; +} + static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int index, u8 owner) @@ -473,11 +480,10 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, netdev_tx_completed_queue(ring->tx_queue, packets, bytes); - /* - * Wakeup Tx queue if this stopped, and at least 1 packet - * was completed + /* Wake up the Tx queue if it was stopped and the ring is not full. */ - if (netif_tx_queue_stopped(ring->tx_queue) && txbbs_skipped > 0) { + if (netif_tx_queue_stopped(ring->tx_queue) && + !mlx4_en_is_tx_ring_full(ring)) { netif_tx_wake_queue(ring->tx_queue); ring->wake_queue++; } @@ -921,8 +927,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) skb_tx_timestamp(skb); /* Check available TXBBs And 2K spare for prefetch */ - stop_queue = (int)(ring->prod - ring_cons) > - ring->size - HEADROOM - MAX_DESC_TXBBS; + stop_queue = mlx4_en_is_tx_ring_full(ring); if (unlikely(stop_queue)) { netif_tx_stop_queue(ring->tx_queue); ring->queue_stopped++; @@ -991,8 +996,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) smp_rmb(); ring_cons = ACCESS_ONCE(ring->cons); - if (unlikely(((int)(ring->prod - ring_cons)) <= - ring->size - HEADROOM - MAX_DESC_TXBBS)) { + if (unlikely(!mlx4_en_is_tx_ring_full(ring))) { netif_tx_wake_queue(ring->tx_queue); ring->wake_queue++; } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 2619c9fbf42df..337811d208bd0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -185,7 +185,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe) return; } - memcpy(s_eqe, eqe, dev->caps.eqe_size - 1); + memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1); s_eqe->slave_id = slave; /* ensure all information is written before setting the ownership bit */ dma_wmb(); @@ -573,7 +573,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) continue; mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN to slave: %d, port:%d\n", __func__, i, port); - s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) { eqe->event.port_change.port = cpu_to_be32( @@ -608,7 +608,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) continue; if (i == mlx4_master_func_num(dev)) continue; - s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) { eqe->event.port_change.port = cpu_to_be32( diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 6fce587188372..0d80aed590437 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -93,8 +93,14 @@ int mlx4_register_interface(struct mlx4_interface *intf) mutex_lock(&intf_mutex); list_add_tail(&intf->list, &intf_list); - list_for_each_entry(priv, &dev_list, dev_list) + list_for_each_entry(priv, &dev_list, dev_list) { + if (mlx4_is_mfunc(&priv->dev) && (intf->flags & MLX4_INTFF_BONDING)) { + mlx4_dbg(&priv->dev, + "SRIOV, disabling HA mode for intf proto %d\n", intf->protocol); + intf->flags &= ~MLX4_INTFF_BONDING; + } mlx4_add_device(intf, priv); + } mutex_unlock(&intf_mutex); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index
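/* [Editor's aside, sketch only] The new mlx4_en_is_tx_ring_full() used
 * above works because prod and cons are free-running u32 counters:
 * unsigned subtraction yields the in-flight TXBB count even after the
 * counters wrap 2^32, so one helper replaces both open-coded signed
 * comparisons. Its core, with hypothetical names:
 */
static inline bool sketch_ring_full(u32 prod, u32 cons, u32 full_size)
{
	/* full_size = size - HEADROOM - MAX_DESC_TXBBS reserves room for
	 * one maximal descriptor plus prefetch headroom */
	return prod - cons > full_size;
}
/* [end aside] */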
d021f079f181b..909fcf803c542 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -279,6 +279,7 @@ struct mlx4_en_tx_ring { u32 size; /* number of TXBBs */ u32 size_mask; u16 stride; + u32 full_size; u16 cqn; /* index of port CQ associated with this ring */ u32 buf_size; __be32 doorbell_qpn; @@ -579,7 +580,6 @@ struct mlx4_en_priv { int vids[128]; bool wol; struct device *ddev; - int base_tx_qpn; struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE]; struct hwtstamp_config hwtstamp_config; diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 6af028d5f9bcb..97e4df9bf407b 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -736,9 +736,8 @@ qcaspi_netdev_tx_timeout(struct net_device *dev) netdev_info(qca->net_dev, "Transmit timeout at %ld, latency %ld\n", jiffies, jiffies - dev->trans_start); qca->net_dev->stats.tx_errors++; - /* wake the queue if there is room */ - if (qcaspi_tx_ring_has_space(&qca->txr)) - netif_wake_queue(dev); + /* Trigger tx queue flush and QCA7000 reset */ + qca->sync = QCASPI_SYNC_UNKNOWN; } static int diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 7fb244f565b28..13463c4acc86f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1481,6 +1481,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) if (mdp->cd->shift_rd0) desc_status >>= 16; + skb = mdp->rx_skbuff[entry]; if (desc_status & (RD_RFS1 | RD_RFS2 | RD_RFS3 | RD_RFS4 | RD_RFS5 | RD_RFS6 | RD_RFS10)) { ndev->stats.rx_errors++; @@ -1496,12 +1497,11 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) ndev->stats.rx_missed_errors++; if (desc_status & RD_RFS10) ndev->stats.rx_over_errors++; - } else { + } else if (skb) { if (!mdp->cd->hw_swap) sh_eth_soft_swap( phys_to_virt(ALIGN(rxdesc->addr, 4)), pkt_len + 2); - skb = mdp->rx_skbuff[entry]; mdp->rx_skbuff[entry] = NULL; if (mdp->cd->rpadir) skb_reserve(skb, NET_IP_ALIGN); diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index cf98cc9bbc8dc..4fedf7fa72c46 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -3384,12 +3384,14 @@ static void rocker_port_fdb_learn_work(struct work_struct *work) info.addr = lw->addr; info.vid = lw->vid; + rtnl_lock(); if (learned && removing) call_netdev_switch_notifiers(NETDEV_SWITCH_FDB_DEL, lw->dev, &info.info); else if (learned && !removing) call_netdev_switch_notifiers(NETDEV_SWITCH_FDB_ADD, lw->dev, &info.info); + rtnl_unlock(); kfree(work); } @@ -4587,6 +4589,7 @@ static void rocker_remove_ports(struct rocker *rocker) rocker_port = rocker->ports[i]; rocker_port_ig_tbl(rocker_port, ROCKER_OP_FLAG_REMOVE); unregister_netdev(rocker_port->dev); + free_netdev(rocker_port->dev); } kfree(rocker->ports); } diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index fbb6cfa0f5f1d..feca46efa12f8 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1344,7 +1344,9 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue) unsigned int write_ptr; efx_qword_t *txd; - BUG_ON(tx_queue->write_count == tx_queue->insert_count); + tx_queue->xmit_more_available = false; + if (unlikely(tx_queue->write_count == tx_queue->insert_count)) + return; do { write_ptr = tx_queue->write_count & tx_queue->ptr_mask; diff 
--git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index bb89e96a125ea..6d4e0047a31d5 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -319,7 +319,9 @@ void efx_farch_tx_write(struct efx_tx_queue *tx_queue) unsigned write_ptr; unsigned old_write_count = tx_queue->write_count; - BUG_ON(tx_queue->write_count == tx_queue->insert_count); + tx_queue->xmit_more_available = false; + if (unlikely(tx_queue->write_count == tx_queue->insert_count)) + return; do { write_ptr = tx_queue->write_count & tx_queue->ptr_mask; diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 325dd94bca465..0bdef4a074dd3 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -218,6 +218,7 @@ struct efx_tx_buffer { * @tso_packets: Number of packets via the TSO xmit path * @pushes: Number of times the TX push feature has been used * @pio_packets: Number of times the TX PIO feature has been used + * @xmit_more_available: Are any packets waiting to be pushed to the NIC * @empty_read_count: If the completion path has seen the queue as empty * and the transmission path has not yet checked this, the value of * @read_count bitwise-added to %EFX_EMPTY_COUNT_VALID; otherwise 0. @@ -250,6 +251,7 @@ struct efx_tx_queue { unsigned int tso_packets; unsigned int pushes; unsigned int pio_packets; + bool xmit_more_available; /* Statistics to supplement MAC stats */ unsigned long tx_packets; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index aaf2987512b5d..e70edc3dea7e0 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -431,8 +431,20 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) efx_tx_maybe_stop_queue(tx_queue); /* Pass off to hardware */ - if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) + if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) { + struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue); + + /* There could be packets left on the partner queue if those + * SKBs had skb->xmit_more set. If we do not push those they + * could be left for a long time and cause a netdev watchdog. + */ + if (txq2->xmit_more_available) + efx_nic_push_buffers(txq2); + efx_nic_push_buffers(tx_queue); + } else { + tx_queue->xmit_more_available = skb->xmit_more; + } tx_queue->tx_packets++; @@ -721,6 +733,7 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue) tx_queue->read_count = 0; tx_queue->old_read_count = 0; tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; + tx_queue->xmit_more_available = false; /* Set up TX descriptor ring */ efx_nic_init_tx(tx_queue); @@ -746,6 +759,7 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) ++tx_queue->read_count; } + tx_queue->xmit_more_available = false; netdev_tx_reset_queue(tx_queue->core_txq); } @@ -1301,8 +1315,20 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, efx_tx_maybe_stop_queue(tx_queue); /* Pass off to hardware */ - if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) + if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) { + struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue); + + /* There could be packets left on the partner queue if those + * SKBs had skb->xmit_more set. If we do not push those they + * could be left for a long time and cause a netdev watchdog. 
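/* [Editor's aside, illustrative sketch, not part of the patch] The sfc
 * xmit_more changes here follow a general doorbell-batching pattern:
 * skip the expensive hardware push while the stack promises more frames
 * (skb->xmit_more), but record that descriptors are pending so a later
 * transmit, possibly on the partner queue, can flush them. Reduced to
 * its core with hypothetical names:
 */
struct sketch_txq {
	bool xmit_more_available;	/* descriptors written, not pushed */
	struct netdev_queue *core_txq;
};

static void sketch_push(struct sketch_txq *txq)
{
	txq->xmit_more_available = false;
	/* write the accumulated descriptors to the NIC doorbell here */
}

static void sketch_tx_tail(struct sk_buff *skb, struct sketch_txq *txq,
			   struct sketch_txq *partner)
{
	if (!skb->xmit_more || netif_xmit_stopped(txq->core_txq)) {
		if (partner->xmit_more_available)
			sketch_push(partner);	/* avoid stranded frames */
		sketch_push(txq);
	} else {
		txq->xmit_more_available = true;
	}
}
/* [end aside] */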
+ */ + if (txq2->xmit_more_available) + efx_nic_push_buffers(txq2); + efx_nic_push_buffers(tx_queue); + } else { + tx_queue->xmit_more_available = skb->xmit_more; + } tx_queue->tso_bursts++; return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h index ad39960380180..799c2929c5365 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs.h @@ -158,6 +158,8 @@ struct dma_desc { u32 buffer2_size:13; u32 reserved4:3; } etx; /* -- enhanced -- */ + + u64 all_flags; } des01; unsigned int des2; unsigned int des3; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 6249a4ec08f05..573708123338f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -38,7 +38,6 @@ struct rk_priv_data { bool clock_input; struct clk *clk_mac; - struct clk *clk_mac_pll; struct clk *gmac_clkin; struct clk *mac_clk_rx; struct clk *mac_clk_tx; @@ -208,7 +207,7 @@ static int gmac_clk_init(struct rk_priv_data *bsp_priv) dev_info(dev, "%s: clock input from PHY\n", __func__); } else { if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII) - clk_set_rate(bsp_priv->clk_mac_pll, 50000000); + clk_set_rate(bsp_priv->clk_mac, 50000000); } return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 1e2bcf5f89e13..7d944449f5eff 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -240,6 +240,7 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, int end) { + p->des01.all_flags = 0; p->des01.erx.own = 1; p->des01.erx.buffer1_size = BUF_SIZE_8KiB - 1; @@ -254,7 +255,7 @@ static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, static void enh_desc_init_tx_desc(struct dma_desc *p, int mode, int end) { - p->des01.etx.own = 0; + p->des01.all_flags = 0; if (mode == STMMAC_CHAIN_MODE) ehn_desc_tx_set_on_chain(p, end); else diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index 35ad4f427ae2b..48c3456445b28 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -123,6 +123,7 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, int end) { + p->des01.all_flags = 0; p->des01.rx.own = 1; p->des01.rx.buffer1_size = BUF_SIZE_2KiB - 1; @@ -137,7 +138,7 @@ static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, static void ndesc_init_tx_desc(struct dma_desc *p, int mode, int end) { - p->des01.tx.own = 0; + p->des01.all_flags = 0; if (mode == STMMAC_CHAIN_MODE) ndesc_tx_set_on_chain(p, end); else diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 771cda2a48b2a..2e51b816a7e81 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -721,10 +721,13 @@ static int stmmac_get_ts_info(struct net_device *dev, { struct stmmac_priv *priv = netdev_priv(dev); - if ((priv->hwts_tx_en) && (priv->hwts_rx_en)) { + if ((priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) { - 
info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (priv->ptp_clock) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2c5ce2baca871..c274cdc5df1ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -829,8 +829,11 @@ static int stmmac_init_phy(struct net_device *dev) phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, interface); - if (IS_ERR(phydev)) { + if (IS_ERR_OR_NULL(phydev)) { pr_err("%s: Could not attach to PHY\n", dev->name); + if (!phydev) + return -ENODEV; + return PTR_ERR(phydev); } @@ -1189,41 +1192,41 @@ static int alloc_dma_desc_resources(struct stmmac_priv *priv) goto err_tx_skbuff; if (priv->extend_desc) { - priv->dma_erx = dma_alloc_coherent(priv->device, rxsize * - sizeof(struct - dma_extended_desc), - &priv->dma_rx_phy, - GFP_KERNEL); + priv->dma_erx = dma_zalloc_coherent(priv->device, rxsize * + sizeof(struct + dma_extended_desc), + &priv->dma_rx_phy, + GFP_KERNEL); if (!priv->dma_erx) goto err_dma; - priv->dma_etx = dma_alloc_coherent(priv->device, txsize * - sizeof(struct - dma_extended_desc), - &priv->dma_tx_phy, - GFP_KERNEL); + priv->dma_etx = dma_zalloc_coherent(priv->device, txsize * + sizeof(struct + dma_extended_desc), + &priv->dma_tx_phy, + GFP_KERNEL); if (!priv->dma_etx) { dma_free_coherent(priv->device, priv->dma_rx_size * - sizeof(struct dma_extended_desc), - priv->dma_erx, priv->dma_rx_phy); + sizeof(struct dma_extended_desc), + priv->dma_erx, priv->dma_rx_phy); goto err_dma; } } else { - priv->dma_rx = dma_alloc_coherent(priv->device, rxsize * - sizeof(struct dma_desc), - &priv->dma_rx_phy, - GFP_KERNEL); + priv->dma_rx = dma_zalloc_coherent(priv->device, rxsize * + sizeof(struct dma_desc), + &priv->dma_rx_phy, + GFP_KERNEL); if (!priv->dma_rx) goto err_dma; - priv->dma_tx = dma_alloc_coherent(priv->device, txsize * - sizeof(struct dma_desc), - &priv->dma_tx_phy, - GFP_KERNEL); + priv->dma_tx = dma_zalloc_coherent(priv->device, txsize * + sizeof(struct dma_desc), + &priv->dma_tx_phy, + GFP_KERNEL); if (!priv->dma_tx) { dma_free_coherent(priv->device, priv->dma_rx_size * - sizeof(struct dma_desc), - priv->dma_rx, priv->dma_rx_phy); + sizeof(struct dma_desc), + priv->dma_rx, priv->dma_rx_phy); goto err_dma; } } diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 8c350c5d54ada..4dba5fbc735e3 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -82,7 +82,7 @@ static const struct proto_ops macvtap_socket_ops; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ NETIF_F_TSO6 | NETIF_F_UFO) #define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO) -#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG) +#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG | NETIF_F_FRAGLIST) static struct macvlan_dev *macvtap_get_vlan_rcu(const struct net_device *dev) { @@ -1054,10 +1054,10 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, return 0; case TUNSETSNDBUF: - if (get_user(u, up)) + if (get_user(s, sp)) return -EFAULT; - q->sk.sk_sndbuf = u; + q->sk.sk_sndbuf = s; return 0; case TUNGETVNETHDRSZ: diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 9c71295f2fefb..85e640440bd9b 100644 --- a/drivers/net/phy/broadcom.c +++ 
b/drivers/net/phy/broadcom.c @@ -675,7 +675,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { { PHY_ID_BCM5461, 0xfffffff0 }, { PHY_ID_BCM54616S, 0xfffffff0 }, { PHY_ID_BCM5464, 0xfffffff0 }, - { PHY_ID_BCM5482, 0xfffffff0 }, + { PHY_ID_BCM5481, 0xfffffff0 }, { PHY_ID_BCM5482, 0xfffffff0 }, { PHY_ID_BCM50610, 0xfffffff0 }, { PHY_ID_BCM50610M, 0xfffffff0 }, diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 00cb41e713123..c56cf0b86f2c8 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -833,6 +833,11 @@ static void decode_rxts(struct dp83640_private *dp83640, struct skb_shared_hwtstamps *shhwtstamps = NULL; struct sk_buff *skb; unsigned long flags; + u8 overflow; + + overflow = (phy_rxts->ns_hi >> 14) & 0x3; + if (overflow) + pr_debug("rx timestamp queue overflow, count %d\n", overflow); spin_lock_irqsave(&dp83640->rx_lock, flags); @@ -875,6 +880,7 @@ static void decode_txts(struct dp83640_private *dp83640, struct skb_shared_hwtstamps shhwtstamps; struct sk_buff *skb; u64 ns; + u8 overflow; /* We must already have the skb that triggered this. */ @@ -884,6 +890,17 @@ static void decode_txts(struct dp83640_private *dp83640, pr_debug("have timestamp but tx_queue empty\n"); return; } + + overflow = (phy_txts->ns_hi >> 14) & 0x3; + if (overflow) { + pr_debug("tx timestamp queue overflow, count %d\n", overflow); + while (skb) { + skb_complete_tx_timestamp(skb, NULL); + skb = skb_dequeue(&dp83640->tx_queue); + } + return; + } + ns = phy2txts(phy_txts); memset(&shhwtstamps, 0, sizeof(shhwtstamps)); shhwtstamps.hwtstamp = ns_to_ktime(ns); diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 1960b46add65b..479b93f9581c4 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -52,6 +52,10 @@ static int fixed_phy_update_regs(struct fixed_phy *fp) u16 lpagb = 0; u16 lpa = 0; + if (!fp->status.link) + goto done; + bmsr |= BMSR_LSTATUS | BMSR_ANEGCOMPLETE; + if (fp->status.duplex) { bmcr |= BMCR_FULLDPLX; @@ -96,15 +100,13 @@ static int fixed_phy_update_regs(struct fixed_phy *fp) } } - if (fp->status.link) - bmsr |= BMSR_LSTATUS | BMSR_ANEGCOMPLETE; - if (fp->status.pause) lpa |= LPA_PAUSE_CAP; if (fp->status.asym_pause) lpa |= LPA_PAUSE_ASYM; +done: fp->regs[MII_PHYSID1] = 0; fp->regs[MII_PHYSID2] = 0; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index bdfe51fc3a650..d551df62e61a3 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -796,10 +796,11 @@ static int genphy_config_advert(struct phy_device *phydev) if (phydev->supported & (SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full)) { adv |= ethtool_adv_to_mii_ctrl1000_t(advertise); - if (adv != oldadv) - changed = 1; } + if (adv != oldadv) + changed = 1; + err = phy_write(phydev, MII_CTRL1000, adv); if (err < 0) return err; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 9d15566521a71..cfe49a07c7c12 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -715,10 +715,8 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) val &= 0xffff; } vj = slhc_init(val2+1, val+1); - if (!vj) { - netdev_err(ppp->dev, - "PPP: no memory (VJ compressor)\n"); - err = -ENOMEM; + if (IS_ERR(vj)) { + err = PTR_ERR(vj); break; } ppp_lock(ppp); diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index b62a5e3a1c652..d1c4bc1c4df09 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -313,7 +313,6 @@ 
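/* [Editor's aside, sketch] The dp83640 hunks above read the PHY's 2-bit
 * overflow counter carried in the timestamp's ns_hi word; on a Tx
 * overflow every queued skb is completed with a NULL timestamp so that
 * no socket waits forever for a timestamp that was lost in hardware:
 */
static void sketch_flush_tx_timestamps(struct sk_buff_head *queue,
				       struct sk_buff *skb)
{
	while (skb) {
		/* NULL hwtstamps reports "no timestamp" to the socket */
		skb_complete_tx_timestamp(skb, NULL);
		skb = skb_dequeue(queue);
	}
}
/* [end aside] */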
static void pppoe_flush_dev(struct net_device *dev) if (po->pppoe_dev == dev && sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) { pppox_unbind_sock(sk); - sk->sk_state = PPPOX_ZOMBIE; sk->sk_state_change(sk); po->pppoe_dev = NULL; dev_put(dev); @@ -396,6 +395,8 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) if (!__pppoe_xmit(sk_pppox(relay_po), skb)) goto abort_put; + + sock_put(sk_pppox(relay_po)); } else { if (sock_queue_rcv_skb(sk, skb)) goto abort_kfree; @@ -569,6 +570,9 @@ static int pppoe_create(struct net *net, struct socket *sock) sk->sk_family = PF_PPPOX; sk->sk_protocol = PX_PROTO_OE; + INIT_WORK(&pppox_sk(sk)->proto.pppoe.padt_work, + pppoe_unbind_sock_work); + return 0; } @@ -590,7 +594,7 @@ static int pppoe_release(struct socket *sock) po = pppox_sk(sk); - if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) { + if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } @@ -633,8 +637,6 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, lock_sock(sk); - INIT_WORK(&po->proto.pppoe.padt_work, pppoe_unbind_sock_work); - error = -EINVAL; if (sp->sa_protocol != PX_PROTO_OE) goto end; @@ -664,8 +666,13 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, po->pppoe_dev = NULL; } - memset(sk_pppox(po) + 1, 0, - sizeof(struct pppox_sock) - sizeof(struct sock)); + po->pppoe_ifindex = 0; + memset(&po->pppoe_pa, 0, sizeof(po->pppoe_pa)); + memset(&po->pppoe_relay, 0, sizeof(po->pppoe_relay)); + memset(&po->chan, 0, sizeof(po->chan)); + po->next = NULL; + po->num = 0; + sk->sk_state = PPPOX_NONE; } diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index e3bfbd4d01367..b35199cc8f34d 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -131,24 +131,27 @@ static int lookup_chan_dst(u16 call_id, __be32 d_addr) return i < MAX_CALLID; } -static int add_chan(struct pppox_sock *sock) +static int add_chan(struct pppox_sock *sock, + struct pptp_addr *sa) { static int call_id; spin_lock(&chan_lock); - if (!sock->proto.pptp.src_addr.call_id) { + if (!sa->call_id) { call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, call_id + 1); if (call_id == MAX_CALLID) { call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, 1); if (call_id == MAX_CALLID) goto out_err; } - sock->proto.pptp.src_addr.call_id = call_id; - } else if (test_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap)) + sa->call_id = call_id; + } else if (test_bit(sa->call_id, callid_bitmap)) { goto out_err; + } - set_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap); - rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], sock); + sock->proto.pptp.src_addr = *sa; + set_bit(sa->call_id, callid_bitmap); + rcu_assign_pointer(callid_sock[sa->call_id], sock); spin_unlock(&chan_lock); return 0; @@ -417,15 +420,29 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr, struct sock *sk = sock->sk; struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr; struct pppox_sock *po = pppox_sk(sk); - struct pptp_opt *opt = &po->proto.pptp; int error = 0; + if (sockaddr_len < sizeof(struct sockaddr_pppox)) + return -EINVAL; + lock_sock(sk); - opt->src_addr = sp->sa_addr.pptp; - if (add_chan(po)) + if (sk->sk_state & PPPOX_DEAD) { + error = -EALREADY; + goto out; + } + + if (sk->sk_state & PPPOX_BOUND) { error = -EBUSY; + goto out; + } + + if (add_chan(po, &sp->sa_addr.pptp)) + error = -EBUSY; + else + sk->sk_state |= PPPOX_BOUND; +out: release_sock(sk); return error; } @@ -441,6 
+458,9 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, struct flowi4 fl4; int error = 0; + if (sockaddr_len < sizeof(struct sockaddr_pppox)) + return -EINVAL; + if (sp->sa_protocol != PX_PROTO_PPTP) return -EINVAL; @@ -493,7 +513,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, } opt->dst_addr = sp->sa_addr.pptp; - sk->sk_state = PPPOX_CONNECTED; + sk->sk_state |= PPPOX_CONNECTED; end: release_sock(sk); diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index 079f7adfcde5e..27ed25252aac5 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -84,8 +84,9 @@ static long decode(unsigned char **cpp); static unsigned char * put16(unsigned char *cp, unsigned short x); static unsigned short pull16(unsigned char **cpp); -/* Initialize compression data structure +/* Allocate compression data structure * slots must be in range 0 to 255 (zero meaning no compression) + * Returns pointer to structure or ERR_PTR() on error. */ struct slcompress * slhc_init(int rslots, int tslots) @@ -94,11 +95,14 @@ slhc_init(int rslots, int tslots) register struct cstate *ts; struct slcompress *comp; + if (rslots < 0 || rslots > 255 || tslots < 0 || tslots > 255) + return ERR_PTR(-EINVAL); + comp = kzalloc(sizeof(struct slcompress), GFP_KERNEL); if (! comp) goto out_fail; - if ( rslots > 0 && rslots < 256 ) { + if (rslots > 0) { size_t rsize = rslots * sizeof(struct cstate); comp->rstate = kzalloc(rsize, GFP_KERNEL); if (! comp->rstate) @@ -106,7 +110,7 @@ slhc_init(int rslots, int tslots) comp->rslot_limit = rslots - 1; } - if ( tslots > 0 && tslots < 256 ) { + if (tslots > 0) { size_t tsize = tslots * sizeof(struct cstate); comp->tstate = kzalloc(tsize, GFP_KERNEL); if (! comp->tstate) @@ -141,7 +145,7 @@ slhc_init(int rslots, int tslots) out_free: kfree(comp); out_fail: - return NULL; + return ERR_PTR(-ENOMEM); } diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 05387b1e2e95e..a17d86a577347 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -164,7 +164,7 @@ static int sl_alloc_bufs(struct slip *sl, int mtu) if (cbuff == NULL) goto err_exit; slcomp = slhc_init(16, 16); - if (slcomp == NULL) + if (IS_ERR(slcomp)) goto err_exit; #endif spin_lock_bh(&sl->lock); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6928448f6b7f1..2b45d0168c3c1 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1845,10 +1845,10 @@ static int team_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) struct team *team = netdev_priv(dev); struct team_port *port; - rcu_read_lock(); - list_for_each_entry_rcu(port, &team->port_list, list) + mutex_lock(&team->lock); + list_for_each_entry(port, &team->port_list, list) vlan_vid_del(port->dev, proto, vid); - rcu_read_unlock(); + mutex_unlock(&team->lock); return 0; } diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index e4b7a47a825c7..5efaa9ab5af59 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -100,7 +100,7 @@ static const struct net_device_ops cdc_mbim_netdev_ops = { .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, - .ndo_change_mtu = usbnet_change_mtu, + .ndo_change_mtu = cdc_ncm_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_vlan_rx_add_vid = cdc_mbim_rx_add_vid, diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 8067b8fbb0eea..0b481c30979b6 100644 --- 
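/* [Editor's aside] The slhc/ppp/slip changes above convert slhc_init()
 * from returning NULL on any failure to the kernel's ERR_PTR convention,
 * so callers can report -EINVAL (bad slot count) separately from
 * -ENOMEM. Caller-side pattern, sketched with a hypothetical helper:
 */
static int sketch_attach_vj(struct slcompress **slot)
{
	struct slcompress *comp = slhc_init(16, 16);

	if (IS_ERR(comp))
		return PTR_ERR(comp);	/* propagate the specific errno */
	*slot = comp;
	return 0;
}
/* [end aside] */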
a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -687,6 +688,33 @@ static void cdc_ncm_free(struct cdc_ncm_ctx *ctx) kfree(ctx); } +/* we need to override the usbnet change_mtu ndo for two reasons: + * - respect the negotiated maximum datagram size + * - avoid unwanted changes to rx and tx buffers + */ +int cdc_ncm_change_mtu(struct net_device *net, int new_mtu) +{ + struct usbnet *dev = netdev_priv(net); + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + int maxmtu = ctx->max_datagram_size - cdc_ncm_eth_hlen(dev); + + if (new_mtu <= 0 || new_mtu > maxmtu) + return -EINVAL; + net->mtu = new_mtu; + return 0; +} +EXPORT_SYMBOL_GPL(cdc_ncm_change_mtu); + +static const struct net_device_ops cdc_ncm_netdev_ops = { + .ndo_open = usbnet_open, + .ndo_stop = usbnet_stop, + .ndo_start_xmit = usbnet_start_xmit, + .ndo_tx_timeout = usbnet_tx_timeout, + .ndo_change_mtu = cdc_ncm_change_mtu, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +}; + int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting) { const struct usb_cdc_union_desc *union_desc = NULL; @@ -861,6 +889,9 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ /* add our sysfs attrs */ dev->net->sysfs_groups[0] = &cdc_ncm_sysfs_attr_group; + /* must handle MTU changes */ + dev->net->netdev_ops = &cdc_ncm_netdev_ops; + return 0; error2: diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index f603f362504bc..cffb25280a3b7 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -542,6 +542,7 @@ static const struct usb_device_id products[] = { /* 3. Combined interface devices matching on interface number */ {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */ + {QMI_FIXED_INTF(0x05c6, 0x6001, 3)}, /* 4G LTE usb-modem U901 */ {QMI_FIXED_INTF(0x05c6, 0x7000, 0)}, {QMI_FIXED_INTF(0x05c6, 0x7001, 1)}, {QMI_FIXED_INTF(0x05c6, 0x7002, 1)}, @@ -764,12 +765,17 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1199, 0x9056, 8)}, /* Sierra Wireless Modem */ {QMI_FIXED_INTF(0x1199, 0x9057, 8)}, {QMI_FIXED_INTF(0x1199, 0x9061, 8)}, /* Sierra Wireless Modem */ + {QMI_FIXED_INTF(0x1199, 0x9070, 8)}, /* Sierra Wireless MC74xx/EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9070, 10)}, /* Sierra Wireless MC74xx/EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9071, 8)}, /* Sierra Wireless MC74xx/EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx/EM74xx */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_FIXED_INTF(0x1bc7, 0x1201, 2)}, /* Telit LE920 */ + {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */ {QMI_FIXED_INTF(0x0b3c, 0xc000, 4)}, /* Olivetti Olicard 100 */ {QMI_FIXED_INTF(0x0b3c, 0xc001, 4)}, /* Olivetti Olicard 120 */ {QMI_FIXED_INTF(0x0b3c, 0xc002, 4)}, /* Olivetti Olicard 140 */ diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index aafa1a1898e43..ce6fad1c43e64 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3006,17 +3006,6 @@ static int rtl8152_open(struct net_device *netdev) mutex_lock(&tp->control); - /* The 
WORK_ENABLE may be set when autoresume occurs */ - if (test_bit(WORK_ENABLE, &tp->flags)) { - clear_bit(WORK_ENABLE, &tp->flags); - usb_kill_urb(tp->intr_urb); - cancel_delayed_work_sync(&tp->schedule); - - /* disable the tx/rx, if the workqueue has enabled them. */ - if (netif_carrier_ok(netdev)) - tp->rtl_ops.disable(tp); - } - tp->rtl_ops.up(tp); rtl8152_set_speed(tp, AUTONEG_ENABLE, @@ -3063,12 +3052,6 @@ static int rtl8152_close(struct net_device *netdev) } else { mutex_lock(&tp->control); - /* The autosuspend may have been enabled and wouldn't - * be disable when autoresume occurs, because the - * netif_running() would be false. - */ - rtl_runtime_suspend_enable(tp, false); - tp->rtl_ops.down(tp); mutex_unlock(&tp->control); @@ -3369,7 +3352,7 @@ static int rtl8152_resume(struct usb_interface *intf) netif_device_attach(tp->netdev); } - if (netif_running(tp->netdev)) { + if (netif_running(tp->netdev) && tp->netdev->flags & IFF_UP) { if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { rtl_runtime_suspend_enable(tp, false); clear_bit(SELECTIVE_SUSPEND, &tp->flags); @@ -3387,6 +3370,8 @@ static int rtl8152_resume(struct usb_interface *intf) } usb_submit_urb(tp->intr_urb, GFP_KERNEL); } else if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { + if (tp->netdev->flags & IFF_UP) + rtl_runtime_suspend_enable(tp, false); clear_bit(SELECTIVE_SUSPEND, &tp->flags); } diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 3c86b107275a8..e0498571ae267 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -778,7 +778,7 @@ int usbnet_stop (struct net_device *net) { struct usbnet *dev = netdev_priv(net); struct driver_info *info = dev->driver_info; - int retval, pm; + int retval, pm, mpn; clear_bit(EVENT_DEV_OPEN, &dev->flags); netif_stop_queue (net); @@ -809,6 +809,8 @@ int usbnet_stop (struct net_device *net) usbnet_purge_paused_rxq(dev); + mpn = !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags); + /* deferred work (task, timer, softirq) must also stop. * can't flush_scheduled_work() until we drop rtnl (later), * else workers could deadlock; so make workers a NOP. @@ -819,8 +821,7 @@ int usbnet_stop (struct net_device *net) if (!pm) usb_autopm_put_interface(dev->intf); - if (info->manage_power && - !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags)) + if (info->manage_power && mpn) info->manage_power(dev, 0); else usb_autopm_put_interface(dev->intf); diff --git a/drivers/net/veth.c b/drivers/net/veth.c index c8186ffda1a31..2e61a799f32a9 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -117,12 +117,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) kfree_skb(skb); goto drop; } - /* don't change ip_summed == CHECKSUM_PARTIAL, as that - * will cause bad checksum on forwarded packets - */ - if (skb->ip_summed == CHECKSUM_NONE && - rcv->features & NETIF_F_RXCSUM) - skb->ip_summed = CHECKSUM_UNNECESSARY; if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 63c7810e1545a..237f8e5e493dd 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1756,9 +1756,9 @@ static int virtnet_probe(struct virtio_device *vdev) /* Do we support "hardware" checksums? */ if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { /* This opens up the world of extra features. 
*/ - dev->hw_features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST; + dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; if (csum) - dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST; + dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO @@ -1828,7 +1828,8 @@ static int virtnet_probe(struct virtio_device *vdev) else vi->hdr_len = sizeof(struct virtio_net_hdr); - if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT)) + if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || + virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) vi->any_header_sg = true; if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 21a0fbf1ed947..940f78e419932 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2212,6 +2212,8 @@ static int vxlan_open(struct net_device *dev) if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) { ret = vxlan_igmp_join(vxlan); + if (ret == -EADDRINUSE) + ret = 0; if (ret) { vxlan_sock_release(vs); return ret; @@ -2579,7 +2581,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct vxlan_net *vn = net_generic(src_net, vxlan_net_id); - struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_dev *vxlan = netdev_priv(dev), *tmp; struct vxlan_rdst *dst = &vxlan->default_dst; __u32 vni; int err; @@ -2712,9 +2714,13 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) vxlan->flags |= VXLAN_F_REMCSUM_NOPARTIAL; - if (vxlan_find_vni(src_net, vni, use_ipv6 ? AF_INET6 : AF_INET, - vxlan->dst_port, vxlan->flags)) { - pr_info("duplicate VNI %u\n", vni); + list_for_each_entry(tmp, &vn->vxlan_list, next) { + if (tmp->default_dst.remote_vni == vni && + (tmp->default_dst.remote_ip.sa.sa_family == AF_INET6 || + tmp->saddr.sa.sa_family == AF_INET6) == use_ipv6 && + tmp->dst_port == vxlan->dst_port && + (tmp->flags & VXLAN_F_RCV_FLAGS) == + (vxlan->flags & VXLAN_F_RCV_FLAGS)) return -EEXIST; } diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c index 2fd9e180272b3..c5dc6b57212e4 100644 --- a/drivers/net/wireless/ath/ath10k/htc.c +++ b/drivers/net/wireless/ath/ath10k/htc.c @@ -163,8 +163,10 @@ int ath10k_htc_send(struct ath10k_htc *htc, skb_cb->eid = eid; skb_cb->paddr = dma_map_single(dev, skb->data, skb->len, DMA_TO_DEVICE); ret = dma_mapping_error(dev, skb_cb->paddr); - if (ret) + if (ret) { + ret = -EIO; goto err_credits; + } sg_item.transfer_id = ep->eid; sg_item.transfer_context = skb; diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c index cbd2bc9e62025..7f4854a52a7c1 100644 --- a/drivers/net/wireless/ath/ath10k/htt_tx.c +++ b/drivers/net/wireless/ath/ath10k/htt_tx.c @@ -371,8 +371,10 @@ int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu) skb_cb->paddr = dma_map_single(dev, msdu->data, msdu->len, DMA_TO_DEVICE); res = dma_mapping_error(dev, skb_cb->paddr); - if (res) + if (res) { + res = -EIO; goto err_free_txdesc; + } skb_put(txdesc, len); cmd = (struct htt_cmd *)txdesc->data; @@ -463,8 +465,10 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu) skb_cb->paddr = dma_map_single(dev, msdu->data, msdu->len, DMA_TO_DEVICE); res = dma_mapping_error(dev, skb_cb->paddr); - if (res) + if (res) { + res = -EIO; goto err_free_txbuf; + } if (likely(use_frags)) { frags = skb_cb->htt.txbuf->frags; diff 
--git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 973485bd4121e..1734cc50ded81 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3183,7 +3183,7 @@ static int ath10k_config(struct ieee80211_hw *hw, u32 changed) static u32 get_nss_from_chainmask(u16 chain_mask) { - if ((chain_mask & 0x15) == 0x15) + if ((chain_mask & 0xf) == 0xf) return 4; else if ((chain_mask & 0x7) == 0x7) return 3; @@ -4464,6 +4464,21 @@ static int ath10k_set_rts_threshold(struct ieee80211_hw *hw, u32 value) return ret; } +static int ath10k_mac_op_set_frag_threshold(struct ieee80211_hw *hw, u32 value) +{ + /* Even though there's a WMI enum for the fragmentation threshold, no + * known firmware actually implements it. Moreover it is not possible + * to leave frame fragmentation to mac80211 because firmware clears the + * "more fragments" bit in frame control, making it impossible for + * remote devices to reassemble frames. + * + * Hence implement a dummy callback just to say fragmentation isn't + * supported. This effectively prevents mac80211 from doing frame + * fragmentation in software. + */ + return -EOPNOTSUPP; +} + static void ath10k_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 queues, bool drop) { @@ -5108,6 +5123,7 @@ static const struct ieee80211_ops ath10k_ops = { .remain_on_channel = ath10k_remain_on_channel, .cancel_remain_on_channel = ath10k_cancel_remain_on_channel, .set_rts_threshold = ath10k_set_rts_threshold, + .set_frag_threshold = ath10k_mac_op_set_frag_threshold, .flush = ath10k_flush, .tx_last_beacon = ath10k_tx_last_beacon, .set_antenna = ath10k_set_antenna, diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 7681237fe298a..3c4c800ab5051 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1378,8 +1378,10 @@ static int ath10k_pci_hif_exchange_bmi_msg(struct ath10k *ar, req_paddr = dma_map_single(ar->dev, treq, req_len, DMA_TO_DEVICE); ret = dma_mapping_error(ar->dev, req_paddr); - if (ret) + if (ret) { + ret = -EIO; goto err_dma; + } if (resp && resp_len) { tresp = kzalloc(*resp_len, GFP_KERNEL); @@ -1391,8 +1393,10 @@ static int ath10k_pci_hif_exchange_bmi_msg(struct ath10k *ar, resp_paddr = dma_map_single(ar->dev, tresp, *resp_len, DMA_FROM_DEVICE); ret = dma_mapping_error(ar->dev, resp_paddr); - if (ret) + if (ret) { + ret = -EIO; goto err_req; + } xfer.wait_for_resp = true; xfer.resp_len = 0; @@ -1524,12 +1528,11 @@ static int ath10k_pci_get_num_banks(struct ath10k *ar) switch (MS(ar->chip_id, SOC_CHIP_ID_REV)) { case QCA6174_HW_1_0_CHIP_ID_REV: case QCA6174_HW_1_1_CHIP_ID_REV: + case QCA6174_HW_2_1_CHIP_ID_REV: + case QCA6174_HW_2_2_CHIP_ID_REV: return 3; case QCA6174_HW_1_3_CHIP_ID_REV: return 2; - case QCA6174_HW_2_1_CHIP_ID_REV: - case QCA6174_HW_2_2_CHIP_ID_REV: - return 6; case QCA6174_HW_3_0_CHIP_ID_REV: case QCA6174_HW_3_1_CHIP_ID_REV: case QCA6174_HW_3_2_CHIP_ID_REV: diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index c7ea77edce245..408ecd98e61be 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -2517,6 +2517,7 @@ void ath10k_wmi_event_host_swba(struct ath10k *ar, struct sk_buff *skb) ath10k_warn(ar, "failed to map beacon: %d\n", ret); dev_kfree_skb_any(bcn); + ret = -EIO; goto skip; } diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h index e82a0d4ce23f9..5dbc617ecf8a8
100644 --- a/drivers/net/wireless/ath/ath9k/htc.h +++ b/drivers/net/wireless/ath/ath9k/htc.h @@ -440,9 +440,9 @@ static inline void ath9k_htc_stop_btcoex(struct ath9k_htc_priv *priv) } #endif /* CONFIG_ATH9K_BTCOEX_SUPPORT */ -#define OP_BT_PRIORITY_DETECTED BIT(3) -#define OP_BT_SCAN BIT(4) -#define OP_TSF_RESET BIT(6) +#define OP_BT_PRIORITY_DETECTED 3 +#define OP_BT_SCAN 4 +#define OP_TSF_RESET 6 enum htc_op_flags { HTC_FWFLAG_NO_RMW, diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index f8d11efa7b0f1..46a389c20bfc1 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -874,6 +874,7 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) hw->max_rate_tries = 10; hw->sta_data_size = sizeof(struct ath_node); hw->vif_data_size = sizeof(struct ath_vif); + hw->extra_tx_headroom = 4; hw->wiphy->available_antennas_rx = BIT(ah->caps.max_rxchains) - 1; hw->wiphy->available_antennas_tx = BIT(ah->caps.max_txchains) - 1; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index b0badef71ce79..d5f2fbf62d726 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -216,11 +216,13 @@ static bool ath_prepare_reset(struct ath_softc *sc) ath_stop_ani(sc); ath9k_hw_disable_interrupts(ah); - if (!ath_drain_all_txq(sc)) - ret = false; - - if (!ath_stoprecv(sc)) - ret = false; + if (AR_SREV_9300_20_OR_LATER(ah)) { + ret &= ath_stoprecv(sc); + ret &= ath_drain_all_txq(sc); + } else { + ret &= ath_drain_all_txq(sc); + ret &= ath_stoprecv(sc); + } return ret; } diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index b2f9521fe551a..4cdac7801c8be 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -5365,6 +5365,10 @@ static void b43_supported_bands(struct b43_wldev *dev, bool *have_2ghz_phy, *have_5ghz_phy = true; return; case 0x4321: /* BCM4306 */ + /* There are 14e4:4321 PCI devs with 2.4 GHz BCM4321 (N-PHY) */ + if (dev->phy.type != B43_PHYTYPE_G) + break; + /* fall through */ case 0x4313: /* BCM4311 */ case 0x431a: /* BCM4318 */ case 0x432a: /* BCM4321 */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c new file mode 100644 index 0000000000000..d60a467a983c6 --- /dev/null +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -0,0 +1,2717 @@ +/****************************************************************************** + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2007 - 2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, + * USA + * + * The full GNU General Public License is included in this distribution + * in the file called COPYING. + * + * Contact Information: + * Intel Linux Wireless + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + * BSD LICENSE + * + * Copyright(c) 2005 - 2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + *****************************************************************************/ +#include <linux/pci.h> +#include <linux/pci-aspm.h> +#include <linux/interrupt.h> +#include <linux/debugfs.h> +#include <linux/sched.h> +#include <linux/bitops.h> +#include <linux/gfp.h> +#include <linux/vmalloc.h> + +#include "iwl-drv.h" +#include "iwl-trans.h" +#include "iwl-csr.h" +#include "iwl-prph.h" +#include "iwl-scd.h" +#include "iwl-agn-hw.h" +#include "iwl-fw-error-dump.h" +#include "internal.h" +#include "iwl-fh.h" + +/* extended range in FW SRAM */ +#define IWL_FW_MEM_EXTENDED_START 0x40000 +#define IWL_FW_MEM_EXTENDED_END 0x57FFF + +static void iwl_pcie_free_fw_monitor(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + if (!trans_pcie->fw_mon_page) + return; + + dma_unmap_page(trans->dev, trans_pcie->fw_mon_phys, + trans_pcie->fw_mon_size, DMA_FROM_DEVICE); + __free_pages(trans_pcie->fw_mon_page, + get_order(trans_pcie->fw_mon_size)); + trans_pcie->fw_mon_page = NULL; + trans_pcie->fw_mon_phys = 0; + trans_pcie->fw_mon_size = 0; +} + +static void iwl_pcie_alloc_fw_monitor(struct iwl_trans *trans, u8 max_power) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct page *page = NULL; + dma_addr_t phys; + u32 size = 0; + u8 power; + + if (!max_power) { + /* default max_power is maximum */ + max_power = 26; + } else { + max_power += 11; + } + + if (WARN(max_power > 26, + "External buffer size for monitor is too big %d, check the FW TLV\n", + max_power)) + return; + + if (trans_pcie->fw_mon_page) { + dma_sync_single_for_device(trans->dev, trans_pcie->fw_mon_phys, + trans_pcie->fw_mon_size, + DMA_FROM_DEVICE); + return; + } + + phys = 0; + for (power = max_power; power >= 11; power--) { + int order; + + size = BIT(power); + order = get_order(size); + page = alloc_pages(__GFP_COMP | __GFP_NOWARN | __GFP_ZERO, + order); + if (!page) + continue; + + phys = dma_map_page(trans->dev, page, 0, PAGE_SIZE << order, + DMA_FROM_DEVICE); + if (dma_mapping_error(trans->dev, phys)) { + __free_pages(page, order); + page = NULL; + continue; + } + IWL_INFO(trans, + "Allocated 0x%08x bytes (order %d) for firmware monitor.\n", + size, order); + break; + } + + if (WARN_ON_ONCE(!page)) + return; + + if (power != max_power) + IWL_ERR(trans, + "Sorry - debug buffer is only %luK while you requested %luK\n", + (unsigned long)BIT(power - 10), + (unsigned long)BIT(max_power - 10)); + + trans_pcie->fw_mon_page = page; + trans_pcie->fw_mon_phys = phys; + trans_pcie->fw_mon_size = size; +} + +static u32 iwl_trans_pcie_read_shr(struct iwl_trans *trans, u32 reg) +{ + iwl_write32(trans, HEEP_CTRL_WRD_PCIEX_CTRL_REG, + ((reg & 0x0000ffff) | (2 << 28))); + return iwl_read32(trans, HEEP_CTRL_WRD_PCIEX_DATA_REG); +} + +static void iwl_trans_pcie_write_shr(struct iwl_trans *trans, u32 reg, u32 val) +{ + iwl_write32(trans, HEEP_CTRL_WRD_PCIEX_DATA_REG, val); + iwl_write32(trans, HEEP_CTRL_WRD_PCIEX_CTRL_REG, + ((reg & 0x0000ffff) | (3 << 28))); +} + +static void iwl_pcie_set_pwr(struct iwl_trans *trans, bool vaux) +{ + if (trans->cfg->apmg_not_supported) + return; + + if (vaux && pci_pme_capable(to_pci_dev(trans->dev), PCI_D3cold)) + iwl_set_bits_mask_prph(trans, APMG_PS_CTRL_REG, + APMG_PS_CTRL_VAL_PWR_SRC_VAUX, + ~APMG_PS_CTRL_MSK_PWR_SRC); + else + iwl_set_bits_mask_prph(trans, APMG_PS_CTRL_REG, + APMG_PS_CTRL_VAL_PWR_SRC_VMAIN, + ~APMG_PS_CTRL_MSK_PWR_SRC); +} + +/* PCI registers */ +#define PCI_CFG_RETRY_TIMEOUT 0x041 + +static void iwl_pcie_apm_config(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + u16 lctl; + u16 cap; + + /* + * HW bug W/A for instability in PCIe bus L0S->L1 transition. + * Check if BIOS (or OS) enabled L1-ASPM on this device. + * If so (likely), disable L0S, so device moves directly L0->L1; + * costs negligible amount of power savings. + * If not (unlikely), enable L0S, so there is at least some + * power savings, even without L1. + */ + pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_LNKCTL, &lctl); + if (lctl & PCI_EXP_LNKCTL_ASPM_L1) + iwl_set_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED); + else + iwl_clear_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED); + trans->pm_support = !(lctl & PCI_EXP_LNKCTL_ASPM_L0S); + + pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_DEVCTL2, &cap); + trans->ltr_enabled = cap & PCI_EXP_DEVCTL2_LTR_EN; + dev_info(trans->dev, "L1 %sabled - LTR %sabled\n", + (lctl & PCI_EXP_LNKCTL_ASPM_L1) ? "En" : "Dis", + trans->ltr_enabled ? "En" : "Dis"); +} + +/* + * Start up NIC's basic functionality after it has been reset + * (e.g. after platform boot, or shutdown via iwl_pcie_apm_stop()) + * NOTE: This does not load uCode nor start the embedded processor + */ +static int iwl_pcie_apm_init(struct iwl_trans *trans) +{ + int ret = 0; + IWL_DEBUG_INFO(trans, "Init card's basic functions\n"); + + /* + * Use "set_bit" below rather than "write", to preserve any hardware + * bits already set by default after reset. + */ + + /* Disable L0S exit timer (platform NMI Work/Around) */ + if (trans->cfg->device_family != IWL_DEVICE_FAMILY_8000) + iwl_set_bit(trans, CSR_GIO_CHICKEN_BITS, + CSR_GIO_CHICKEN_BITS_REG_BIT_DIS_L0S_EXIT_TIMER); + + /* + * Disable L0s without affecting L1; + * don't wait for ICH L0s (ICH bug W/A) + */ + iwl_set_bit(trans, CSR_GIO_CHICKEN_BITS, + CSR_GIO_CHICKEN_BITS_REG_BIT_L1A_NO_L0S_RX); + + /* Set FH wait threshold to maximum (HW error during stress W/A) */ + iwl_set_bit(trans, CSR_DBG_HPET_MEM_REG, CSR_DBG_HPET_MEM_REG_VAL); + + /* + * Enable HAP INTA (interrupt from management bus) to + * wake device's PCI Express link L1a -> L0s + */ + iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_BIT_HAP_WAKE_L1A); + + iwl_pcie_apm_config(trans); + + /* Configure analog phase-lock-loop before activating to D0A */ + if (trans->cfg->base_params->pll_cfg_val) + iwl_set_bit(trans, CSR_ANA_PLL_CFG, + trans->cfg->base_params->pll_cfg_val); + + /* + * Set "initialization complete" bit to move adapter from + * D0U* --> D0A* (powered-up active) state. + */ + iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE); + + /* + * Wait for clock stabilization; once stabilized, access to + * device-internal resources is supported, e.g. iwl_write_prph() + * and accesses to uCode SRAM. + */ + ret = iwl_poll_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, 25000); + if (ret < 0) { + IWL_DEBUG_INFO(trans, "Failed to init the card\n"); + goto out; + } + + if (trans->cfg->host_interrupt_operation_mode) { + /* + * This is a bit of an abuse - the workaround below is needed + * for 7260 / 3160 only, so we key it off + * host_interrupt_operation_mode (which is set for exactly + * those devices) even though what follows is not conceptually + * related to the interrupt operation mode. + * + * Enable the oscillator to count wake up time for L1 exit. This + * consumes slightly more power (100uA) - but allows to be sure + * that we wake up from L1 on time. + * + * This looks weird: read twice the same register, discard the + * value, set a bit, and yet again, read that same register + * just to discard the value. But that's the way the hardware + * seems to like it.
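 + * + * (To be explicit: the two reads before and the two reads after setting OSC_CLK_FORCE_CONTROL below are dummy accesses whose results are deliberately discarded; only the bit set in between has a lasting effect.)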
+ */ + iwl_read_prph(trans, OSC_CLK); + iwl_read_prph(trans, OSC_CLK); + iwl_set_bits_prph(trans, OSC_CLK, OSC_CLK_FORCE_CONTROL); + iwl_read_prph(trans, OSC_CLK); + iwl_read_prph(trans, OSC_CLK); + } + + /* + * Enable DMA clock and wait for it to stabilize. + * + * Write to "CLK_EN_REG"; "1" bits enable clocks, while "0" + * bits do not disable clocks. This preserves any hardware + * bits already set by default in "CLK_CTRL_REG" after reset. + */ + if (!trans->cfg->apmg_not_supported) { + iwl_write_prph(trans, APMG_CLK_EN_REG, + APMG_CLK_VAL_DMA_CLK_RQT); + udelay(20); + + /* Disable L1-Active */ + iwl_set_bits_prph(trans, APMG_PCIDEV_STT_REG, + APMG_PCIDEV_STT_VAL_L1_ACT_DIS); + + /* Clear the interrupt in APMG if the NIC is in RFKILL */ + iwl_write_prph(trans, APMG_RTC_INT_STT_REG, + APMG_RTC_INT_STT_RFKILL); + } + + set_bit(STATUS_DEVICE_ENABLED, &trans->status); + +out: + return ret; +} + +/* + * Enable LP XTAL to avoid HW bug where device may consume much power if + * FW is not loaded after device reset. LP XTAL is disabled by default + * after device HW reset. Do it only if XTAL is fed by internal source. + * Configure device's "persistence" mode to avoid resetting XTAL again when + * SHRD_HW_RST occurs in S3. + */ +static void iwl_pcie_apm_lp_xtal_enable(struct iwl_trans *trans) +{ + int ret; + u32 apmg_gp1_reg; + u32 apmg_xtal_cfg_reg; + u32 dl_cfg_reg; + + /* Force XTAL ON */ + __iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_XTAL_ON); + + /* Reset entire device - do controller reset (results in SHRD_HW_RST) */ + iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_SW_RESET); + + udelay(10); + + /* + * Set "initialization complete" bit to move adapter from + * D0U* --> D0A* (powered-up active) state. + */ + iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE); + + /* + * Wait for clock stabilization; once stabilized, access to + * device-internal resources is possible. + */ + ret = iwl_poll_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, + 25000); + if (WARN_ON(ret < 0)) { + IWL_ERR(trans, "Access time out - failed to enable LP XTAL\n"); + /* Release XTAL ON request */ + __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_XTAL_ON); + return; + } + + /* + * Clear "disable persistence" to avoid LP XTAL resetting when + * SHRD_HW_RST is applied in S3. + */ + iwl_clear_bits_prph(trans, APMG_PCIDEV_STT_REG, + APMG_PCIDEV_STT_VAL_PERSIST_DIS); + + /* + * Force APMG XTAL to be active to prevent its disabling by HW + * caused by APMG idle state. + */ + apmg_xtal_cfg_reg = iwl_trans_pcie_read_shr(trans, + SHR_APMG_XTAL_CFG_REG); + iwl_trans_pcie_write_shr(trans, SHR_APMG_XTAL_CFG_REG, + apmg_xtal_cfg_reg | + SHR_APMG_XTAL_CFG_XTAL_ON_REQ); + + /* + * Reset entire device again - do controller reset (results in + * SHRD_HW_RST). Turn MAC off before proceeding. 
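 + * + * (Note that XTAL_ON was forced on at the top of this function and is only released at the end, so the crystal stays powered across this second reset while the MAC is taken down.)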
+ */ + iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_SW_RESET); + + udelay(10); + + /* Enable LP XTAL by indirect access through CSR */ + apmg_gp1_reg = iwl_trans_pcie_read_shr(trans, SHR_APMG_GP1_REG); + iwl_trans_pcie_write_shr(trans, SHR_APMG_GP1_REG, apmg_gp1_reg | + SHR_APMG_GP1_WF_XTAL_LP_EN | + SHR_APMG_GP1_CHICKEN_BIT_SELECT); + + /* Clear delay line clock power up */ + dl_cfg_reg = iwl_trans_pcie_read_shr(trans, SHR_APMG_DL_CFG_REG); + iwl_trans_pcie_write_shr(trans, SHR_APMG_DL_CFG_REG, dl_cfg_reg & + ~SHR_APMG_DL_CFG_DL_CLOCK_POWER_UP); + + /* + * Enable persistence mode to avoid LP XTAL resetting when + * SHRD_HW_RST is applied in S3. + */ + iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_PERSIST_MODE); + + /* + * Clear "initialization complete" bit to move adapter from + * D0A* (powered-up Active) --> D0U* (Uninitialized) state. + */ + iwl_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_INIT_DONE); + + /* Activates XTAL resources monitor */ + __iwl_trans_pcie_set_bit(trans, CSR_MONITOR_CFG_REG, + CSR_MONITOR_XTAL_RESOURCES); + + /* Release XTAL ON request */ + __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_XTAL_ON); + udelay(10); + + /* Release APMG XTAL */ + iwl_trans_pcie_write_shr(trans, SHR_APMG_XTAL_CFG_REG, + apmg_xtal_cfg_reg & + ~SHR_APMG_XTAL_CFG_XTAL_ON_REQ); +} + +static int iwl_pcie_apm_stop_master(struct iwl_trans *trans) +{ + int ret = 0; + + /* stop device's busmaster DMA activity */ + iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_STOP_MASTER); + + ret = iwl_poll_bit(trans, CSR_RESET, + CSR_RESET_REG_FLAG_MASTER_DISABLED, + CSR_RESET_REG_FLAG_MASTER_DISABLED, 100); + if (ret < 0) + IWL_WARN(trans, "Master Disable Timed Out, 100 usec\n"); + + IWL_DEBUG_INFO(trans, "stop master\n"); + + return ret; +} + +static void iwl_pcie_apm_stop(struct iwl_trans *trans, bool op_mode_leave) +{ + IWL_DEBUG_INFO(trans, "Stop card, put in low power state\n"); + + if (op_mode_leave) { + if (!test_bit(STATUS_DEVICE_ENABLED, &trans->status)) + iwl_pcie_apm_init(trans); + + /* inform ME that we are leaving */ + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) + iwl_set_bits_prph(trans, APMG_PCIDEV_STT_REG, + APMG_PCIDEV_STT_VAL_WAKE_ME); + else if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) { + iwl_set_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); + iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_PREPARE | + CSR_HW_IF_CONFIG_REG_ENABLE_PME); + mdelay(1); + iwl_clear_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); + } + mdelay(5); + } + + clear_bit(STATUS_DEVICE_ENABLED, &trans->status); + + /* Stop device's DMA activity */ + iwl_pcie_apm_stop_master(trans); + + if (trans->cfg->lp_xtal_workaround) { + iwl_pcie_apm_lp_xtal_enable(trans); + return; + } + + /* Reset the entire device */ + iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_SW_RESET); + + udelay(10); + + /* + * Clear "initialization complete" bit to move adapter from + * D0A* (powered-up Active) --> D0U* (Uninitialized) state. 
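 + * + * (Recap of the stop path up to this point: bus-master DMA was stopped in iwl_pcie_apm_stop_master(), SW_RESET was asserted, and dropping INIT_DONE below finally moves the device out of D0A*.)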
+ */ + iwl_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_INIT_DONE); +} + +static int iwl_pcie_nic_init(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + /* nic_init */ + spin_lock(&trans_pcie->irq_lock); + iwl_pcie_apm_init(trans); + + spin_unlock(&trans_pcie->irq_lock); + + iwl_pcie_set_pwr(trans, false); + + iwl_op_mode_nic_config(trans->op_mode); + + /* Allocate the RX queue, or reset if it is already allocated */ + iwl_pcie_rx_init(trans); + + /* Allocate or reset and init all Tx and Command queues */ + if (iwl_pcie_tx_init(trans)) + return -ENOMEM; + + if (trans->cfg->base_params->shadow_reg_enable) { + /* enable shadow regs in HW */ + iwl_set_bit(trans, CSR_MAC_SHADOW_REG_CTRL, 0x800FFFFF); + IWL_DEBUG_INFO(trans, "Enabling shadow registers in device\n"); + } + + return 0; +} + +#define HW_READY_TIMEOUT (50) + +/* Note: returns poll_bit return value, which is >= 0 if success */ +static int iwl_pcie_set_hw_ready(struct iwl_trans *trans) +{ + int ret; + + iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_BIT_NIC_READY); + + /* See if we got it */ + ret = iwl_poll_bit(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_BIT_NIC_READY, + CSR_HW_IF_CONFIG_REG_BIT_NIC_READY, + HW_READY_TIMEOUT); + + if (ret >= 0) + iwl_set_bit(trans, CSR_MBOX_SET_REG, CSR_MBOX_SET_REG_OS_ALIVE); + + IWL_DEBUG_INFO(trans, "hardware%s ready\n", ret < 0 ? " not" : ""); + return ret; +} + +/* Note: returns standard 0/-ERROR code */ +static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) +{ + int ret; + int t = 0; + int iter; + + IWL_DEBUG_INFO(trans, "iwl_trans_prepare_card_hw enter\n"); + + ret = iwl_pcie_set_hw_ready(trans); + /* If the card is ready, exit 0 */ + if (ret >= 0) + return 0; + + iwl_set_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); + msleep(1); + + for (iter = 0; iter < 10; iter++) { + /* If HW is not ready, prepare the conditions to check again */ + iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_PREPARE); + + do { + ret = iwl_pcie_set_hw_ready(trans); + if (ret >= 0) + return 0; + + usleep_range(200, 1000); + t += 200; + } while (t < 150000); + msleep(25); + } + + IWL_ERR(trans, "Couldn't prepare the card\n"); + + return ret; +} + +/* + * ucode + */ +static int iwl_pcie_load_firmware_chunk(struct iwl_trans *trans, u32 dst_addr, + dma_addr_t phy_addr, u32 byte_cnt) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int ret; + + trans_pcie->ucode_write_complete = false; + + iwl_write_direct32(trans, + FH_TCSR_CHNL_TX_CONFIG_REG(FH_SRVC_CHNL), + FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_PAUSE); + + iwl_write_direct32(trans, + FH_SRVC_CHNL_SRAM_ADDR_REG(FH_SRVC_CHNL), + dst_addr); + + iwl_write_direct32(trans, + FH_TFDIB_CTRL0_REG(FH_SRVC_CHNL), + phy_addr & FH_MEM_TFDIB_DRAM_ADDR_LSB_MSK); + + iwl_write_direct32(trans, + FH_TFDIB_CTRL1_REG(FH_SRVC_CHNL), + (iwl_get_dma_hi_addr(phy_addr) + << FH_MEM_TFDIB_REG1_ADDR_BITSHIFT) | byte_cnt); + + iwl_write_direct32(trans, + FH_TCSR_CHNL_TX_BUF_STS_REG(FH_SRVC_CHNL), + 1 << FH_TCSR_CHNL_TX_BUF_STS_REG_POS_TB_NUM | + 1 << FH_TCSR_CHNL_TX_BUF_STS_REG_POS_TB_IDX | + FH_TCSR_CHNL_TX_BUF_STS_REG_VAL_TFDB_VALID); + + iwl_write_direct32(trans, + FH_TCSR_CHNL_TX_CONFIG_REG(FH_SRVC_CHNL), + FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE | + FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_DISABLE | + FH_TCSR_TX_CONFIG_REG_VAL_CIRQ_HOST_ENDTFD); + + ret = wait_event_timeout(trans_pcie->ucode_write_waitq, + 
trans_pcie->ucode_write_complete, 5 * HZ); + if (!ret) { + IWL_ERR(trans, "Failed to load firmware chunk!\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int iwl_pcie_load_section(struct iwl_trans *trans, u8 section_num, + const struct fw_desc *section) +{ + u8 *v_addr; + dma_addr_t p_addr; + u32 offset, chunk_sz = min_t(u32, FH_MEM_TB_MAX_LENGTH, section->len); + int ret = 0; + + IWL_DEBUG_FW(trans, "[%d] uCode section being loaded...\n", + section_num); + + v_addr = dma_alloc_coherent(trans->dev, chunk_sz, &p_addr, + GFP_KERNEL | __GFP_NOWARN); + if (!v_addr) { + IWL_DEBUG_INFO(trans, "Falling back to small chunks of DMA\n"); + chunk_sz = PAGE_SIZE; + v_addr = dma_alloc_coherent(trans->dev, chunk_sz, + &p_addr, GFP_KERNEL); + if (!v_addr) + return -ENOMEM; + } + + for (offset = 0; offset < section->len; offset += chunk_sz) { + u32 copy_size, dst_addr; + bool extended_addr = false; + + copy_size = min_t(u32, chunk_sz, section->len - offset); + dst_addr = section->offset + offset; + + if (dst_addr >= IWL_FW_MEM_EXTENDED_START && + dst_addr <= IWL_FW_MEM_EXTENDED_END) + extended_addr = true; + + if (extended_addr) + iwl_set_bits_prph(trans, LMPM_CHICK, + LMPM_CHICK_EXTENDED_ADDR_SPACE); + + memcpy(v_addr, (u8 *)section->data + offset, copy_size); + ret = iwl_pcie_load_firmware_chunk(trans, dst_addr, p_addr, + copy_size); + + if (extended_addr) + iwl_clear_bits_prph(trans, LMPM_CHICK, + LMPM_CHICK_EXTENDED_ADDR_SPACE); + + if (ret) { + IWL_ERR(trans, + "Could not load the [%d] uCode section\n", + section_num); + break; + } + } + + dma_free_coherent(trans->dev, chunk_sz, v_addr, p_addr); + return ret; +} + +/* + * Driver takes ownership of the secure machine before FW load, + * to prevent a race with the BT load. + * W/A for a ROM bug (should be removed in the next Si step). + */ +static int iwl_pcie_rsa_race_bug_wa(struct iwl_trans *trans) +{ + u32 val, loop = 1000; + + /* + * Check the RSA semaphore is accessible. + * If the HW isn't locked and the rsa semaphore isn't accessible, + * we are in trouble. + */ + val = iwl_read_prph(trans, PREG_AUX_BUS_WPROT_0); + if (val & (BIT(1) | BIT(17))) { + IWL_INFO(trans, + "can't access the RSA semaphore, it is write protected\n"); + return 0; + } + + /* take ownership on the AUX IF */ + iwl_write_prph(trans, WFPM_CTRL_REG, WFPM_AUX_CTL_AUX_IF_MAC_OWNER_MSK); + iwl_write_prph(trans, AUX_MISC_MASTER1_EN, AUX_MISC_MASTER1_EN_SBE_MSK); + + do { + iwl_write_prph(trans, AUX_MISC_MASTER1_SMPHR_STATUS, 0x1); + val = iwl_read_prph(trans, AUX_MISC_MASTER1_SMPHR_STATUS); + if (val == 0x1) { + iwl_write_prph(trans, RSA_ENABLE, 0); + return 0; + } + + udelay(10); + loop--; + } while (loop > 0); + + IWL_ERR(trans, "Failed to take ownership on secure machine\n"); + return -EIO; +} + +static int iwl_pcie_load_cpu_sections_8000(struct iwl_trans *trans, + const struct fw_img *image, + int cpu, + int *first_ucode_section) +{ + int shift_param; + int i, ret = 0, sec_num = 0x1; + u32 val, last_read_idx = 0; + + if (cpu == 1) { + shift_param = 0; + *first_ucode_section = 0; + } else { + shift_param = 16; + (*first_ucode_section)++; + } + + for (i = *first_ucode_section; i < IWL_UCODE_SECTION_MAX; i++) { + last_read_idx = i; + + /* + * CPU1_CPU2_SEPARATOR_SECTION delimiter - separate between + * CPU1 to CPU2. + * PAGING_SEPARATOR_SECTION delimiter - separate between + * CPU2 non paged to CPU2 paging sec.
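 + * + * As an illustration (section indices here are hypothetical), a dual-CPU paged image is laid out as: + * + * sec[0..i] CPU1 sections + * sec[i+1] offset == CPU1_CPU2_SEPARATOR_SECTION + * sec[i+2..j] CPU2 non-paged sections + * sec[j+1] offset == PAGING_SEPARATOR_SECTION + * sec[j+2..] CPU2 paging sections + * + * The loop below stops at the first hole or separator and leaves *first_ucode_section pointing at it, so the next call (for the next CPU) resumes from the entry right after it.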
+ */ + if (!image->sec[i].data || + image->sec[i].offset == CPU1_CPU2_SEPARATOR_SECTION || + image->sec[i].offset == PAGING_SEPARATOR_SECTION) { + IWL_DEBUG_FW(trans, + "Break since Data not valid or Empty section, sec = %d\n", + i); + break; + } + + ret = iwl_pcie_load_section(trans, i, &image->sec[i]); + if (ret) + return ret; + + /* Notify the ucode of the loaded section number and status */ + val = iwl_read_direct32(trans, FH_UCODE_LOAD_STATUS); + val = val | (sec_num << shift_param); + iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, val); + sec_num = (sec_num << 1) | 0x1; + } + + *first_ucode_section = last_read_idx; + + if (cpu == 1) + iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, 0xFFFF); + else + iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, 0xFFFFFFFF); + + return 0; +} + +static int iwl_pcie_load_cpu_sections(struct iwl_trans *trans, + const struct fw_img *image, + int cpu, + int *first_ucode_section) +{ + int shift_param; + int i, ret = 0; + u32 last_read_idx = 0; + + if (cpu == 1) { + shift_param = 0; + *first_ucode_section = 0; + } else { + shift_param = 16; + (*first_ucode_section)++; + } + + for (i = *first_ucode_section; i < IWL_UCODE_SECTION_MAX; i++) { + last_read_idx = i; + + /* + * CPU1_CPU2_SEPARATOR_SECTION delimiter - separate between + * CPU1 to CPU2. + * PAGING_SEPARATOR_SECTION delimiter - separate between + * CPU2 non paged to CPU2 paging sec. + */ + if (!image->sec[i].data || + image->sec[i].offset == CPU1_CPU2_SEPARATOR_SECTION || + image->sec[i].offset == PAGING_SEPARATOR_SECTION) { + IWL_DEBUG_FW(trans, + "Break since Data not valid or Empty section, sec = %d\n", + i); + break; + } + + ret = iwl_pcie_load_section(trans, i, &image->sec[i]); + if (ret) + return ret; + } + + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + iwl_set_bits_prph(trans, + CSR_UCODE_LOAD_STATUS_ADDR, + (LMPM_CPU_UCODE_LOADING_COMPLETED | + LMPM_CPU_HDRS_LOADING_COMPLETED | + LMPM_CPU_UCODE_LOADING_STARTED) << + shift_param); + + *first_ucode_section = last_read_idx; + + return 0; +} + +static void iwl_pcie_apply_destination(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + const struct iwl_fw_dbg_dest_tlv *dest = trans->dbg_dest_tlv; + int i; + + if (dest->version) + IWL_ERR(trans, + "DBG DEST version is %d - expect issues\n", + dest->version); + + IWL_INFO(trans, "Applying debug destination %s\n", + get_fw_dbg_mode_string(dest->monitor_mode)); + + if (dest->monitor_mode == EXTERNAL_MODE) + iwl_pcie_alloc_fw_monitor(trans, dest->size_power); + else + IWL_WARN(trans, "PCI should have external buffer debug\n"); + + for (i = 0; i < trans->dbg_dest_reg_num; i++) { + u32 addr = le32_to_cpu(dest->reg_ops[i].addr); + u32 val = le32_to_cpu(dest->reg_ops[i].val); + + switch (dest->reg_ops[i].op) { + case CSR_ASSIGN: + iwl_write32(trans, addr, val); + break; + case CSR_SETBIT: + iwl_set_bit(trans, addr, BIT(val)); + break; + case CSR_CLEARBIT: + iwl_clear_bit(trans, addr, BIT(val)); + break; + case PRPH_ASSIGN: + iwl_write_prph(trans, addr, val); + break; + case PRPH_SETBIT: + iwl_set_bits_prph(trans, addr, BIT(val)); + break; + case PRPH_CLEARBIT: + iwl_clear_bits_prph(trans, addr, BIT(val)); + break; + case PRPH_BLOCKBIT: + if (iwl_read_prph(trans, addr) & BIT(val)) { + IWL_ERR(trans, + "BIT(%u) in address 0x%x is 1, stopping FW configuration\n", + val, addr); + goto monitor; + } + break; + default: + IWL_ERR(trans, "FW debug - unknown OP %d\n", + dest->reg_ops[i].op); + break; + } + } + +monitor: + if (dest->monitor_mode == 
EXTERNAL_MODE && trans_pcie->fw_mon_size) { + iwl_write_prph(trans, le32_to_cpu(dest->base_reg), + trans_pcie->fw_mon_phys >> dest->base_shift); + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + iwl_write_prph(trans, le32_to_cpu(dest->end_reg), + (trans_pcie->fw_mon_phys + + trans_pcie->fw_mon_size - 256) >> + dest->end_shift); + else + iwl_write_prph(trans, le32_to_cpu(dest->end_reg), + (trans_pcie->fw_mon_phys + + trans_pcie->fw_mon_size) >> + dest->end_shift); + } +} + +static int iwl_pcie_load_given_ucode(struct iwl_trans *trans, + const struct fw_img *image) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int ret = 0; + int first_ucode_section; + + IWL_DEBUG_FW(trans, "working with %s CPU\n", + image->is_dual_cpus ? "Dual" : "Single"); + + /* load to FW the binary non secured sections of CPU1 */ + ret = iwl_pcie_load_cpu_sections(trans, image, 1, &first_ucode_section); + if (ret) + return ret; + + if (image->is_dual_cpus) { + /* set CPU2 header address */ + iwl_write_prph(trans, + LMPM_SECURE_UCODE_LOAD_CPU2_HDR_ADDR, + LMPM_SECURE_CPU2_HDR_MEM_SPACE); + + /* load to FW the binary sections of CPU2 */ + ret = iwl_pcie_load_cpu_sections(trans, image, 2, + &first_ucode_section); + if (ret) + return ret; + } + + /* supported for 7000 only for the moment */ + if (iwlwifi_mod_params.fw_monitor && + trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) { + iwl_pcie_alloc_fw_monitor(trans, 0); + + if (trans_pcie->fw_mon_size) { + iwl_write_prph(trans, MON_BUFF_BASE_ADDR, + trans_pcie->fw_mon_phys >> 4); + iwl_write_prph(trans, MON_BUFF_END_ADDR, + (trans_pcie->fw_mon_phys + + trans_pcie->fw_mon_size) >> 4); + } + } else if (trans->dbg_dest_tlv) { + iwl_pcie_apply_destination(trans); + } + + /* release CPU reset */ + iwl_write32(trans, CSR_RESET, 0); + + return 0; +} + +static int iwl_pcie_load_given_ucode_8000(struct iwl_trans *trans, + const struct fw_img *image) +{ + int ret = 0; + int first_ucode_section; + + IWL_DEBUG_FW(trans, "working with %s CPU\n", + image->is_dual_cpus ? "Dual" : "Single"); + + if (trans->dbg_dest_tlv) + iwl_pcie_apply_destination(trans); + + /* TODO: remove in the next Si step */ + ret = iwl_pcie_rsa_race_bug_wa(trans); + if (ret) + return ret; + + /* configure the ucode to be ready to get the secured image */ + /* release CPU reset */ + iwl_write_prph(trans, RELEASE_CPU_RESET, RELEASE_CPU_RESET_BIT); + + /* load to FW the binary Secured sections of CPU1 */ + ret = iwl_pcie_load_cpu_sections_8000(trans, image, 1, + &first_ucode_section); + if (ret) + return ret; + + /* load to FW the binary sections of CPU2 */ + return iwl_pcie_load_cpu_sections_8000(trans, image, 2, + &first_ucode_section); +} + +static int iwl_trans_pcie_start_fw(struct iwl_trans *trans, + const struct fw_img *fw, bool run_in_rfkill) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + bool hw_rfkill; + int ret; + + mutex_lock(&trans_pcie->mutex); + + /* Someone called stop_device, don't try to start_fw */ + if (trans_pcie->is_down) { + IWL_WARN(trans, + "Can't start_fw since the HW hasn't been started\n"); + ret = -EIO; + goto out; + } + + /* This may fail if AMT took ownership of the device */ + if (iwl_pcie_prepare_card_hw(trans)) { + IWL_WARN(trans, "Exit HW not ready\n"); + ret = -EIO; + goto out; + } + + iwl_enable_rfkill_int(trans); + + /* If platform's RF_KILL switch is NOT set to KILL */ + hw_rfkill = iwl_is_rfkill_set(trans); + if (hw_rfkill) + set_bit(STATUS_RFKILL, &trans->status); + else + clear_bit(STATUS_RFKILL, &trans->status); + iwl_trans_pcie_rf_kill(trans, hw_rfkill); + if (hw_rfkill && !run_in_rfkill) { + ret = -ERFKILL; + goto out; + } + + iwl_write32(trans, CSR_INT, 0xFFFFFFFF); + + ret = iwl_pcie_nic_init(trans); + if (ret) { + IWL_ERR(trans, "Unable to init nic\n"); + goto out; + } + + /* make sure rfkill handshake bits are cleared */ + iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL); + iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, + CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED); + + /* clear (again), then enable host interrupts */ + iwl_write32(trans, CSR_INT, 0xFFFFFFFF); + iwl_enable_interrupts(trans); + + /* really make sure rfkill handshake bits are cleared */ + iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL); + iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL); + + /* Load the given image to the HW */ + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + ret = iwl_pcie_load_given_ucode_8000(trans, fw); + else + ret = iwl_pcie_load_given_ucode(trans, fw); + +out: + mutex_unlock(&trans_pcie->mutex); + return ret; +} + +static void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr) +{ + iwl_pcie_reset_ict(trans); + iwl_pcie_tx_start(trans, scd_addr); +} + +static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + bool hw_rfkill, was_hw_rfkill; + + lockdep_assert_held(&trans_pcie->mutex); + + if (trans_pcie->is_down) + return; + + trans_pcie->is_down = true; + + was_hw_rfkill = iwl_is_rfkill_set(trans); + + /* tell the device to stop sending interrupts */ + spin_lock(&trans_pcie->irq_lock); + iwl_disable_interrupts(trans); + spin_unlock(&trans_pcie->irq_lock); + + /* device going down, stop using ICT table */ + iwl_pcie_disable_ict(trans); + + /* + * If a HW restart happens during firmware loading, + * then the firmware loading might call this function + * and later it might be called again due to the + * restart. So don't process again if the device is + * already dead. + */ + if (test_and_clear_bit(STATUS_DEVICE_ENABLED, &trans->status)) { + IWL_DEBUG_INFO(trans, "DEVICE_ENABLED bit was set and is now cleared\n"); + iwl_pcie_tx_stop(trans); + iwl_pcie_rx_stop(trans); + + /* Power-down device's busmaster DMA clocks */ + if (!trans->cfg->apmg_not_supported) { + iwl_write_prph(trans, APMG_CLK_DIS_REG, + APMG_CLK_VAL_DMA_CLK_RQT); + udelay(5); + } + } + + /* Make sure (redundant) we've released our request to stay awake */ + iwl_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); + + /* Stop the device, and put it in low power state */ + iwl_pcie_apm_stop(trans, false); + + /* stop and reset the on-board processor */ + iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_SW_RESET); + udelay(20); + + /* + * Upon stop, the APM issues an interrupt if HW RF kill is set. + * This is a bug in certain versions of the hardware. + * Certain devices also keep sending HW RF kill interrupt all + * the time, unless the interrupt is ACKed even if the interrupt + * should be masked. Re-ACK all the interrupts here. + */ + spin_lock(&trans_pcie->irq_lock); + iwl_disable_interrupts(trans); + spin_unlock(&trans_pcie->irq_lock); + + + /* clear all status bits */ + clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status); + clear_bit(STATUS_INT_ENABLED, &trans->status); + clear_bit(STATUS_TPOWER_PMI, &trans->status); + clear_bit(STATUS_RFKILL, &trans->status); + + /* + * Even if we stop the HW, we still want the RF kill + * interrupt + */ + iwl_enable_rfkill_int(trans); + + /* + * Check again since the RF kill state may have changed while + * all the interrupts were disabled, in this case we couldn't + * receive the RF kill interrupt and update the state in the + * op_mode. + * Don't call the op_mode if the rfkill state hasn't changed. + * This allows the op_mode to call stop_device from the rfkill + * notification without endless recursion. Under very rare + * circumstances, we might have a small recursion if the rfkill + * state changed exactly now while we were called from stop_device. + * This is very unlikely but can happen and is supported. + * (The recursion is bounded: is_down has already been set above, + * so a re-entrant call returns immediately at the top of this + * function.) + */ + hw_rfkill = iwl_is_rfkill_set(trans); + if (hw_rfkill) + set_bit(STATUS_RFKILL, &trans->status); + else + clear_bit(STATUS_RFKILL, &trans->status); + if (hw_rfkill != was_hw_rfkill) + iwl_trans_pcie_rf_kill(trans, hw_rfkill); + + /* re-take ownership to prevent other users from stealing the device */ + iwl_pcie_prepare_card_hw(trans); +} + +static void iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + mutex_lock(&trans_pcie->mutex); + _iwl_trans_pcie_stop_device(trans, low_power); + mutex_unlock(&trans_pcie->mutex); +} + +void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state) +{ + struct iwl_trans_pcie __maybe_unused *trans_pcie = + IWL_TRANS_GET_PCIE_TRANS(trans); + + lockdep_assert_held(&trans_pcie->mutex); + + if (iwl_op_mode_hw_rf_kill(trans->op_mode, state)) + _iwl_trans_pcie_stop_device(trans, true); +} + +static void iwl_trans_pcie_d3_suspend(struct iwl_trans *trans, bool test) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + if (trans->system_pm_mode == IWL_PLAT_PM_MODE_D0I3) { + /* Enable persistence mode to avoid reset */ + iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_PERSIST_MODE); + } + + iwl_disable_interrupts(trans); + + /* + * in testing mode, the host stays awake and the + * hardware won't be reset (not even partially) + */ + if (test) + return; + + iwl_pcie_disable_ict(trans); + + synchronize_irq(trans_pcie->pci_dev->irq); + + iwl_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); + iwl_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_INIT_DONE); + + if (trans->system_pm_mode == IWL_PLAT_PM_MODE_D3) { + /* + * reset TX queues -- some of their registers reset during S3 + * so if we don't reset everything here the D3 image would try + * to execute some invalid memory upon resume + */ + iwl_trans_pcie_tx_reset(trans); + } + + iwl_pcie_set_pwr(trans, true); +} + +static int iwl_trans_pcie_d3_resume(struct iwl_trans *trans, + enum iwl_d3_status *status, + bool test) +{ + u32 val; + int ret; + + if (test) { + iwl_enable_interrupts(trans); + *status = IWL_D3_STATUS_ALIVE; + return 0; + } + + /* + * Also enables interrupts - none will happen as the device doesn't + * know we're waking it up, only when the opmode actually tells it + * after this call.
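 + * + * (The enabling happens as a side effect of iwl_pcie_reset_ict() below, which internally disables and then re-enables interrupts while re-arming the ICT table - hence no explicit iwl_enable_interrupts() on this path.)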
+ */ + iwl_pcie_reset_ict(trans); + + iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); + iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE); + + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + udelay(2); + + ret = iwl_poll_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, + 25000); + if (ret < 0) { + IWL_ERR(trans, "Failed to resume the device (mac ready)\n"); + return ret; + } + + iwl_pcie_set_pwr(trans, false); + + if (trans->system_pm_mode == IWL_PLAT_PM_MODE_D0I3) { + iwl_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); + } else { + iwl_trans_pcie_tx_reset(trans); + + ret = iwl_pcie_rx_init(trans); + if (ret) { + IWL_ERR(trans, + "Failed to resume the device (RX reset)\n"); + return ret; + } + } + + val = iwl_read32(trans, CSR_RESET); + if (val & CSR_RESET_REG_FLAG_NEVO_RESET) + *status = IWL_D3_STATUS_RESET; + else + *status = IWL_D3_STATUS_ALIVE; + + return 0; +} + +static int _iwl_trans_pcie_start_hw(struct iwl_trans *trans, bool low_power) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + bool hw_rfkill; + int err; + + lockdep_assert_held(&trans_pcie->mutex); + + err = iwl_pcie_prepare_card_hw(trans); + if (err) { + IWL_ERR(trans, "Error while preparing HW: %d\n", err); + return err; + } + + /* Reset the entire device */ + iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_SW_RESET); + + usleep_range(10, 15); + + iwl_pcie_apm_init(trans); + + /* From now on, the op_mode will be kept updated about RF kill state */ + iwl_enable_rfkill_int(trans); + + /* Set is_down to false here so that...*/ + trans_pcie->is_down = false; + + hw_rfkill = iwl_is_rfkill_set(trans); + if (hw_rfkill) + set_bit(STATUS_RFKILL, &trans->status); + else + clear_bit(STATUS_RFKILL, &trans->status); + /* ... 
rfkill can call stop_device and set it false if needed */ + iwl_trans_pcie_rf_kill(trans, hw_rfkill); + + return 0; +} + +static int iwl_trans_pcie_start_hw(struct iwl_trans *trans, bool low_power) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int ret; + + mutex_lock(&trans_pcie->mutex); + ret = _iwl_trans_pcie_start_hw(trans, low_power); + mutex_unlock(&trans_pcie->mutex); + + return ret; +} + +static void iwl_trans_pcie_op_mode_leave(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + mutex_lock(&trans_pcie->mutex); + + /* disable interrupts - don't enable HW RF kill interrupt */ + spin_lock(&trans_pcie->irq_lock); + iwl_disable_interrupts(trans); + spin_unlock(&trans_pcie->irq_lock); + + iwl_pcie_apm_stop(trans, true); + + spin_lock(&trans_pcie->irq_lock); + iwl_disable_interrupts(trans); + spin_unlock(&trans_pcie->irq_lock); + + iwl_pcie_disable_ict(trans); + + mutex_unlock(&trans_pcie->mutex); + + synchronize_irq(trans_pcie->pci_dev->irq); +} + +static void iwl_trans_pcie_write8(struct iwl_trans *trans, u32 ofs, u8 val) +{ + writeb(val, IWL_TRANS_GET_PCIE_TRANS(trans)->hw_base + ofs); +} + +static void iwl_trans_pcie_write32(struct iwl_trans *trans, u32 ofs, u32 val) +{ + writel(val, IWL_TRANS_GET_PCIE_TRANS(trans)->hw_base + ofs); +} + +static u32 iwl_trans_pcie_read32(struct iwl_trans *trans, u32 ofs) +{ + return readl(IWL_TRANS_GET_PCIE_TRANS(trans)->hw_base + ofs); +} + +static u32 iwl_trans_pcie_read_prph(struct iwl_trans *trans, u32 reg) +{ + iwl_trans_pcie_write32(trans, HBUS_TARG_PRPH_RADDR, + ((reg & 0x000FFFFF) | (3 << 24))); + return iwl_trans_pcie_read32(trans, HBUS_TARG_PRPH_RDAT); +} + +static void iwl_trans_pcie_write_prph(struct iwl_trans *trans, u32 addr, + u32 val) +{ + iwl_trans_pcie_write32(trans, HBUS_TARG_PRPH_WADDR, + ((addr & 0x000FFFFF) | (3 << 24))); + iwl_trans_pcie_write32(trans, HBUS_TARG_PRPH_WDAT, val); +} + +static int iwl_pcie_dummy_napi_poll(struct napi_struct *napi, int budget) +{ + WARN_ON(1); + return 0; +} + +static void iwl_trans_pcie_configure(struct iwl_trans *trans, + const struct iwl_trans_config *trans_cfg) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + trans_pcie->cmd_queue = trans_cfg->cmd_queue; + trans_pcie->cmd_fifo = trans_cfg->cmd_fifo; + trans_pcie->cmd_q_wdg_timeout = trans_cfg->cmd_q_wdg_timeout; + if (WARN_ON(trans_cfg->n_no_reclaim_cmds > MAX_NO_RECLAIM_CMDS)) + trans_pcie->n_no_reclaim_cmds = 0; + else + trans_pcie->n_no_reclaim_cmds = trans_cfg->n_no_reclaim_cmds; + if (trans_pcie->n_no_reclaim_cmds) + memcpy(trans_pcie->no_reclaim_cmds, trans_cfg->no_reclaim_cmds, + trans_pcie->n_no_reclaim_cmds * sizeof(u8)); + + trans_pcie->rx_buf_size = trans_cfg->rx_buf_size; + trans_pcie->rx_page_order = + iwl_trans_get_rb_size_order(trans_pcie->rx_buf_size); + + trans_pcie->wide_cmd_header = trans_cfg->wide_cmd_header; + trans_pcie->bc_table_dword = trans_cfg->bc_table_dword; + trans_pcie->scd_set_active = trans_cfg->scd_set_active; + trans_pcie->sw_csum_tx = trans_cfg->sw_csum_tx; + + trans->command_groups = trans_cfg->command_groups; + trans->command_groups_size = trans_cfg->command_groups_size; + + /* init ref_count to 1 (should be cleared when ucode is loaded) */ + trans_pcie->ref_count = 1; + + /* Initialize NAPI here - it should be before registering to mac80211 + * in the opmode but after the HW struct is allocated. 
+ * As this function may be called again in some corner cases don't + * do anything if NAPI was already initialized. + */ + if (!trans_pcie->napi.poll) { + init_dummy_netdev(&trans_pcie->napi_dev); + netif_napi_add(&trans_pcie->napi_dev, &trans_pcie->napi, + iwl_pcie_dummy_napi_poll, 64); + } +} + +void iwl_trans_pcie_free(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int i; + + synchronize_irq(trans_pcie->pci_dev->irq); + + iwl_pcie_tx_free(trans); + iwl_pcie_rx_free(trans); + + free_irq(trans_pcie->pci_dev->irq, trans); + iwl_pcie_free_ict(trans); + + pci_disable_msi(trans_pcie->pci_dev); + iounmap(trans_pcie->hw_base); + pci_release_regions(trans_pcie->pci_dev); + pci_disable_device(trans_pcie->pci_dev); + + if (trans_pcie->napi.poll) + netif_napi_del(&trans_pcie->napi); + + iwl_pcie_free_fw_monitor(trans); + + for_each_possible_cpu(i) { + struct iwl_tso_hdr_page *p = + per_cpu_ptr(trans_pcie->tso_hdr_page, i); + + if (p->page) + __free_page(p->page); + } + + free_percpu(trans_pcie->tso_hdr_page); + iwl_trans_free(trans); +} + +static void iwl_trans_pcie_set_pmi(struct iwl_trans *trans, bool state) +{ + if (state) + set_bit(STATUS_TPOWER_PMI, &trans->status); + else + clear_bit(STATUS_TPOWER_PMI, &trans->status); +} + +static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, + unsigned long *flags) +{ + int ret; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + spin_lock_irqsave(&trans_pcie->reg_lock, *flags); + + if (trans_pcie->cmd_hold_nic_awake) + goto out; + + /* this bit wakes up the NIC */ + __iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + udelay(2); + + /* + * These bits say the device is running, and should keep running for + * at least a short while (at least as long as MAC_ACCESS_REQ stays 1), + * but they do not indicate that embedded SRAM is restored yet; + * 3945 and 4965 have volatile SRAM, and must save/restore contents + * to/from host DRAM when sleeping/waking for power-saving. + * Each direction takes approximately 1/4 millisecond; with this + * overhead, it's a good idea to grab and hold MAC_ACCESS_REQUEST if a + * series of register accesses are expected (e.g. reading Event Log), + * to keep device from sleeping. + * + * CSR_UCODE_DRV_GP1 register bit MAC_SLEEP == 0 indicates that + * SRAM is okay/restored. We don't check that here because this call + * is just for hardware register access; but GP1 MAC_SLEEP check is a + * good idea before accessing 3945/4965 SRAM (e.g. reading Event Log). + * + * 5000 series and later (including 1000 series) have non-volatile SRAM, + * and do not save/restore SRAM when power cycling. + */ + ret = iwl_poll_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_VAL_MAC_ACCESS_EN, + (CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY | + CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000); + if (unlikely(ret < 0)) { + iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_FORCE_NMI); + WARN_ONCE(1, + "Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n", + iwl_read32(trans, CSR_GP_CNTRL)); + spin_unlock_irqrestore(&trans_pcie->reg_lock, *flags); + return false; + } + +out: + /* + * Fool sparse by faking we release the lock - sparse will + * track nic_access anyway. 
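 + * + * For reference, the intended usage pattern (of which iwl_trans_pcie_read_mem() below is a real instance) is roughly: + * + *	unsigned long flags; + * + *	if (iwl_trans_grab_nic_access(trans, &flags)) { + *		... register I/O with the MAC guaranteed awake ... + *		iwl_trans_release_nic_access(trans, &flags); + *	} + * + * The reg_lock is thus really taken here and only dropped in the release path; the __release() below merely placates sparse's lock-context tracking.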
+ */ + __release(&trans_pcie->reg_lock); + return true; +} + +static void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans, + unsigned long *flags) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + lockdep_assert_held(&trans_pcie->reg_lock); + + /* + * Fool sparse by faking we acquiring the lock - sparse will + * track nic_access anyway. + */ + __acquire(&trans_pcie->reg_lock); + + if (trans_pcie->cmd_hold_nic_awake) + goto out; + + __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); + /* + * Above we read the CSR_GP_CNTRL register, which will flush + * any previous writes, but we need the write that clears the + * MAC_ACCESS_REQ bit to be performed before any other writes + * scheduled on different CPUs (after we drop reg_lock). + */ + mmiowb(); +out: + spin_unlock_irqrestore(&trans_pcie->reg_lock, *flags); +} + +static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr, + void *buf, int dwords) +{ + unsigned long flags; + int offs, ret = 0; + u32 *vals = buf; + + if (iwl_trans_grab_nic_access(trans, &flags)) { + iwl_write32(trans, HBUS_TARG_MEM_RADDR, addr); + for (offs = 0; offs < dwords; offs++) + vals[offs] = iwl_read32(trans, HBUS_TARG_MEM_RDAT); + iwl_trans_release_nic_access(trans, &flags); + } else { + ret = -EBUSY; + } + return ret; +} + +static int iwl_trans_pcie_write_mem(struct iwl_trans *trans, u32 addr, + const void *buf, int dwords) +{ + unsigned long flags; + int offs, ret = 0; + const u32 *vals = buf; + + if (iwl_trans_grab_nic_access(trans, &flags)) { + iwl_write32(trans, HBUS_TARG_MEM_WADDR, addr); + for (offs = 0; offs < dwords; offs++) + iwl_write32(trans, HBUS_TARG_MEM_WDAT, + vals ? vals[offs] : 0); + iwl_trans_release_nic_access(trans, &flags); + } else { + ret = -EBUSY; + } + return ret; +} + +static void iwl_trans_pcie_freeze_txq_timer(struct iwl_trans *trans, + unsigned long txqs, + bool freeze) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int queue; + + for_each_set_bit(queue, &txqs, BITS_PER_LONG) { + struct iwl_txq *txq = &trans_pcie->txq[queue]; + unsigned long now; + + spin_lock_bh(&txq->lock); + + now = jiffies; + + if (txq->frozen == freeze) + goto next_queue; + + IWL_DEBUG_TX_QUEUES(trans, "%s TXQ %d\n", + freeze ? "Freezing" : "Waking", queue); + + txq->frozen = freeze; + + if (txq->q.read_ptr == txq->q.write_ptr) + goto next_queue; + + if (freeze) { + if (unlikely(time_after(now, + txq->stuck_timer.expires))) { + /* + * The timer should have fired, maybe it is + * spinning right now on the lock. 
+ */ + goto next_queue; + } + /* remember how long until the timer fires */ + txq->frozen_expiry_remainder = + txq->stuck_timer.expires - now; + del_timer(&txq->stuck_timer); + goto next_queue; + } + + /* + * Wake a non-empty queue -> arm timer with the + * remainder before it froze + */ + mod_timer(&txq->stuck_timer, + now + txq->frozen_expiry_remainder); + +next_queue: + spin_unlock_bh(&txq->lock); + } +} + +static void iwl_trans_pcie_block_txq_ptrs(struct iwl_trans *trans, bool block) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int i; + + for (i = 0; i < trans->cfg->base_params->num_of_queues; i++) { + struct iwl_txq *txq = &trans_pcie->txq[i]; + + if (i == trans_pcie->cmd_queue) + continue; + + spin_lock_bh(&txq->lock); + + if (!block && !(WARN_ON_ONCE(!txq->block))) { + txq->block--; + if (!txq->block) { + iwl_write32(trans, HBUS_TARG_WRPTR, + txq->q.write_ptr | (i << 8)); + } + } else if (block) { + txq->block++; + } + + spin_unlock_bh(&txq->lock); + } +} + +#define IWL_FLUSH_WAIT_MS 2000 + +static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, u32 txq_bm) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct iwl_txq *txq; + struct iwl_queue *q; + int cnt; + unsigned long now = jiffies; + u32 scd_sram_addr; + u8 buf[16]; + int ret = 0; + + /* waiting for all the tx frames complete might take a while */ + for (cnt = 0; cnt < trans->cfg->base_params->num_of_queues; cnt++) { + u8 wr_ptr; + + if (cnt == trans_pcie->cmd_queue) + continue; + if (!test_bit(cnt, trans_pcie->queue_used)) + continue; + if (!(BIT(cnt) & txq_bm)) + continue; + + IWL_DEBUG_TX_QUEUES(trans, "Emptying queue %d...\n", cnt); + txq = &trans_pcie->txq[cnt]; + q = &txq->q; + wr_ptr = ACCESS_ONCE(q->write_ptr); + + while (q->read_ptr != ACCESS_ONCE(q->write_ptr) && + !time_after(jiffies, + now + msecs_to_jiffies(IWL_FLUSH_WAIT_MS))) { + u8 write_ptr = ACCESS_ONCE(q->write_ptr); + + if (WARN_ONCE(wr_ptr != write_ptr, + "WR pointer moved while flushing %d -> %d\n", + wr_ptr, write_ptr)) + return -ETIMEDOUT; + msleep(1); + } + + if (q->read_ptr != q->write_ptr) { + IWL_ERR(trans, + "fail to flush all tx fifo queues Q %d\n", cnt); + ret = -ETIMEDOUT; + break; + } + IWL_DEBUG_TX_QUEUES(trans, "Queue %d is now empty.\n", cnt); + } + + if (!ret) + return 0; + + IWL_ERR(trans, "Current SW read_ptr %d write_ptr %d\n", + txq->q.read_ptr, txq->q.write_ptr); + + scd_sram_addr = trans_pcie->scd_base_addr + + SCD_TX_STTS_QUEUE_OFFSET(txq->q.id); + iwl_trans_read_mem_bytes(trans, scd_sram_addr, buf, sizeof(buf)); + + iwl_print_hex_error(trans, buf, sizeof(buf)); + + for (cnt = 0; cnt < FH_TCSR_CHNL_NUM; cnt++) + IWL_ERR(trans, "FH TRBs(%d) = 0x%08x\n", cnt, + iwl_read_direct32(trans, FH_TX_TRB_REG(cnt))); + + for (cnt = 0; cnt < trans->cfg->base_params->num_of_queues; cnt++) { + u32 status = iwl_read_prph(trans, SCD_QUEUE_STATUS_BITS(cnt)); + u8 fifo = (status >> SCD_QUEUE_STTS_REG_POS_TXF) & 0x7; + bool active = !!(status & BIT(SCD_QUEUE_STTS_REG_POS_ACTIVE)); + u32 tbl_dw = + iwl_trans_read_mem32(trans, trans_pcie->scd_base_addr + + SCD_TRANS_TBL_OFFSET_QUEUE(cnt)); + + if (cnt & 0x1) + tbl_dw = (tbl_dw & 0xFFFF0000) >> 16; + else + tbl_dw = tbl_dw & 0x0000FFFF; + + IWL_ERR(trans, + "Q %d is %sactive and mapped to fifo %d ra_tid 0x%04x [%d,%d]\n", + cnt, active ? 
"" : "in", fifo, tbl_dw, + iwl_read_prph(trans, SCD_QUEUE_RDPTR(cnt)) & + (TFD_QUEUE_SIZE_MAX - 1), + iwl_read_prph(trans, SCD_QUEUE_WRPTR(cnt))); + } + + return ret; +} + +static void iwl_trans_pcie_set_bits_mask(struct iwl_trans *trans, u32 reg, + u32 mask, u32 value) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + unsigned long flags; + + spin_lock_irqsave(&trans_pcie->reg_lock, flags); + __iwl_trans_pcie_set_bits_mask(trans, reg, mask, value); + spin_unlock_irqrestore(&trans_pcie->reg_lock, flags); +} + +void iwl_trans_pcie_ref(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + unsigned long flags; + + if (iwlwifi_mod_params.d0i3_disable) + return; + + spin_lock_irqsave(&trans_pcie->ref_lock, flags); + IWL_DEBUG_RPM(trans, "ref_counter: %d\n", trans_pcie->ref_count); + trans_pcie->ref_count++; + spin_unlock_irqrestore(&trans_pcie->ref_lock, flags); +} + +void iwl_trans_pcie_unref(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + unsigned long flags; + + if (iwlwifi_mod_params.d0i3_disable) + return; + + spin_lock_irqsave(&trans_pcie->ref_lock, flags); + IWL_DEBUG_RPM(trans, "ref_counter: %d\n", trans_pcie->ref_count); + if (WARN_ON_ONCE(trans_pcie->ref_count == 0)) { + spin_unlock_irqrestore(&trans_pcie->ref_lock, flags); + return; + } + trans_pcie->ref_count--; + spin_unlock_irqrestore(&trans_pcie->ref_lock, flags); +} + +static const char *get_csr_string(int cmd) +{ +#define IWL_CMD(x) case x: return #x + switch (cmd) { + IWL_CMD(CSR_HW_IF_CONFIG_REG); + IWL_CMD(CSR_INT_COALESCING); + IWL_CMD(CSR_INT); + IWL_CMD(CSR_INT_MASK); + IWL_CMD(CSR_FH_INT_STATUS); + IWL_CMD(CSR_GPIO_IN); + IWL_CMD(CSR_RESET); + IWL_CMD(CSR_GP_CNTRL); + IWL_CMD(CSR_HW_REV); + IWL_CMD(CSR_EEPROM_REG); + IWL_CMD(CSR_EEPROM_GP); + IWL_CMD(CSR_OTP_GP_REG); + IWL_CMD(CSR_GIO_REG); + IWL_CMD(CSR_GP_UCODE_REG); + IWL_CMD(CSR_GP_DRIVER_REG); + IWL_CMD(CSR_UCODE_DRV_GP1); + IWL_CMD(CSR_UCODE_DRV_GP2); + IWL_CMD(CSR_LED_REG); + IWL_CMD(CSR_DRAM_INT_TBL_REG); + IWL_CMD(CSR_GIO_CHICKEN_BITS); + IWL_CMD(CSR_ANA_PLL_CFG); + IWL_CMD(CSR_HW_REV_WA_REG); + IWL_CMD(CSR_MONITOR_STATUS_REG); + IWL_CMD(CSR_DBG_HPET_MEM_REG); + default: + return "UNKNOWN"; + } +#undef IWL_CMD +} + +void iwl_pcie_dump_csr(struct iwl_trans *trans) +{ + int i; + static const u32 csr_tbl[] = { + CSR_HW_IF_CONFIG_REG, + CSR_INT_COALESCING, + CSR_INT, + CSR_INT_MASK, + CSR_FH_INT_STATUS, + CSR_GPIO_IN, + CSR_RESET, + CSR_GP_CNTRL, + CSR_HW_REV, + CSR_EEPROM_REG, + CSR_EEPROM_GP, + CSR_OTP_GP_REG, + CSR_GIO_REG, + CSR_GP_UCODE_REG, + CSR_GP_DRIVER_REG, + CSR_UCODE_DRV_GP1, + CSR_UCODE_DRV_GP2, + CSR_LED_REG, + CSR_DRAM_INT_TBL_REG, + CSR_GIO_CHICKEN_BITS, + CSR_ANA_PLL_CFG, + CSR_MONITOR_STATUS_REG, + CSR_HW_REV_WA_REG, + CSR_DBG_HPET_MEM_REG + }; + IWL_ERR(trans, "CSR values:\n"); + IWL_ERR(trans, "(2nd byte of CSR_INT_COALESCING is " + "CSR_INT_PERIODIC_REG)\n"); + for (i = 0; i < ARRAY_SIZE(csr_tbl); i++) { + IWL_ERR(trans, " %25s: 0X%08x\n", + get_csr_string(csr_tbl[i]), + iwl_read32(trans, csr_tbl[i])); + } +} + +#ifdef CONFIG_IWLWIFI_DEBUGFS +/* create and remove of files */ +#define DEBUGFS_ADD_FILE(name, parent, mode) do { \ + if (!debugfs_create_file(#name, mode, parent, trans, \ + &iwl_dbgfs_##name##_ops)) \ + goto err; \ +} while (0) + +/* file operation */ +#define DEBUGFS_READ_FILE_OPS(name) \ +static const struct file_operations iwl_dbgfs_##name##_ops = { \ + .read = iwl_dbgfs_##name##_read, \ + .open = 
simple_open, \ + .llseek = generic_file_llseek, \ +}; + +#define DEBUGFS_WRITE_FILE_OPS(name) \ +static const struct file_operations iwl_dbgfs_##name##_ops = { \ + .write = iwl_dbgfs_##name##_write, \ + .open = simple_open, \ + .llseek = generic_file_llseek, \ +}; + +#define DEBUGFS_READ_WRITE_FILE_OPS(name) \ +static const struct file_operations iwl_dbgfs_##name##_ops = { \ + .write = iwl_dbgfs_##name##_write, \ + .read = iwl_dbgfs_##name##_read, \ + .open = simple_open, \ + .llseek = generic_file_llseek, \ +}; + +static ssize_t iwl_dbgfs_tx_queue_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_trans *trans = file->private_data; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct iwl_txq *txq; + struct iwl_queue *q; + char *buf; + int pos = 0; + int cnt; + int ret; + size_t bufsz; + + bufsz = sizeof(char) * 75 * trans->cfg->base_params->num_of_queues; + + if (!trans_pcie->txq) + return -EAGAIN; + + buf = kzalloc(bufsz, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (cnt = 0; cnt < trans->cfg->base_params->num_of_queues; cnt++) { + txq = &trans_pcie->txq[cnt]; + q = &txq->q; + pos += scnprintf(buf + pos, bufsz - pos, + "hwq %.2d: read=%u write=%u use=%d stop=%d need_update=%d frozen=%d%s\n", + cnt, q->read_ptr, q->write_ptr, + !!test_bit(cnt, trans_pcie->queue_used), + !!test_bit(cnt, trans_pcie->queue_stopped), + txq->need_update, txq->frozen, + (cnt == trans_pcie->cmd_queue ? " HCMD" : "")); + } + ret = simple_read_from_buffer(user_buf, count, ppos, buf, pos); + kfree(buf); + return ret; +} + +static ssize_t iwl_dbgfs_rx_queue_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_trans *trans = file->private_data; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct iwl_rxq *rxq = &trans_pcie->rxq; + char buf[256]; + int pos = 0; + const size_t bufsz = sizeof(buf); + + pos += scnprintf(buf + pos, bufsz - pos, "read: %u\n", + rxq->read); + pos += scnprintf(buf + pos, bufsz - pos, "write: %u\n", + rxq->write); + pos += scnprintf(buf + pos, bufsz - pos, "write_actual: %u\n", + rxq->write_actual); + pos += scnprintf(buf + pos, bufsz - pos, "need_update: %d\n", + rxq->need_update); + pos += scnprintf(buf + pos, bufsz - pos, "free_count: %u\n", + rxq->free_count); + if (rxq->rb_stts) { + pos += scnprintf(buf + pos, bufsz - pos, "closed_rb_num: %u\n", + le16_to_cpu(rxq->rb_stts->closed_rb_num) & 0x0FFF); + } else { + pos += scnprintf(buf + pos, bufsz - pos, + "closed_rb_num: Not Allocated\n"); + } + return simple_read_from_buffer(user_buf, count, ppos, buf, pos); +} + +static ssize_t iwl_dbgfs_interrupt_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_trans *trans = file->private_data; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct isr_statistics *isr_stats = &trans_pcie->isr_stats; + + int pos = 0; + char *buf; + int bufsz = 24 * 64; /* 24 items * 64 char per item */ + ssize_t ret; + + buf = kzalloc(bufsz, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + pos += scnprintf(buf + pos, bufsz - pos, + "Interrupt Statistics Report:\n"); + + pos += scnprintf(buf + pos, bufsz - pos, "HW Error:\t\t\t %u\n", + isr_stats->hw); + pos += scnprintf(buf + pos, bufsz - pos, "SW Error:\t\t\t %u\n", + isr_stats->sw); + if (isr_stats->sw || isr_stats->hw) { + pos += scnprintf(buf + pos, bufsz - pos, + "\tLast Restarting Code: 0x%X\n", + isr_stats->err_code); + } +#ifdef CONFIG_IWLWIFI_DEBUG + 
pos += scnprintf(buf + pos, bufsz - pos, "Frame transmitted:\t\t %u\n", + isr_stats->sch); + pos += scnprintf(buf + pos, bufsz - pos, "Alive interrupt:\t\t %u\n", + isr_stats->alive); +#endif + pos += scnprintf(buf + pos, bufsz - pos, + "HW RF KILL switch toggled:\t %u\n", isr_stats->rfkill); + + pos += scnprintf(buf + pos, bufsz - pos, "CT KILL:\t\t\t %u\n", + isr_stats->ctkill); + + pos += scnprintf(buf + pos, bufsz - pos, "Wakeup Interrupt:\t\t %u\n", + isr_stats->wakeup); + + pos += scnprintf(buf + pos, bufsz - pos, + "Rx command responses:\t\t %u\n", isr_stats->rx); + + pos += scnprintf(buf + pos, bufsz - pos, "Tx/FH interrupt:\t\t %u\n", + isr_stats->tx); + + pos += scnprintf(buf + pos, bufsz - pos, "Unexpected INTA:\t\t %u\n", + isr_stats->unhandled); + + ret = simple_read_from_buffer(user_buf, count, ppos, buf, pos); + kfree(buf); + return ret; +} + +static ssize_t iwl_dbgfs_interrupt_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_trans *trans = file->private_data; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct isr_statistics *isr_stats = &trans_pcie->isr_stats; + + char buf[8]; + int buf_size; + u32 reset_flag; + + memset(buf, 0, sizeof(buf)); + buf_size = min(count, sizeof(buf) - 1); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + if (sscanf(buf, "%x", &reset_flag) != 1) + return -EFAULT; + if (reset_flag == 0) + memset(isr_stats, 0, sizeof(*isr_stats)); + + return count; +} + +static ssize_t iwl_dbgfs_csr_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_trans *trans = file->private_data; + char buf[8]; + int buf_size; + int csr; + + memset(buf, 0, sizeof(buf)); + buf_size = min(count, sizeof(buf) - 1); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + if (sscanf(buf, "%d", &csr) != 1) + return -EFAULT; + + iwl_pcie_dump_csr(trans); + + return count; +} + +static ssize_t iwl_dbgfs_fh_reg_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_trans *trans = file->private_data; + char *buf = NULL; + ssize_t ret; + + ret = iwl_dump_fh(trans, &buf); + if (ret < 0) + return ret; + if (!buf) + return -EINVAL; + ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret); + kfree(buf); + return ret; +} + +DEBUGFS_READ_WRITE_FILE_OPS(interrupt); +DEBUGFS_READ_FILE_OPS(fh_reg); +DEBUGFS_READ_FILE_OPS(rx_queue); +DEBUGFS_READ_FILE_OPS(tx_queue); +DEBUGFS_WRITE_FILE_OPS(csr); + +/* Create the debugfs files and directories */ +int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans) +{ + struct dentry *dir = trans->dbgfs_dir; + + DEBUGFS_ADD_FILE(rx_queue, dir, S_IRUSR); + DEBUGFS_ADD_FILE(tx_queue, dir, S_IRUSR); + DEBUGFS_ADD_FILE(interrupt, dir, S_IWUSR | S_IRUSR); + DEBUGFS_ADD_FILE(csr, dir, S_IWUSR); + DEBUGFS_ADD_FILE(fh_reg, dir, S_IRUSR); + return 0; + +err: + IWL_ERR(trans, "failed to create the trans debugfs entry\n"); + return -ENOMEM; +} +#endif /*CONFIG_IWLWIFI_DEBUGFS */ + +static u32 iwl_trans_pcie_get_cmdlen(struct iwl_tfd *tfd) +{ + u32 cmdlen = 0; + int i; + + for (i = 0; i < IWL_NUM_OF_TBS; i++) + cmdlen += iwl_pcie_tfd_tb_get_len(tfd, i); + + return cmdlen; +} + +static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans, + struct iwl_fw_error_dump_data **data, + int allocated_rb_nums) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int max_len = PAGE_SIZE << trans_pcie->rx_page_order; + struct iwl_rxq *rxq = 
&trans_pcie->rxq; + u32 i, r, j, rb_len = 0; + + spin_lock(&rxq->lock); + + r = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF; + + for (i = rxq->read, j = 0; + i != r && j < allocated_rb_nums; + i = (i + 1) & RX_QUEUE_MASK, j++) { + struct iwl_rx_mem_buffer *rxb = rxq->queue[i]; + struct iwl_fw_error_dump_rb *rb; + + dma_unmap_page(trans->dev, rxb->page_dma, max_len, + DMA_FROM_DEVICE); + + rb_len += sizeof(**data) + sizeof(*rb) + max_len; + + (*data)->type = cpu_to_le32(IWL_FW_ERROR_DUMP_RB); + (*data)->len = cpu_to_le32(sizeof(*rb) + max_len); + rb = (void *)(*data)->data; + rb->index = cpu_to_le32(i); + memcpy(rb->data, page_address(rxb->page), max_len); + /* remap the page for the free benefit */ + rxb->page_dma = dma_map_page(trans->dev, rxb->page, 0, + max_len, + DMA_FROM_DEVICE); + + *data = iwl_fw_error_next_data(*data); + } + + spin_unlock(&rxq->lock); + + return rb_len; +} +#define IWL_CSR_TO_DUMP (0x250) + +static u32 iwl_trans_pcie_dump_csr(struct iwl_trans *trans, + struct iwl_fw_error_dump_data **data) +{ + u32 csr_len = sizeof(**data) + IWL_CSR_TO_DUMP; + __le32 *val; + int i; + + (*data)->type = cpu_to_le32(IWL_FW_ERROR_DUMP_CSR); + (*data)->len = cpu_to_le32(IWL_CSR_TO_DUMP); + val = (void *)(*data)->data; + + for (i = 0; i < IWL_CSR_TO_DUMP; i += 4) + *val++ = cpu_to_le32(iwl_trans_pcie_read32(trans, i)); + + *data = iwl_fw_error_next_data(*data); + + return csr_len; +} + +static u32 iwl_trans_pcie_fh_regs_dump(struct iwl_trans *trans, + struct iwl_fw_error_dump_data **data) +{ + u32 fh_regs_len = FH_MEM_UPPER_BOUND - FH_MEM_LOWER_BOUND; + unsigned long flags; + __le32 *val; + int i; + + if (!iwl_trans_grab_nic_access(trans, &flags)) + return 0; + + (*data)->type = cpu_to_le32(IWL_FW_ERROR_DUMP_FH_REGS); + (*data)->len = cpu_to_le32(fh_regs_len); + val = (void *)(*data)->data; + + for (i = FH_MEM_LOWER_BOUND; i < FH_MEM_UPPER_BOUND; i += sizeof(u32)) + *val++ = cpu_to_le32(iwl_trans_pcie_read32(trans, i)); + + iwl_trans_release_nic_access(trans, &flags); + + *data = iwl_fw_error_next_data(*data); + + return sizeof(**data) + fh_regs_len; +} + +static u32 +iwl_trans_pci_dump_marbh_monitor(struct iwl_trans *trans, + struct iwl_fw_error_dump_fw_mon *fw_mon_data, + u32 monitor_len) +{ + u32 buf_size_in_dwords = (monitor_len >> 2); + u32 *buffer = (u32 *)fw_mon_data->data; + unsigned long flags; + u32 i; + + if (!iwl_trans_grab_nic_access(trans, &flags)) + return 0; + + iwl_write_prph_no_grab(trans, MON_DMARB_RD_CTL_ADDR, 0x1); + for (i = 0; i < buf_size_in_dwords; i++) + buffer[i] = iwl_read_prph_no_grab(trans, + MON_DMARB_RD_DATA_ADDR); + iwl_write_prph_no_grab(trans, MON_DMARB_RD_CTL_ADDR, 0x0); + + iwl_trans_release_nic_access(trans, &flags); + + return monitor_len; +} + +static u32 +iwl_trans_pcie_dump_monitor(struct iwl_trans *trans, + struct iwl_fw_error_dump_data **data, + u32 monitor_len) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + u32 len = 0; + + if ((trans_pcie->fw_mon_page && + trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) || + trans->dbg_dest_tlv) { + struct iwl_fw_error_dump_fw_mon *fw_mon_data; + u32 base, write_ptr, wrap_cnt; + + /* If there was a dest TLV - use the values from there */ + if (trans->dbg_dest_tlv) { + write_ptr = + le32_to_cpu(trans->dbg_dest_tlv->write_ptr_reg); + wrap_cnt = le32_to_cpu(trans->dbg_dest_tlv->wrap_count); + base = le32_to_cpu(trans->dbg_dest_tlv->base_reg); + } else { + base = MON_BUFF_BASE_ADDR; + write_ptr = MON_BUFF_WRPTR; + wrap_cnt = MON_BUFF_CYCLE_CNT; + } + + 
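/*
 * Every dump section in this file (RBs, CSR, FH registers, and the
 * monitor section assembled below) emits the same TLV-style record: a
 * little-endian {type, len} header followed by the payload, after which
 * the write cursor advances past the payload to the next record. A
 * generic sketch of that append step follows; struct dump_tlv and
 * dump_append() are hypothetical illustrations of the pattern, not the
 * iwlwifi definitions.
 */
#include <linux/string.h>
#include <linux/types.h>

struct dump_tlv {
	__le32 type;
	__le32 len;
	u8 data[];		/* payload of 'len' bytes follows the header */
};

static void dump_append(struct dump_tlv **cur, u32 type,
			const void *payload, u32 len)
{
	(*cur)->type = cpu_to_le32(type);
	(*cur)->len = cpu_to_le32(len);
	memcpy((*cur)->data, payload, len);
	/* the next record starts immediately after this payload */
	*cur = (struct dump_tlv *)((*cur)->data + len);
}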
(*data)->type = cpu_to_le32(IWL_FW_ERROR_DUMP_FW_MONITOR); + fw_mon_data = (void *)(*data)->data; + fw_mon_data->fw_mon_wr_ptr = + cpu_to_le32(iwl_read_prph(trans, write_ptr)); + fw_mon_data->fw_mon_cycle_cnt = + cpu_to_le32(iwl_read_prph(trans, wrap_cnt)); + fw_mon_data->fw_mon_base_ptr = + cpu_to_le32(iwl_read_prph(trans, base)); + + len += sizeof(**data) + sizeof(*fw_mon_data); + if (trans_pcie->fw_mon_page) { + /* + * The firmware is now asserted, it won't write anything + * to the buffer. CPU can take ownership to fetch the + * data. The buffer will be handed back to the device + * before the firmware will be restarted. + */ + dma_sync_single_for_cpu(trans->dev, + trans_pcie->fw_mon_phys, + trans_pcie->fw_mon_size, + DMA_FROM_DEVICE); + memcpy(fw_mon_data->data, + page_address(trans_pcie->fw_mon_page), + trans_pcie->fw_mon_size); + + monitor_len = trans_pcie->fw_mon_size; + } else if (trans->dbg_dest_tlv->monitor_mode == SMEM_MODE) { + /* + * Update pointers to reflect actual values after + * shifting + */ + base = iwl_read_prph(trans, base) << + trans->dbg_dest_tlv->base_shift; + iwl_trans_read_mem(trans, base, fw_mon_data->data, + monitor_len / sizeof(u32)); + } else if (trans->dbg_dest_tlv->monitor_mode == MARBH_MODE) { + monitor_len = + iwl_trans_pci_dump_marbh_monitor(trans, + fw_mon_data, + monitor_len); + } else { + /* Didn't match anything - output no monitor data */ + monitor_len = 0; + } + + len += monitor_len; + (*data)->len = cpu_to_le32(monitor_len + sizeof(*fw_mon_data)); + } + + return len; +} + +static struct iwl_trans_dump_data +*iwl_trans_pcie_dump_data(struct iwl_trans *trans, + const struct iwl_fw_dbg_trigger_tlv *trigger) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct iwl_fw_error_dump_data *data; + struct iwl_txq *cmdq = &trans_pcie->txq[trans_pcie->cmd_queue]; + struct iwl_fw_error_dump_txcmd *txcmd; + struct iwl_trans_dump_data *dump_data; + u32 len, num_rbs; + u32 monitor_len; + int i, ptr; + bool dump_rbs = test_bit(STATUS_FW_ERROR, &trans->status); + + /* transport dump header */ + len = sizeof(*dump_data); + + /* host commands */ + len += sizeof(*data) + + cmdq->q.n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE); + + /* FW monitor */ + if (trans_pcie->fw_mon_page) { + len += sizeof(*data) + sizeof(struct iwl_fw_error_dump_fw_mon) + + trans_pcie->fw_mon_size; + monitor_len = trans_pcie->fw_mon_size; + } else if (trans->dbg_dest_tlv) { + u32 base, end; + + base = le32_to_cpu(trans->dbg_dest_tlv->base_reg); + end = le32_to_cpu(trans->dbg_dest_tlv->end_reg); + + base = iwl_read_prph(trans, base) << + trans->dbg_dest_tlv->base_shift; + end = iwl_read_prph(trans, end) << + trans->dbg_dest_tlv->end_shift; + + /* Make "end" point to the actual end */ + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000 || + trans->dbg_dest_tlv->monitor_mode == MARBH_MODE) + end += (1 << trans->dbg_dest_tlv->end_shift); + monitor_len = end - base; + len += sizeof(*data) + sizeof(struct iwl_fw_error_dump_fw_mon) + + monitor_len; + } else { + monitor_len = 0; + } + + if (trigger && (trigger->mode & IWL_FW_DBG_TRIGGER_MONITOR_ONLY)) { + dump_data = vzalloc(len); + if (!dump_data) + return NULL; + + data = (void *)dump_data->data; + len = iwl_trans_pcie_dump_monitor(trans, &data, monitor_len); + dump_data->len = len; + + return dump_data; + } + + /* CSR registers */ + len += sizeof(*data) + IWL_CSR_TO_DUMP; + + /* FH registers */ + len += sizeof(*data) + (FH_MEM_UPPER_BOUND - FH_MEM_LOWER_BOUND); + + if (dump_rbs) { + /* RBs */ + num_rbs = 
le16_to_cpu(ACCESS_ONCE( + trans_pcie->rxq.rb_stts->closed_rb_num)) + & 0x0FFF; + num_rbs = (num_rbs - trans_pcie->rxq.read) & RX_QUEUE_MASK; + len += num_rbs * (sizeof(*data) + + sizeof(struct iwl_fw_error_dump_rb) + + (PAGE_SIZE << trans_pcie->rx_page_order)); + } + + dump_data = vzalloc(len); + if (!dump_data) + return NULL; + + len = 0; + data = (void *)dump_data->data; + data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_TXCMD); + txcmd = (void *)data->data; + spin_lock_bh(&cmdq->lock); + ptr = cmdq->q.write_ptr; + for (i = 0; i < cmdq->q.n_window; i++) { + u8 idx = get_cmd_index(&cmdq->q, ptr); + u32 caplen, cmdlen; + + cmdlen = iwl_trans_pcie_get_cmdlen(&cmdq->tfds[ptr]); + caplen = min_t(u32, TFD_MAX_PAYLOAD_SIZE, cmdlen); + + if (cmdlen) { + len += sizeof(*txcmd) + caplen; + txcmd->cmdlen = cpu_to_le32(cmdlen); + txcmd->caplen = cpu_to_le32(caplen); + memcpy(txcmd->data, cmdq->entries[idx].cmd, caplen); + txcmd = (void *)((u8 *)txcmd->data + caplen); + } + + ptr = iwl_queue_dec_wrap(ptr); + } + spin_unlock_bh(&cmdq->lock); + + data->len = cpu_to_le32(len); + len += sizeof(*data); + data = iwl_fw_error_next_data(data); + + len += iwl_trans_pcie_dump_csr(trans, &data); + len += iwl_trans_pcie_fh_regs_dump(trans, &data); + if (dump_rbs) + len += iwl_trans_pcie_dump_rbs(trans, &data, num_rbs); + + len += iwl_trans_pcie_dump_monitor(trans, &data, monitor_len); + + dump_data->len = len; + + return dump_data; +} + +static const struct iwl_trans_ops trans_ops_pcie = { + .start_hw = iwl_trans_pcie_start_hw, + .op_mode_leave = iwl_trans_pcie_op_mode_leave, + .fw_alive = iwl_trans_pcie_fw_alive, + .start_fw = iwl_trans_pcie_start_fw, + .stop_device = iwl_trans_pcie_stop_device, + + .d3_suspend = iwl_trans_pcie_d3_suspend, + .d3_resume = iwl_trans_pcie_d3_resume, + + .send_cmd = iwl_trans_pcie_send_hcmd, + + .tx = iwl_trans_pcie_tx, + .reclaim = iwl_trans_pcie_reclaim, + + .txq_disable = iwl_trans_pcie_txq_disable, + .txq_enable = iwl_trans_pcie_txq_enable, + + .wait_tx_queue_empty = iwl_trans_pcie_wait_txq_empty, + .freeze_txq_timer = iwl_trans_pcie_freeze_txq_timer, + .block_txq_ptrs = iwl_trans_pcie_block_txq_ptrs, + + .write8 = iwl_trans_pcie_write8, + .write32 = iwl_trans_pcie_write32, + .read32 = iwl_trans_pcie_read32, + .read_prph = iwl_trans_pcie_read_prph, + .write_prph = iwl_trans_pcie_write_prph, + .read_mem = iwl_trans_pcie_read_mem, + .write_mem = iwl_trans_pcie_write_mem, + .configure = iwl_trans_pcie_configure, + .set_pmi = iwl_trans_pcie_set_pmi, + .grab_nic_access = iwl_trans_pcie_grab_nic_access, + .release_nic_access = iwl_trans_pcie_release_nic_access, + .set_bits_mask = iwl_trans_pcie_set_bits_mask, + + .ref = iwl_trans_pcie_ref, + .unref = iwl_trans_pcie_unref, + + .dump_data = iwl_trans_pcie_dump_data, +}; + +struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, + const struct pci_device_id *ent, + const struct iwl_cfg *cfg) +{ + struct iwl_trans_pcie *trans_pcie; + struct iwl_trans *trans; + u16 pci_cmd; + int ret; + + trans = iwl_trans_alloc(sizeof(struct iwl_trans_pcie), + &pdev->dev, cfg, &trans_ops_pcie, 0); + if (!trans) + return ERR_PTR(-ENOMEM); + + trans->max_skb_frags = IWL_PCIE_MAX_FRAGS; + + trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + trans_pcie->trans = trans; + spin_lock_init(&trans_pcie->irq_lock); + spin_lock_init(&trans_pcie->reg_lock); + spin_lock_init(&trans_pcie->ref_lock); + mutex_init(&trans_pcie->mutex); + init_waitqueue_head(&trans_pcie->ucode_write_waitq); + trans_pcie->tso_hdr_page = alloc_percpu(struct iwl_tso_hdr_page); + if 
(!trans_pcie->tso_hdr_page) { + ret = -ENOMEM; + goto out_no_pci; + } + + ret = pci_enable_device(pdev); + if (ret) + goto out_no_pci; + + if (!cfg->base_params->pcie_l1_allowed) { + /* + * W/A - seems to solve weird behavior. We need to remove this + * if we don't want to stay in L1 all the time. This wastes a + * lot of power. + */ + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | + PCIE_LINK_STATE_L1 | + PCIE_LINK_STATE_CLKPM); + } + + pci_set_master(pdev); + + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(36)); + if (!ret) + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(36)); + if (ret) { + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (!ret) + ret = pci_set_consistent_dma_mask(pdev, + DMA_BIT_MASK(32)); + /* both attempts failed: */ + if (ret) { + dev_err(&pdev->dev, "No suitable DMA available\n"); + goto out_pci_disable_device; + } + } + + ret = pci_request_regions(pdev, DRV_NAME); + if (ret) { + dev_err(&pdev->dev, "pci_request_regions failed\n"); + goto out_pci_disable_device; + } + + trans_pcie->hw_base = pci_ioremap_bar(pdev, 0); + if (!trans_pcie->hw_base) { + dev_err(&pdev->dev, "pci_ioremap_bar failed\n"); + ret = -ENODEV; + goto out_pci_release_regions; + } + + /* We disable the RETRY_TIMEOUT register (0x41) to keep + * PCI Tx retries from interfering with C3 CPU state */ + pci_write_config_byte(pdev, PCI_CFG_RETRY_TIMEOUT, 0x00); + + trans->dev = &pdev->dev; + trans_pcie->pci_dev = pdev; + iwl_disable_interrupts(trans); + + ret = pci_enable_msi(pdev); + if (ret) { + dev_err(&pdev->dev, "pci_enable_msi failed(0X%x)\n", ret); + /* enable rfkill interrupt: hw bug w/a */ + pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd); + if (pci_cmd & PCI_COMMAND_INTX_DISABLE) { + pci_cmd &= ~PCI_COMMAND_INTX_DISABLE; + pci_write_config_word(pdev, PCI_COMMAND, pci_cmd); + } + } + + trans->hw_rev = iwl_read32(trans, CSR_HW_REV); + /* + * In the 8000 HW family the format of the 4 bytes of CSR_HW_REV have + * changed, and now the revision step also includes bit 0-1 (no more + * "dash" value). To keep hw_rev backwards compatible - we'll store it + * in the old format. + */ + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) { + unsigned long flags; + + trans->hw_rev = (trans->hw_rev & 0xfff0) | + (CSR_HW_REV_STEP(trans->hw_rev << 2) << 2); + + ret = iwl_pcie_prepare_card_hw(trans); + if (ret) { + IWL_WARN(trans, "Exit HW not ready\n"); + goto out_pci_disable_msi; + } + + /* + * in-order to recognize C step driver should read chip version + * id located at the AUX bus MISC address space. 
+ */ + iwl_set_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_INIT_DONE); + udelay(2); + + ret = iwl_poll_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, + 25000); + if (ret < 0) { + IWL_DEBUG_INFO(trans, "Failed to wake up the nic\n"); + goto out_pci_disable_msi; + } + + if (iwl_trans_grab_nic_access(trans, &flags)) { + u32 hw_step; + + hw_step = iwl_read_prph_no_grab(trans, WFPM_CTRL_REG); + hw_step |= ENABLE_WFPM; + iwl_write_prph_no_grab(trans, WFPM_CTRL_REG, hw_step); + hw_step = iwl_read_prph_no_grab(trans, AUX_MISC_REG); + hw_step = (hw_step >> HW_STEP_LOCATION_BITS) & 0xF; + if (hw_step == 0x3) + trans->hw_rev = (trans->hw_rev & 0xFFFFFFF3) | + (SILICON_C_STEP << 2); + iwl_trans_release_nic_access(trans, &flags); + } + } + + trans->hw_id = (pdev->device << 16) + pdev->subsystem_device; + snprintf(trans->hw_id_str, sizeof(trans->hw_id_str), + "PCI ID: 0x%04X:0x%04X", pdev->device, pdev->subsystem_device); + + /* Initialize the wait queue for commands */ + init_waitqueue_head(&trans_pcie->wait_command_queue); + + ret = iwl_pcie_alloc_ict(trans); + if (ret) + goto out_pci_disable_msi; + + ret = request_threaded_irq(pdev->irq, iwl_pcie_isr, + iwl_pcie_irq_handler, + IRQF_SHARED, DRV_NAME, trans); + if (ret) { + IWL_ERR(trans, "Error allocating IRQ %d\n", pdev->irq); + goto out_free_ict; + } + + trans_pcie->inta_mask = CSR_INI_SET_MASK; + + return trans; + +out_free_ict: + iwl_pcie_free_ict(trans); +out_pci_disable_msi: + pci_disable_msi(pdev); +out_pci_release_regions: + pci_release_regions(pdev); +out_pci_disable_device: + pci_disable_device(pdev); +out_no_pci: + free_percpu(trans_pcie->tso_hdr_page); + iwl_trans_free(trans); + return ERR_PTR(ret); +} diff --git a/drivers/net/wireless/iwlwifi/dvm/lib.c b/drivers/net/wireless/iwlwifi/dvm/lib.c index 1d2223df5cb01..e7d3566c714bc 100644 --- a/drivers/net/wireless/iwlwifi/dvm/lib.c +++ b/drivers/net/wireless/iwlwifi/dvm/lib.c @@ -1022,7 +1022,7 @@ static void iwlagn_wowlan_program_keys(struct ieee80211_hw *hw, u8 *pn = seq.ccmp.pn; ieee80211_get_key_rx_seq(key, i, &seq); - aes_sc->pn = cpu_to_le64( + aes_sc[i].pn = cpu_to_le64( (u64)pn[5] | ((u64)pn[4] << 8) | ((u64)pn[3] << 16) | diff --git a/drivers/net/wireless/iwlwifi/iwl-7000.c b/drivers/net/wireless/iwlwifi/iwl-7000.c index 74ad278116be3..fd83e30eaf00b 100644 --- a/drivers/net/wireless/iwlwifi/iwl-7000.c +++ b/drivers/net/wireless/iwlwifi/iwl-7000.c @@ -325,6 +325,6 @@ const struct iwl_cfg iwl7265d_n_cfg = { }; MODULE_FIRMWARE(IWL7260_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); -MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL3160_UCODE_API_OK)); +MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); MODULE_FIRMWARE(IWL7265_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); MODULE_FIRMWARE(IWL7265D_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); diff --git a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c index 8e604a3931ca6..ef20be084b24f 100644 --- a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c @@ -540,13 +540,11 @@ static void iwl_set_hw_address_family_8000(struct device *dev, hw_addr = (const u8 *)(mac_override + MAC_ADDRESS_OVERRIDE_FAMILY_8000); - /* The byte order is little endian 16 bit, meaning 214365 */ - data->hw_addr[0] = hw_addr[1]; - data->hw_addr[1] = hw_addr[0]; - data->hw_addr[2] = hw_addr[3]; - data->hw_addr[3] = hw_addr[2]; - data->hw_addr[4] = hw_addr[5]; - data->hw_addr[5] = hw_addr[4]; + /* + * Store the MAC address from MAO 
section. + * No byte swapping is required in MAO section + */ + memcpy(data->hw_addr, hw_addr, ETH_ALEN); /* * Force the use of the OTP MAC address in case of reserved MAC diff --git a/drivers/net/wireless/iwlwifi/mvm/d3.c b/drivers/net/wireless/iwlwifi/mvm/d3.c index 4310cf102d78e..89d6a6100c881 100644 --- a/drivers/net/wireless/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/iwlwifi/mvm/d3.c @@ -298,12 +298,12 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw, u8 *pn = seq.ccmp.pn; ieee80211_get_key_rx_seq(key, i, &seq); - aes_sc->pn = cpu_to_le64((u64)pn[5] | - ((u64)pn[4] << 8) | - ((u64)pn[3] << 16) | - ((u64)pn[2] << 24) | - ((u64)pn[1] << 32) | - ((u64)pn[0] << 40)); + aes_sc[i].pn = cpu_to_le64((u64)pn[5] | + ((u64)pn[4] << 8) | + ((u64)pn[3] << 16) | + ((u64)pn[2] << 24) | + ((u64)pn[1] << 32) | + ((u64)pn[0] << 40)); } data->use_rsc_tsc = true; break; diff --git a/drivers/net/wireless/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/iwlwifi/mvm/debugfs.c index 9ac04c1ea7063..8c17b943cc6fa 100644 --- a/drivers/net/wireless/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/iwlwifi/mvm/debugfs.c @@ -6,7 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -32,7 +32,7 @@ * BSD LICENSE * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1356,6 +1356,7 @@ static ssize_t iwl_dbgfs_d0i3_refs_read(struct file *file, PRINT_MVM_REF(IWL_MVM_REF_UCODE_DOWN); PRINT_MVM_REF(IWL_MVM_REF_SCAN); PRINT_MVM_REF(IWL_MVM_REF_ROC); + PRINT_MVM_REF(IWL_MVM_REF_ROC_AUX); PRINT_MVM_REF(IWL_MVM_REF_P2P_CLIENT); PRINT_MVM_REF(IWL_MVM_REF_AP_IBSS); PRINT_MVM_REF(IWL_MVM_REF_USER); diff --git a/drivers/net/wireless/iwlwifi/mvm/fw.c b/drivers/net/wireless/iwlwifi/mvm/fw.c index df869633f4dd9..1e1c77a597601 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/iwlwifi/mvm/fw.c @@ -364,7 +364,7 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm) * abort after reading the nvm in case RF Kill is on, we will complete * the init seq later when RF kill will switch to off */ - if (iwl_mvm_is_radio_killed(mvm)) { + if (iwl_mvm_is_radio_hw_killed(mvm)) { IWL_DEBUG_RF_KILL(mvm, "jump over all phy activities due to RF kill\n"); iwl_remove_notification(&mvm->notif_wait, &calib_wait); @@ -397,7 +397,7 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm) ret = iwl_wait_notification(&mvm->notif_wait, &calib_wait, MVM_UCODE_CALIB_TIMEOUT); - if (ret && iwl_mvm_is_radio_killed(mvm)) { + if (ret && iwl_mvm_is_radio_hw_killed(mvm)) { IWL_DEBUG_RF_KILL(mvm, "RFKILL while calibrating.\n"); ret = 1; } diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index dda9f7b5f3423..90e8b662e44d3 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -1404,7 +1404,7 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm) * The work item could be running or queued if the * ROC time event stops just as we get here. 
*/ - cancel_work_sync(&mvm->roc_done_wk); + flush_work(&mvm->roc_done_wk); iwl_trans_stop_device(mvm->trans); @@ -2277,6 +2277,7 @@ static void iwl_mvm_stop_ap_ibss(struct ieee80211_hw *hw, iwl_mvm_remove_time_event(mvm, mvmvif, &mvmvif->time_event_data); RCU_INIT_POINTER(mvm->csa_vif, NULL); + mvmvif->csa_countdown = false; } if (rcu_access_pointer(mvm->csa_tx_blocked_vif) == vif) { @@ -2796,6 +2797,10 @@ static int iwl_mvm_mac_sched_scan_start(struct ieee80211_hw *hw, struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); int ret; + /* we don't support "match all" in the firmware */ + if (!req->n_match_sets) + return -EOPNOTSUPP; + if (!(mvm->fw->ucode_capa.capa[0] & IWL_UCODE_TLV_CAPA_UMAC_SCAN)) { ret = iwl_mvm_cancel_scan_wait_notif(mvm, IWL_MVM_SCAN_OS); if (ret) diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index cf70f681d1acb..83273adfabdd8 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -275,6 +275,7 @@ enum iwl_mvm_ref_type { IWL_MVM_REF_UCODE_DOWN, IWL_MVM_REF_SCAN, IWL_MVM_REF_ROC, + IWL_MVM_REF_ROC_AUX, IWL_MVM_REF_P2P_CLIENT, IWL_MVM_REF_AP_IBSS, IWL_MVM_REF_USER, @@ -869,6 +870,11 @@ static inline bool iwl_mvm_is_radio_killed(struct iwl_mvm *mvm) test_bit(IWL_MVM_STATUS_HW_CTKILL, &mvm->status); } +static inline bool iwl_mvm_is_radio_hw_killed(struct iwl_mvm *mvm) +{ + return test_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status); +} + /* Must be called with rcu_read_lock() held and it can only be * released when mvmsta is not needed anymore. */ diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c index 2ea01238754eb..8d4f287dca3bc 100644 --- a/drivers/net/wireless/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/iwlwifi/mvm/ops.c @@ -589,6 +589,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, ieee80211_unregister_hw(mvm->hw); iwl_mvm_leds_exit(mvm); out_free: + flush_delayed_work(&mvm->fw_dump_wk); iwl_phy_db_free(mvm->phy_db); kfree(mvm->scan_cmd); if (!cfg->no_power_up_nic_in_init || !mvm->nvm_file_name) diff --git a/drivers/net/wireless/iwlwifi/mvm/time-event.c b/drivers/net/wireless/iwlwifi/mvm/time-event.c index fd7b0d36f9a62..a7448cf016887 100644 --- a/drivers/net/wireless/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/iwlwifi/mvm/time-event.c @@ -6,7 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -32,7 +32,7 @@ * BSD LICENSE * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -108,12 +108,14 @@ void iwl_mvm_roc_done_wk(struct work_struct *wk) * in the case that the time event actually completed in the firmware * (which is handled in iwl_mvm_te_handle_notif). 
*/ - if (test_and_clear_bit(IWL_MVM_STATUS_ROC_RUNNING, &mvm->status)) + if (test_and_clear_bit(IWL_MVM_STATUS_ROC_RUNNING, &mvm->status)) { queues |= BIT(IWL_MVM_OFFCHANNEL_QUEUE); - if (test_and_clear_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status)) + iwl_mvm_unref(mvm, IWL_MVM_REF_ROC); + } + if (test_and_clear_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status)) { queues |= BIT(mvm->aux_queue); - - iwl_mvm_unref(mvm, IWL_MVM_REF_ROC); + iwl_mvm_unref(mvm, IWL_MVM_REF_ROC_AUX); + } synchronize_net(); @@ -393,6 +395,7 @@ static int iwl_mvm_aux_roc_te_handle_notif(struct iwl_mvm *mvm, } else if (le32_to_cpu(notif->action) == TE_V2_NOTIF_HOST_EVENT_START) { set_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status); te_data->running = true; + iwl_mvm_ref(mvm, IWL_MVM_REF_ROC_AUX); ieee80211_ready_on_channel(mvm->hw); /* Start TE */ } else { IWL_DEBUG_TE(mvm, diff --git a/drivers/net/wireless/iwlwifi/mvm/tx.c b/drivers/net/wireless/iwlwifi/mvm/tx.c index ef32e177f662b..281451c274ca3 100644 --- a/drivers/net/wireless/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/iwlwifi/mvm/tx.c @@ -225,7 +225,7 @@ void iwl_mvm_set_tx_cmd_rate(struct iwl_mvm *mvm, struct iwl_tx_cmd *tx_cmd, if (info->band == IEEE80211_BAND_2GHZ && !iwl_mvm_bt_coex_is_shared_ant_avail(mvm)) - rate_flags = BIT(mvm->cfg->non_shared_ant) << RATE_MCS_ANT_POS; + rate_flags = mvm->cfg->non_shared_ant << RATE_MCS_ANT_POS; else rate_flags = BIT(mvm->mgmt_last_antenna_idx) << RATE_MCS_ANT_POS; diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index b185697349223..9faf69875faba 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -382,6 +382,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x095B, 0x5310, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x5302, iwl7265_n_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x5210, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x5C10, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5012, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5412, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5410, iwl7265_2ac_cfg)}, @@ -399,10 +400,10 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x095A, 0x900A, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9110, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9112, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095A, 0x9210, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x9210, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x9200, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9510, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095A, 0x9310, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x9310, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9410, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5020, iwl7265_2n_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x502A, iwl7265_2n_cfg)}, @@ -412,18 +413,30 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x095A, 0x5590, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x5290, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5490, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x5F10, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x5212, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x520A, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x9000, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x9400, iwl7265_2ac_cfg)}, /* 8000 Series */ {IWL_PCI_DEVICE(0x24F3, 0x0010, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x1010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1130, iwl8260_2ac_cfg)}, + 
{IWL_PCI_DEVICE(0x24F3, 0x0132, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1132, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0110, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x01F0, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0012, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1012, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x1110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0250, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x1050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0150, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1150, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x0030, iwl8260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x24F4, 0x1130, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x1030, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xC010, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xD010, iwl8260_2ac_cfg)}, @@ -432,18 +445,28 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24F3, 0xC050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xD050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x8010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x9010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x8030, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x9030, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8132, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9132, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x8050, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8150, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x9050, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9150, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0004, iwl8260_2n_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0044, iwl8260_2n_cfg)}, {IWL_PCI_DEVICE(0x24F5, 0x0010, iwl4165_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F6, 0x0030, iwl4165_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0810, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0910, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0850, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0950, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0930, iwl8260_2ac_cfg)}, #endif /* CONFIG_IWLMVM */ {0} diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index dc179094e6a0d..840c47d8e2ce4 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -7,6 +7,7 @@ * * Copyright(c) 2007 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -33,6 +34,7 @@ * * Copyright(c) 2005 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -457,10 +459,16 @@ static void iwl_pcie_apm_stop(struct iwl_trans *trans, bool op_mode_leave) if (trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) iwl_set_bits_prph(trans, APMG_PCIDEV_STT_REG, APMG_PCIDEV_STT_VAL_WAKE_ME); - else if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + else if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) { + iwl_set_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, CSR_HW_IF_CONFIG_REG_PREPARE | CSR_HW_IF_CONFIG_REG_ENABLE_PME); + mdelay(1); + iwl_clear_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); + } mdelay(5); } @@ -555,6 +563,10 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) if (ret >= 0) return 0; + iwl_set_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); + msleep(1); + for (iter = 0; iter < 10; iter++) { /* If HW is not ready, prepare the conditions to check again */ iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, @@ -871,9 +883,16 @@ static void iwl_pcie_apply_destination(struct iwl_trans *trans) if (dest->monitor_mode == EXTERNAL_MODE && trans_pcie->fw_mon_size) { iwl_write_prph(trans, le32_to_cpu(dest->base_reg), trans_pcie->fw_mon_phys >> dest->base_shift); - iwl_write_prph(trans, le32_to_cpu(dest->end_reg), - (trans_pcie->fw_mon_phys + - trans_pcie->fw_mon_size) >> dest->end_shift); + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + iwl_write_prph(trans, le32_to_cpu(dest->end_reg), + (trans_pcie->fw_mon_phys + + trans_pcie->fw_mon_size - 256) >> + dest->end_shift); + else + iwl_write_prph(trans, le32_to_cpu(dest->end_reg), + (trans_pcie->fw_mon_phys + + trans_pcie->fw_mon_size) >> + dest->end_shift); } } @@ -2515,6 +2534,12 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, trans->hw_rev = (trans->hw_rev & 0xfff0) | (CSR_HW_REV_STEP(trans->hw_rev << 2) << 2); + ret = iwl_pcie_prepare_card_hw(trans); + if (ret) { + IWL_WARN(trans, "Exit HW not ready\n"); + goto out_pci_disable_msi; + } + /* * in-order to recognize C step driver should read chip version * id located at the AUX bus MISC address space. 
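The mwifiex change that follows swaps snprintf() for scnprintf() when accumulating output into a fixed page. The distinction matters: snprintf() returns the length the output would have had without truncation, so on overflow `pos` can run past the end of the buffer, while scnprintf() returns the bytes actually stored, keeping the offset in bounds. A minimal sketch of the safe accumulation loop; values[] and n are illustrative parameters, not mwifiex names:

static int format_values(char *buf, size_t size, const u8 *values, int n)
{
	int pos = 0, i;

	for (i = 0; i < n; i++)
		pos += scnprintf(buf + pos, size - pos, "%d ", values[i]);

	/* pos is always < size, so it is safe to hand straight to
	 * simple_read_from_buffer() */
	return pos;
}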
diff --git a/drivers/net/wireless/mwifiex/debugfs.c b/drivers/net/wireless/mwifiex/debugfs.c index 1fb329dc67445..24e48bddf186a 100644 --- a/drivers/net/wireless/mwifiex/debugfs.c +++ b/drivers/net/wireless/mwifiex/debugfs.c @@ -593,7 +593,7 @@ mwifiex_rdeeprom_read(struct file *file, char __user *ubuf, (struct mwifiex_private *) file->private_data; unsigned long addr = get_zeroed_page(GFP_KERNEL); char *buf = (char *) addr; - int pos = 0, ret = 0, i; + int pos, ret, i; u8 value[MAX_EEPROM_DATA]; if (!buf) @@ -601,7 +601,7 @@ mwifiex_rdeeprom_read(struct file *file, char __user *ubuf, if (saved_offset == -1) { /* No command has been given */ - pos += snprintf(buf, PAGE_SIZE, "0"); + pos = snprintf(buf, PAGE_SIZE, "0"); goto done; } @@ -610,17 +610,17 @@ mwifiex_rdeeprom_read(struct file *file, char __user *ubuf, (u16) saved_bytes, value); if (ret) { ret = -EINVAL; - goto done; + goto out_free; } - pos += snprintf(buf, PAGE_SIZE, "%d %d ", saved_offset, saved_bytes); + pos = snprintf(buf, PAGE_SIZE, "%d %d ", saved_offset, saved_bytes); for (i = 0; i < saved_bytes; i++) - pos += snprintf(buf + strlen(buf), PAGE_SIZE, "%d ", value[i]); - - ret = simple_read_from_buffer(ubuf, count, ppos, buf, pos); + pos += scnprintf(buf + pos, PAGE_SIZE - pos, "%d ", value[i]); done: + ret = simple_read_from_buffer(ubuf, count, ppos, buf, pos); +out_free: free_page(addr); return ret; } diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c index b6cc9ff47fc2e..40d72312f3df2 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c @@ -172,6 +172,7 @@ static int rsi_load_ta_instructions(struct rsi_common *common) (struct rsi_91x_sdiodev *)adapter->rsi_dev; u32 len; u32 num_blocks; + const u8 *fw; const struct firmware *fw_entry = NULL; u32 block_size = dev->tx_blk_size; int status = 0; @@ -200,6 +201,12 @@ static int rsi_load_ta_instructions(struct rsi_common *common) return status; } + /* Copy firmware into DMA-accessible memory */ + fw = kmemdup(fw_entry->data, fw_entry->size, GFP_KERNEL); + if (!fw) { + status = -ENOMEM; + goto out; + } len = fw_entry->size; if (len % 4) @@ -210,7 +217,10 @@ static int rsi_load_ta_instructions(struct rsi_common *common) rsi_dbg(INIT_ZONE, "%s: Instruction size:%d\n", __func__, len); rsi_dbg(INIT_ZONE, "%s: num blocks: %d\n", __func__, num_blocks); - status = rsi_copy_to_card(common, fw_entry->data, len, num_blocks); + status = rsi_copy_to_card(common, fw, len, num_blocks); + kfree(fw); + +out: release_firmware(fw_entry); return status; } diff --git a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c index 1106ce76707e1..de4900862836a 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c @@ -146,7 +146,12 @@ static int rsi_load_ta_instructions(struct rsi_common *common) return status; } + /* Copy firmware into DMA-accessible memory */ fw = kmemdup(fw_entry->data, fw_entry->size, GFP_KERNEL); + if (!fw) { + status = -ENOMEM; + goto out; + } len = fw_entry->size; if (len % 4) @@ -158,6 +163,9 @@ static int rsi_load_ta_instructions(struct rsi_common *common) rsi_dbg(INIT_ZONE, "%s: num blocks: %d\n", __func__, num_blocks); status = rsi_copy_to_card(common, fw, len, num_blocks); + kfree(fw); + +out: release_firmware(fw_entry); return status; } diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c index 3b3a88b53b119..585d0883c7e58 100644 --- 
a/drivers/net/wireless/rtlwifi/core.c +++ b/drivers/net/wireless/rtlwifi/core.c @@ -1015,9 +1015,12 @@ static void send_beacon_frame(struct ieee80211_hw *hw, { struct rtl_priv *rtlpriv = rtl_priv(hw); struct sk_buff *skb = ieee80211_beacon_get(hw, vif); + struct rtl_tcb_desc tcb_desc; - if (skb) - rtlpriv->intf_ops->adapter_tx(hw, NULL, skb, NULL); + if (skb) { + memset(&tcb_desc, 0, sizeof(struct rtl_tcb_desc)); + rtlpriv->intf_ops->adapter_tx(hw, NULL, skb, &tcb_desc); + } } static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index f46c9d7f65281..7f471bff435c0 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -801,7 +801,9 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) hw_queue); if (rx_remained_cnt == 0) return; - + buffer_desc = &rtlpci->rx_ring[rxring_idx].buffer_desc[ + rtlpci->rx_ring[rxring_idx].idx]; + pdesc = (struct rtl_rx_desc *)skb->data; } else { /* rx descriptor */ pdesc = &rtlpci->rx_ring[rxring_idx].desc[ rtlpci->rx_ring[rxring_idx].idx]; @@ -824,13 +826,6 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) new_skb = dev_alloc_skb(rtlpci->rxbuffersize); if (unlikely(!new_skb)) goto no_new; - if (rtlpriv->use_new_trx_flow) { - buffer_desc = - &rtlpci->rx_ring[rxring_idx].buffer_desc - [rtlpci->rx_ring[rxring_idx].idx]; - /*means rx wifi info*/ - pdesc = (struct rtl_rx_desc *)skb->data; - } memset(&rx_status , 0 , sizeof(rx_status)); rtlpriv->cfg->ops->query_rx_desc(hw, &stats, &rx_status, (u8 *)pdesc, skb); diff --git a/drivers/net/wireless/rtlwifi/pci.h b/drivers/net/wireless/rtlwifi/pci.h index d4567d12e07eb..5da6703942d9d 100644 --- a/drivers/net/wireless/rtlwifi/pci.h +++ b/drivers/net/wireless/rtlwifi/pci.h @@ -247,6 +247,8 @@ struct rtl_pci { /* MSI support */ bool msi_support; bool using_msi; + /* interrupt clear before set */ + bool int_clear; }; struct mp_adapter { diff --git a/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c index 86ce5b1930e6d..e5d8108f1987d 100644 --- a/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c @@ -1354,27 +1354,11 @@ void rtl88ee_set_qos(struct ieee80211_hw *hw, int aci) } } -static void rtl88ee_clear_interrupt(struct ieee80211_hw *hw) -{ - struct rtl_priv *rtlpriv = rtl_priv(hw); - u32 tmp; - - tmp = rtl_read_dword(rtlpriv, REG_HISR); - rtl_write_dword(rtlpriv, REG_HISR, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HISRE); - rtl_write_dword(rtlpriv, REG_HISRE, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HSISR); - rtl_write_dword(rtlpriv, REG_HSISR, tmp); -} - void rtl88ee_enable_interrupt(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); - rtl88ee_clear_interrupt(hw);/*clear it here first*/ rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] & 0xFFFFFFFF); rtl_write_dword(rtlpriv, REG_HIMRE, diff --git a/drivers/net/wireless/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/rtlwifi/rtl8188ee/sw.c index 11344121c55e0..47e32cb0ec1a4 100644 --- a/drivers/net/wireless/rtlwifi/rtl8188ee/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8188ee/sw.c @@ -88,8 +88,6 @@ int rtl88e_init_sw_vars(struct ieee80211_hw *hw) u8 tid; rtl8188ee_bt_reg_init(hw); - rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; - rtlpriv->dm.dm_initialgain_enable = 1; rtlpriv->dm.dm_flag = 0; rtlpriv->dm.disable_framebursting = 0; @@ -138,6 +136,11 @@ int 
rtl88e_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->psc.inactiveps = rtlpriv->cfg->mod_params->inactiveps; rtlpriv->psc.swctrl_lps = rtlpriv->cfg->mod_params->swctrl_lps; rtlpriv->psc.fwctrl_lps = rtlpriv->cfg->mod_params->fwctrl_lps; + rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; + rtlpriv->cfg->mod_params->sw_crypto = + rtlpriv->cfg->mod_params->sw_crypto; + rtlpriv->cfg->mod_params->disable_watchdog = + rtlpriv->cfg->mod_params->disable_watchdog; if (rtlpriv->cfg->mod_params->disable_watchdog) pr_info("watchdog disabled\n"); if (!rtlpriv->psc.inactiveps) diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c index de6cb6c3a48cc..4780bdc63b2bf 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c @@ -139,6 +139,8 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->psc.inactiveps = rtlpriv->cfg->mod_params->inactiveps; rtlpriv->psc.swctrl_lps = rtlpriv->cfg->mod_params->swctrl_lps; rtlpriv->psc.fwctrl_lps = rtlpriv->cfg->mod_params->fwctrl_lps; + rtlpriv->cfg->mod_params->sw_crypto = + rtlpriv->cfg->mod_params->sw_crypto; if (!rtlpriv->psc.inactiveps) pr_info("rtl8192ce: Power Save off (module option)\n"); if (!rtlpriv->psc.fwctrl_lps) diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c index 23806c243a531..7c6f7f0d18c60 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c @@ -65,6 +65,8 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->dm.disable_framebursting = false; rtlpriv->dm.thermalvalue = 0; rtlpriv->dbg.global_debuglevel = rtlpriv->cfg->mod_params->debug; + rtlpriv->cfg->mod_params->sw_crypto = + rtlpriv->cfg->mod_params->sw_crypto; /* for firmware buf */ rtlpriv->rtlhal.pfirmware = vzalloc(0x4000); @@ -321,6 +323,7 @@ static struct usb_device_id rtl8192c_usb_ids[] = { {RTL_USB_DEVICE(0x07b8, 0x8188, rtl92cu_hal_cfg)}, /*Abocom - Abocom*/ {RTL_USB_DEVICE(0x07b8, 0x8189, rtl92cu_hal_cfg)}, /*Funai - Abocom*/ {RTL_USB_DEVICE(0x0846, 0x9041, rtl92cu_hal_cfg)}, /*NetGear WNA1000M*/ + {RTL_USB_DEVICE(0x0846, 0x9043, rtl92cu_hal_cfg)}, /*NG WNA1000Mv2*/ {RTL_USB_DEVICE(0x0b05, 0x17ba, rtl92cu_hal_cfg)}, /*ASUS-Edimax*/ {RTL_USB_DEVICE(0x0bda, 0x5088, rtl92cu_hal_cfg)}, /*Thinkware-CC&C*/ {RTL_USB_DEVICE(0x0df6, 0x0052, rtl92cu_hal_cfg)}, /*Sitecom - Edimax*/ diff --git a/drivers/net/wireless/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/rtlwifi/rtl8192de/sw.c index b19d0398215fd..c6e09a19de1ac 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192de/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192de/sw.c @@ -376,8 +376,8 @@ module_param_named(swlps, rtl92de_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl92de_mod_params.fwctrl_lps, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); -MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); -MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); +MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 1)\n"); +MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 0)\n"); MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)"); static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume); diff --git a/drivers/net/wireless/rtlwifi/rtl8192ee/hw.c 
b/drivers/net/wireless/rtlwifi/rtl8192ee/hw.c index da0a6125f314b..cbf2ca7c7c6de 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ee/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ee/hw.c @@ -1584,28 +1584,11 @@ void rtl92ee_set_qos(struct ieee80211_hw *hw, int aci) } } -static void rtl92ee_clear_interrupt(struct ieee80211_hw *hw) -{ - struct rtl_priv *rtlpriv = rtl_priv(hw); - u32 tmp; - - tmp = rtl_read_dword(rtlpriv, REG_HISR); - rtl_write_dword(rtlpriv, REG_HISR, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HISRE); - rtl_write_dword(rtlpriv, REG_HISRE, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HSISR); - rtl_write_dword(rtlpriv, REG_HSISR, tmp); -} - void rtl92ee_enable_interrupt(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); - rtl92ee_clear_interrupt(hw);/*clear it here first*/ - rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] & 0xFFFFFFFF); rtl_write_dword(rtlpriv, REG_HIMRE, rtlpci->irq_mask[1] & 0xFFFFFFFF); rtlpci->irq_enabled = true; diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c index e1fd27c888bfc..31baca41ac2f4 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c @@ -187,6 +187,8 @@ static int rtl92s_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->psc.inactiveps = rtlpriv->cfg->mod_params->inactiveps; rtlpriv->psc.swctrl_lps = rtlpriv->cfg->mod_params->swctrl_lps; rtlpriv->psc.fwctrl_lps = rtlpriv->cfg->mod_params->fwctrl_lps; + rtlpriv->cfg->mod_params->sw_crypto = + rtlpriv->cfg->mod_params->sw_crypto; if (!rtlpriv->psc.inactiveps) pr_info("Power Save off (module option)\n"); if (!rtlpriv->psc.fwctrl_lps) @@ -425,8 +427,8 @@ module_param_named(swlps, rtl92se_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl92se_mod_params.fwctrl_lps, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); -MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); -MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); +MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 1)\n"); +MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 0)\n"); MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)"); static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume); diff --git a/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c index 67bb47d77b68c..a4b7eac6856f2 100644 --- a/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c @@ -1258,18 +1258,6 @@ void rtl8723e_set_qos(struct ieee80211_hw *hw, int aci) } } -static void rtl8723e_clear_interrupt(struct ieee80211_hw *hw) -{ - struct rtl_priv *rtlpriv = rtl_priv(hw); - u32 tmp; - - tmp = rtl_read_dword(rtlpriv, REG_HISR); - rtl_write_dword(rtlpriv, REG_HISR, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HISRE); - rtl_write_dword(rtlpriv, REG_HISRE, tmp); -} - void rtl8723e_enable_interrupt(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); @@ -1284,7 +1272,6 @@ void rtl8723e_disable_interrupt(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); - rtl8723e_clear_interrupt(hw);/*clear it here first*/ rtl_write_dword(rtlpriv, 0x3a8, IMR8190_DISABLED); rtl_write_dword(rtlpriv, 0x3ac, 
IMR8190_DISABLED); rtlpci->irq_enabled = false; diff --git a/drivers/net/wireless/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/rtlwifi/rtl8723be/hw.c index b681af3c7a355..b9417268427ed 100644 --- a/drivers/net/wireless/rtlwifi/rtl8723be/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8723be/hw.c @@ -1634,28 +1634,11 @@ void rtl8723be_set_qos(struct ieee80211_hw *hw, int aci) } } -static void rtl8723be_clear_interrupt(struct ieee80211_hw *hw) -{ - struct rtl_priv *rtlpriv = rtl_priv(hw); - u32 tmp; - - tmp = rtl_read_dword(rtlpriv, REG_HISR); - rtl_write_dword(rtlpriv, REG_HISR, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HISRE); - rtl_write_dword(rtlpriv, REG_HISRE, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HSISR); - rtl_write_dword(rtlpriv, REG_HSISR, tmp); -} - void rtl8723be_enable_interrupt(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); - rtl8723be_clear_interrupt(hw);/*clear it here first*/ - rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] & 0xFFFFFFFF); rtl_write_dword(rtlpriv, REG_HIMRE, rtlpci->irq_mask[1] & 0xFFFFFFFF); rtlpci->irq_enabled = true; diff --git a/drivers/net/wireless/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/rtlwifi/rtl8723be/sw.c index 1017f02d7bf75..7bf88d9dcdc3f 100644 --- a/drivers/net/wireless/rtlwifi/rtl8723be/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8723be/sw.c @@ -385,6 +385,7 @@ module_param_named(debug, rtl8723be_mod_params.debug, int, 0444); module_param_named(ips, rtl8723be_mod_params.inactiveps, bool, 0444); module_param_named(swlps, rtl8723be_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl8723be_mod_params.fwctrl_lps, bool, 0444); +module_param_named(msi, rtl8723be_mod_params.msi_support, bool, 0444); module_param_named(disable_watchdog, rtl8723be_mod_params.disable_watchdog, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c index 8704eee9f3a49..76e52dfb2be5d 100644 --- a/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c @@ -2180,7 +2180,7 @@ static int _rtl8821ae_set_media_status(struct ieee80211_hw *hw, rtl_write_byte(rtlpriv, MSR, bt_msr); rtlpriv->cfg->ops->led_control(hw, ledaction); - if ((bt_msr & 0xfc) == MSR_AP) + if ((bt_msr & MSR_MASK) == MSR_AP) rtl_write_byte(rtlpriv, REG_BCNTCFG + 1, 0x00); else rtl_write_byte(rtlpriv, REG_BCNTCFG + 1, 0x66); @@ -2256,18 +2256,14 @@ void rtl8821ae_set_qos(struct ieee80211_hw *hw, int aci) static void rtl8821ae_clear_interrupt(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); - u32 tmp; - tmp = rtl_read_dword(rtlpriv, REG_HISR); - /*printk("clear interrupt first:\n"); - printk("0x%x = 0x%08x\n",REG_HISR, tmp);*/ + u32 tmp = rtl_read_dword(rtlpriv, REG_HISR); + rtl_write_dword(rtlpriv, REG_HISR, tmp); tmp = rtl_read_dword(rtlpriv, REG_HISRE); - /*printk("0x%x = 0x%08x\n",REG_HISRE, tmp);*/ rtl_write_dword(rtlpriv, REG_HISRE, tmp); tmp = rtl_read_dword(rtlpriv, REG_HSISR); - /*printk("0x%x = 0x%08x\n",REG_HSISR, tmp);*/ rtl_write_dword(rtlpriv, REG_HSISR, tmp); } @@ -2276,7 +2272,8 @@ void rtl8821ae_enable_interrupt(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); - rtl8821ae_clear_interrupt(hw);/*clear it here first*/ + if (!rtlpci->int_clear) + rtl8821ae_clear_interrupt(hw);/*clear it here first*/ rtl_write_dword(rtlpriv, REG_HIMR, 
rtlpci->irq_mask[0] & 0xFFFFFFFF); rtl_write_dword(rtlpriv, REG_HIMRE, rtlpci->irq_mask[1] & 0xFFFFFFFF); diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/reg.h b/drivers/net/wireless/rtlwifi/rtl8821ae/reg.h index 53668fc8f23e2..1d6110f9c1fb6 100644 --- a/drivers/net/wireless/rtlwifi/rtl8821ae/reg.h +++ b/drivers/net/wireless/rtlwifi/rtl8821ae/reg.h @@ -429,6 +429,7 @@ #define MSR_ADHOC 0x01 #define MSR_INFRA 0x02 #define MSR_AP 0x03 +#define MSR_MASK 0x03 #define RRSR_RSC_OFFSET 21 #define RRSR_SHORT_OFFSET 23 diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/rtlwifi/rtl8821ae/sw.c index a4988121e1ab6..8ee141a55bc5c 100644 --- a/drivers/net/wireless/rtlwifi/rtl8821ae/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8821ae/sw.c @@ -96,6 +96,7 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) rtl8821ae_bt_reg_init(hw); rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; + rtlpci->int_clear = rtlpriv->cfg->mod_params->int_clear; rtlpriv->btcoexist.btc_ops = rtl_btc_get_ops_pointer(); rtlpriv->dm.dm_initialgain_enable = 1; @@ -167,6 +168,7 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->psc.swctrl_lps = rtlpriv->cfg->mod_params->swctrl_lps; rtlpriv->psc.fwctrl_lps = rtlpriv->cfg->mod_params->fwctrl_lps; rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; + rtlpci->msi_support = rtlpriv->cfg->mod_params->int_clear; if (rtlpriv->cfg->mod_params->disable_watchdog) pr_info("watchdog disabled\n"); rtlpriv->psc.reg_fwctrl_lps = 3; @@ -308,6 +310,7 @@ static struct rtl_mod_params rtl8821ae_mod_params = { .swctrl_lps = false, .fwctrl_lps = true, .msi_support = true, + .int_clear = true, .debug = DBG_EMERG, .disable_watchdog = 0, }; @@ -437,6 +440,7 @@ module_param_named(fwlps, rtl8821ae_mod_params.fwctrl_lps, bool, 0444); module_param_named(msi, rtl8821ae_mod_params.msi_support, bool, 0444); module_param_named(disable_watchdog, rtl8821ae_mod_params.disable_watchdog, bool, 0444); +module_param_named(int_clear, rtl8821ae_mod_params.int_clear, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); @@ -444,6 +448,7 @@ MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 1)\n"); MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)"); MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n"); +MODULE_PARM_DESC(int_clear, "Set to 1 to disable interrupt clear before set (default 0)\n"); static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume); diff --git a/drivers/net/wireless/rtlwifi/usb.c b/drivers/net/wireless/rtlwifi/usb.c index 2721cf89fb160..aac1ed3f7bb41 100644 --- a/drivers/net/wireless/rtlwifi/usb.c +++ b/drivers/net/wireless/rtlwifi/usb.c @@ -531,6 +531,8 @@ static void _rtl_usb_rx_process_noagg(struct ieee80211_hw *hw, ieee80211_rx(hw, skb); else dev_kfree_skb_any(skb); + } else { + dev_kfree_skb_any(skb); } } diff --git a/drivers/net/wireless/rtlwifi/wifi.h b/drivers/net/wireless/rtlwifi/wifi.h index 51572912c53dc..f1fa8100f2881 100644 --- a/drivers/net/wireless/rtlwifi/wifi.h +++ b/drivers/net/wireless/rtlwifi/wifi.h @@ -2233,6 +2233,9 @@ struct rtl_mod_params { /* default 0: 1 means disable */ bool disable_watchdog; + + /* default 0: 1 means do not disable interrupts */ + bool int_clear; }; struct 
rtl_hal_usbint_cfg { diff --git a/drivers/net/wireless/ti/wlcore/io.h b/drivers/net/wireless/ti/wlcore/io.h index 0305729d09868..10cf3747694d5 100644 --- a/drivers/net/wireless/ti/wlcore/io.h +++ b/drivers/net/wireless/ti/wlcore/io.h @@ -207,19 +207,23 @@ static inline int __must_check wlcore_write_reg(struct wl1271 *wl, int reg, static inline void wl1271_power_off(struct wl1271 *wl) { - int ret; + int ret = 0; if (!test_bit(WL1271_FLAG_GPIO_POWER, &wl->flags)) return; - ret = wl->if_ops->power(wl->dev, false); + if (wl->if_ops->power) + ret = wl->if_ops->power(wl->dev, false); if (!ret) clear_bit(WL1271_FLAG_GPIO_POWER, &wl->flags); } static inline int wl1271_power_on(struct wl1271 *wl) { - int ret = wl->if_ops->power(wl->dev, true); + int ret = 0; + + if (wl->if_ops->power) + ret = wl->if_ops->power(wl->dev, true); if (ret == 0) set_bit(WL1271_FLAG_GPIO_POWER, &wl->flags); diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index f1ac2839d97cb..720e4e4b5a3cd 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -73,7 +73,10 @@ */ #define SPI_AGGR_BUFFER_SIZE (4 * PAGE_SIZE) -#define WSPI_MAX_NUM_OF_CHUNKS (SPI_AGGR_BUFFER_SIZE / WSPI_MAX_CHUNK_SIZE) +/* Maximum number of SPI write chunks */ +#define WSPI_MAX_NUM_OF_CHUNKS \ + ((SPI_AGGR_BUFFER_SIZE / WSPI_MAX_CHUNK_SIZE) + 1) + struct wl12xx_spi_glue { struct device *dev; @@ -268,9 +271,10 @@ static int __must_check wl12xx_spi_raw_write(struct device *child, int addr, void *buf, size_t len, bool fixed) { struct wl12xx_spi_glue *glue = dev_get_drvdata(child->parent); - struct spi_transfer t[2 * (WSPI_MAX_NUM_OF_CHUNKS + 1)]; + /* SPI write buffers - 2 for each chunk */ + struct spi_transfer t[2 * WSPI_MAX_NUM_OF_CHUNKS]; struct spi_message m; - u32 commands[WSPI_MAX_NUM_OF_CHUNKS]; + u32 commands[WSPI_MAX_NUM_OF_CHUNKS]; /* 1 command per chunk */ u32 *cmd; u32 chunk_len; int i; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 0d2594395ffbc..5e5b6184e7205 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1571,13 +1571,13 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) smp_rmb(); while (dc != dp) { - BUG_ON(gop - queue->tx_unmap_ops > MAX_PENDING_REQS); + BUG_ON(gop - queue->tx_unmap_ops >= MAX_PENDING_REQS); pending_idx = queue->dealloc_ring[pending_index(dc++)]; - pending_idx_release[gop-queue->tx_unmap_ops] = + pending_idx_release[gop - queue->tx_unmap_ops] = pending_idx; - queue->pages_to_unmap[gop-queue->tx_unmap_ops] = + queue->pages_to_unmap[gop - queue->tx_unmap_ops] = queue->mmap_pages[pending_idx]; gnttab_set_unmap_op(gop, idx_to_kaddr(queue, pending_idx), @@ -2007,8 +2007,11 @@ static int __init netback_init(void) if (!xen_domain()) return -ENODEV; - /* Allow as many queues as there are CPUs, by default */ - xenvif_max_queues = num_online_cpus(); + /* Allow as many queues as there are CPUs if user has not + * specified a value. 
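+ * (0 means the user did not set the module parameter, since nothing
+ * else writes xenvif_max_queues before this point.)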
+ */ + if (xenvif_max_queues == 0) + xenvif_max_queues = num_online_cpus(); if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 968787abf78d4..ec383b0f54435 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -681,6 +681,9 @@ static int xen_register_watchers(struct xenbus_device *dev, struct xenvif *vif) char *node; unsigned maxlen = strlen(dev->nodename) + sizeof("/rate"); + if (vif->credit_watch.node) + return -EADDRINUSE; + node = kmalloc(maxlen, GFP_KERNEL); if (!node) return -ENOMEM; @@ -770,6 +773,7 @@ static void connect(struct backend_info *be) } xen_net_read_rate(dev, &credit_bytes, &credit_usec); + xen_unregister_watchers(be->vif); xen_register_watchers(dev, be->vif); read_xenbus_vif_flags(be); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index e031c943286ef..fd51626e859e4 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1353,7 +1353,8 @@ static void xennet_disconnect_backend(struct netfront_info *info) queue->tx_evtchn = queue->rx_evtchn = 0; queue->tx_irq = queue->rx_irq = 0; - napi_synchronize(&queue->napi); + if (netif_running(info->netdev)) + napi_synchronize(&queue->napi); xennet_release_tx_bufs(queue); xennet_release_rx_bufs(queue); @@ -1709,19 +1710,19 @@ static void xennet_destroy_queues(struct netfront_info *info) } static int xennet_create_queues(struct netfront_info *info, - unsigned int num_queues) + unsigned int *num_queues) { unsigned int i; int ret; - info->queues = kcalloc(num_queues, sizeof(struct netfront_queue), + info->queues = kcalloc(*num_queues, sizeof(struct netfront_queue), GFP_KERNEL); if (!info->queues) return -ENOMEM; rtnl_lock(); - for (i = 0; i < num_queues; i++) { + for (i = 0; i < *num_queues; i++) { struct netfront_queue *queue = &info->queues[i]; queue->id = i; @@ -1731,7 +1732,7 @@ static int xennet_create_queues(struct netfront_info *info, if (ret < 0) { dev_warn(&info->netdev->dev, "only created %d queues\n", i); - num_queues = i; + *num_queues = i; break; } @@ -1741,11 +1742,11 @@ static int xennet_create_queues(struct netfront_info *info, napi_enable(&queue->napi); } - netif_set_real_num_tx_queues(info->netdev, num_queues); + netif_set_real_num_tx_queues(info->netdev, *num_queues); rtnl_unlock(); - if (num_queues == 0) { + if (*num_queues == 0) { dev_err(&info->netdev->dev, "no queues\n"); return -EINVAL; } @@ -1791,7 +1792,7 @@ static int talk_to_netback(struct xenbus_device *dev, if (info->queues) xennet_destroy_queues(info); - err = xennet_create_queues(info, num_queues); + err = xennet_create_queues(info, &num_queues); if (err < 0) goto destroy_ring; @@ -2139,8 +2140,11 @@ static int __init netif_init(void) pr_info("Initialising Xen virtual ethernet driver\n"); - /* Allow as many queues as there are CPUs, by default */ - xennet_max_queues = num_online_cpus(); + /* Allow as many queues as there are CPUs if user has not + * specified a value. 
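+ * (Likewise, 0 means the module parameter was left unset.)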
+ */ + if (xennet_max_queues == 0) + xennet_max_queues = num_online_cpus(); return xenbus_register_frontend(&netfront_driver); } diff --git a/drivers/nfc/st21nfca/st21nfca.c b/drivers/nfc/st21nfca/st21nfca.c index d251f7229c4e8..051286562fab3 100644 --- a/drivers/nfc/st21nfca/st21nfca.c +++ b/drivers/nfc/st21nfca/st21nfca.c @@ -148,14 +148,14 @@ static int st21nfca_hci_load_session(struct nfc_hci_dev *hdev) ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_DEVICE_MGNT_PIPE); if (r < 0) - goto free_info; + return r; /* Get pipe list */ r = nfc_hci_send_cmd(hdev, ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_DM_GETINFO, pipe_list, sizeof(pipe_list), &skb_pipe_list); if (r < 0) - goto free_info; + return r; /* Complete the existing gate_pipe table */ for (i = 0; i < skb_pipe_list->len; i++) { @@ -181,6 +181,7 @@ static int st21nfca_hci_load_session(struct nfc_hci_dev *hdev) info->src_host_id != ST21NFCA_ESE_HOST_ID) { pr_err("Unexpected apdu_reader pipe on host %x\n", info->src_host_id); + kfree_skb(skb_pipe_info); continue; } @@ -200,6 +201,7 @@ static int st21nfca_hci_load_session(struct nfc_hci_dev *hdev) hdev->pipes[st21nfca_gates[j].pipe].dest_host = info->src_host_id; } + kfree_skb(skb_pipe_info); } /* @@ -214,13 +216,12 @@ static int st21nfca_hci_load_session(struct nfc_hci_dev *hdev) st21nfca_gates[i].gate, st21nfca_gates[i].pipe); if (r < 0) - goto free_info; + goto free_list; } } memcpy(hdev->init_data.gates, st21nfca_gates, sizeof(st21nfca_gates)); -free_info: - kfree_skb(skb_pipe_info); +free_list: kfree_skb(skb_pipe_list); return r; } diff --git a/drivers/nfc/st21nfcb/i2c.c b/drivers/nfc/st21nfcb/i2c.c index 76a4cad41cec9..c44f8cf5391a6 100644 --- a/drivers/nfc/st21nfcb/i2c.c +++ b/drivers/nfc/st21nfcb/i2c.c @@ -87,11 +87,6 @@ static void st21nfcb_nci_i2c_disable(void *phy_id) gpio_set_value(phy->gpio_reset, 1); } -static void st21nfcb_nci_remove_header(struct sk_buff *skb) -{ - skb_pull(skb, ST21NFCB_FRAME_HEADROOM); -} - /* * Writing a frame must not return the number of written bytes. * It must return either zero for success, or <0 for error. 
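The convention above, as a minimal illustrative sketch (the function name and error code are hypothetical, not the driver's exact body; it assumes only that i2c_master_send() returns the number of bytes written on success, or a negative errno):

/* Map i2c_master_send()'s byte count onto the 0 / -errno convention
 * expected from a frame-write callback. Assumes <linux/i2c.h>. */
static int example_frame_write(struct i2c_client *client, struct sk_buff *skb)
{
	int r = i2c_master_send(client, skb->data, skb->len);

	if (r < 0)
		return r;		/* pass the errno through unchanged */
	if (r != skb->len)
		return -EREMOTEIO;	/* a short write is an error, not a count */
	return 0;			/* success must be exactly zero */
}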
@@ -121,8 +116,6 @@ static int st21nfcb_nci_i2c_write(void *phy_id, struct sk_buff *skb) r = 0; } - st21nfcb_nci_remove_header(skb); - return r; } @@ -366,9 +359,6 @@ static int st21nfcb_nci_i2c_remove(struct i2c_client *client) ndlc_remove(phy->ndlc); - if (phy->powered) - st21nfcb_nci_i2c_disable(phy); - return 0; } diff --git a/drivers/nfc/st21nfcb/st21nfcb.c b/drivers/nfc/st21nfcb/st21nfcb.c index ca9871ab3fb3c..c7dc282d5c3be 100644 --- a/drivers/nfc/st21nfcb/st21nfcb.c +++ b/drivers/nfc/st21nfcb/st21nfcb.c @@ -131,11 +131,8 @@ EXPORT_SYMBOL_GPL(st21nfcb_nci_probe); void st21nfcb_nci_remove(struct nci_dev *ndev) { - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - nci_unregister_device(ndev); nci_free_device(ndev); - kfree(info); } EXPORT_SYMBOL_GPL(st21nfcb_nci_remove); diff --git a/drivers/of/address.c b/drivers/of/address.c index 78a7dcbec7d89..384574c3987c3 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -712,7 +712,7 @@ int __weak pci_register_io_range(phys_addr_t addr, resource_size_t size) } /* add the range to the list */ - range = kzalloc(sizeof(*range), GFP_KERNEL); + range = kzalloc(sizeof(*range), GFP_ATOMIC); if (!range) { err = -ENOMEM; goto end_register; @@ -765,7 +765,7 @@ unsigned long __weak pci_address_to_pio(phys_addr_t address) spin_lock(&io_range_lock); list_for_each_entry(res, &io_range_list, list) { if (address >= res->start && address < res->start + res->size) { - addr = res->start - address + offset; + addr = address - res->start + offset; break; } offset += res->size; @@ -845,10 +845,10 @@ struct device_node *of_find_matching_node_by_address(struct device_node *from, struct resource res; while (dn) { - if (of_address_to_resource(dn, 0, &res)) - continue; - if (res.start == base_address) + if (!of_address_to_resource(dn, 0, &res) && + res.start == base_address) return dn; + dn = of_find_matching_node(dn, matches); } diff --git a/drivers/of/base.c b/drivers/of/base.c index f0650265febf9..5ed97246c2e7f 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -89,7 +89,7 @@ EXPORT_SYMBOL(of_n_size_cells); #ifdef CONFIG_NUMA int __weak of_node_to_nid(struct device_node *np) { - return numa_node_id(); + return NUMA_NO_NODE; } #endif diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index cde35c5d0191b..d91f721a05b66 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -955,7 +955,9 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, } #ifdef CONFIG_HAVE_MEMBLOCK -#define MAX_PHYS_ADDR ((phys_addr_t)~0) +#ifndef MAX_MEMBLOCK_ADDR +#define MAX_MEMBLOCK_ADDR ((phys_addr_t)~0) +#endif void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size) { @@ -972,16 +974,16 @@ void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size) } size &= PAGE_MASK; - if (base > MAX_PHYS_ADDR) { + if (base > MAX_MEMBLOCK_ADDR) { pr_warning("Ignoring memory block 0x%llx - 0x%llx\n", base, base + size); return; } - if (base + size - 1 > MAX_PHYS_ADDR) { + if (base + size - 1 > MAX_MEMBLOCK_ADDR) { pr_warning("Ignoring memory range 0x%llx - 0x%llx\n", - ((u64)MAX_PHYS_ADDR) + 1, base + size); - size = MAX_PHYS_ADDR - base + 1; + ((u64)MAX_MEMBLOCK_ADDR) + 1, base + size); + size = MAX_MEMBLOCK_ADDR - base + 1; } if (base + size < phys_offset) { diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 0c064485d1c2c..bec8ec2b31f63 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -263,7 +263,8 @@ EXPORT_SYMBOL(of_phy_attach); bool of_phy_is_fixed_link(struct device_node *np) { struct device_node *dn; - 
int len; + int len, err; + const char *managed; /* New binding */ dn = of_get_child_by_name(np, "fixed-link"); @@ -272,6 +273,10 @@ bool of_phy_is_fixed_link(struct device_node *np) return true; } + err = of_property_read_string(np, "managed", &managed); + if (err == 0 && strcmp(managed, "auto") != 0) + return true; + /* Old binding */ if (of_get_property(np, "fixed-link", &len) && len == (5 * sizeof(__be32))) @@ -286,8 +291,18 @@ int of_phy_register_fixed_link(struct device_node *np) struct fixed_phy_status status = {}; struct device_node *fixed_link_node; const __be32 *fixed_link_prop; - int len; + int len, err; struct phy_device *phy; + const char *managed; + + err = of_property_read_string(np, "managed", &managed); + if (err == 0) { + if (strcmp(managed, "in-band-status") == 0) { + /* status is zeroed, namely its .link member */ + phy = fixed_phy_register(PHY_POLL, &status, np); + return IS_ERR(phy) ? PTR_ERR(phy) : 0; + } + } /* New binding */ fixed_link_node = of_get_child_by_name(np, "fixed-link"); diff --git a/drivers/of/platform.c b/drivers/of/platform.c index a01f57c9e34ea..ddf8e42c9367d 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -25,6 +25,7 @@ const struct of_device_id of_default_bus_match_table[] = { { .compatible = "simple-bus", }, + { .compatible = "simple-mfd", }, #ifdef CONFIG_ARM_AMBA { .compatible = "arm,amba-bus", }, #endif /* CONFIG_ARM_AMBA */ diff --git a/drivers/parisc/iommu-helpers.h b/drivers/parisc/iommu-helpers.h index 761e77bfce5d2..e56f1569f6c39 100644 --- a/drivers/parisc/iommu-helpers.h +++ b/drivers/parisc/iommu-helpers.h @@ -104,7 +104,11 @@ iommu_coalesce_chunks(struct ioc *ioc, struct device *dev, struct scatterlist *contig_sg; /* contig chunk head */ unsigned long dma_offset, dma_len; /* start/len of DMA stream */ unsigned int n_mappings = 0; - unsigned int max_seg_size = dma_get_max_seg_size(dev); + unsigned int max_seg_size = min(dma_get_max_seg_size(dev), + (unsigned)DMA_CHUNK_SIZE); + unsigned int max_seg_boundary = dma_get_seg_boundary(dev) + 1; + if (max_seg_boundary) /* check if the addition above didn't overflow */ + max_seg_size = min(max_seg_size, max_seg_boundary); while (nents > 0) { @@ -138,14 +142,11 @@ iommu_coalesce_chunks(struct ioc *ioc, struct device *dev, /* ** First make sure current dma stream won't - ** exceed DMA_CHUNK_SIZE if we coalesce the + ** exceed max_seg_size if we coalesce the ** next entry. 
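+ ** (max_seg_size was clamped above to both DMA_CHUNK_SIZE and the
+ ** device's segment boundary, so this one comparison covers all limits.)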
*/ - if(unlikely(ALIGN(dma_len + dma_offset + startsg->length, - IOVP_SIZE) > DMA_CHUNK_SIZE)) - break; - - if (startsg->length + dma_len > max_seg_size) + if (unlikely(ALIGN(dma_len + dma_offset + startsg->length, IOVP_SIZE) > + max_seg_size)) break; /* diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c index dceb9ddfd99af..a32c1f6c252cd 100644 --- a/drivers/parisc/lba_pci.c +++ b/drivers/parisc/lba_pci.c @@ -1556,8 +1556,11 @@ lba_driver_probe(struct parisc_device *dev) if (lba_dev->hba.lmmio_space.flags) pci_add_resource_offset(&resources, &lba_dev->hba.lmmio_space, lba_dev->hba.lmmio_space_offset); - if (lba_dev->hba.gmmio_space.flags) - pci_add_resource(&resources, &lba_dev->hba.gmmio_space); + if (lba_dev->hba.gmmio_space.flags) { + /* pci_add_resource(&resources, &lba_dev->hba.gmmio_space); */ + pr_warn("LBA: Not registering GMMIO space %pR\n", + &lba_dev->hba.gmmio_space); + } pci_add_resource(&resources, &lba_dev->hba.bus_num); diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 7a8f1c5e65af1..73de4efcbe6ed 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -1,6 +1,10 @@ # # PCI configuration # +config PCI_BUS_ADDR_T_64BIT + def_bool y if (ARCH_DMA_ADDR_T_64BIT || 64BIT) + depends on PCI + config PCI_MSI bool "Message Signaled Interrupts (MSI and MSI-X)" depends on PCI diff --git a/drivers/pci/access.c b/drivers/pci/access.c index d9b64a175990c..502a82ca1db05 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -439,6 +439,42 @@ static const struct pci_vpd_ops pci_vpd_pci22_ops = { .release = pci_vpd_pci22_release, }; +static ssize_t pci_vpd_f0_read(struct pci_dev *dev, loff_t pos, size_t count, + void *arg) +{ + struct pci_dev *tdev = pci_get_slot(dev->bus, + PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); + ssize_t ret; + + if (!tdev) + return -ENODEV; + + ret = pci_read_vpd(tdev, pos, count, arg); + pci_dev_put(tdev); + return ret; +} + +static ssize_t pci_vpd_f0_write(struct pci_dev *dev, loff_t pos, size_t count, + const void *arg) +{ + struct pci_dev *tdev = pci_get_slot(dev->bus, + PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); + ssize_t ret; + + if (!tdev) + return -ENODEV; + + ret = pci_write_vpd(tdev, pos, count, arg); + pci_dev_put(tdev); + return ret; +} + +static const struct pci_vpd_ops pci_vpd_f0_ops = { + .read = pci_vpd_f0_read, + .write = pci_vpd_f0_write, + .release = pci_vpd_pci22_release, +}; + int pci_vpd_pci22_init(struct pci_dev *dev) { struct pci_vpd_pci22 *vpd; @@ -447,12 +483,16 @@ int pci_vpd_pci22_init(struct pci_dev *dev) cap = pci_find_capability(dev, PCI_CAP_ID_VPD); if (!cap) return -ENODEV; + vpd = kzalloc(sizeof(*vpd), GFP_ATOMIC); if (!vpd) return -ENOMEM; vpd->base.len = PCI_VPD_PCI22_SIZE; - vpd->base.ops = &pci_vpd_pci22_ops; + if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) + vpd->base.ops = &pci_vpd_f0_ops; + else + vpd->base.ops = &pci_vpd_pci22_ops; mutex_init(&vpd->lock); vpd->cap = cap; vpd->busy = false; diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 90fa3a78fb7ce..89b3befc71553 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -92,11 +92,11 @@ void pci_bus_remove_resources(struct pci_bus *bus) } static struct pci_bus_region pci_32_bit = {0, 0xffffffffULL}; -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT +#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT static struct pci_bus_region pci_64_bit = {0, - (dma_addr_t) 0xffffffffffffffffULL}; -static struct pci_bus_region pci_high = {(dma_addr_t) 0x100000000ULL, - (dma_addr_t) 0xffffffffffffffffULL}; + (pci_bus_addr_t) 0xffffffffffffffffULL}; +static struct pci_bus_region 
pci_high = {(pci_bus_addr_t) 0x100000000ULL, + (pci_bus_addr_t) 0xffffffffffffffffULL}; #endif /* @@ -140,6 +140,8 @@ static int pci_bus_alloc_from_region(struct pci_bus *bus, struct resource *res, type_mask |= IORESOURCE_TYPE_BITS; pci_bus_for_each_resource(bus, r, i) { + resource_size_t min_used = min; + if (!r) continue; @@ -163,12 +165,12 @@ static int pci_bus_alloc_from_region(struct pci_bus *bus, struct resource *res, * overrides "min". */ if (avail.start) - min = avail.start; + min_used = avail.start; max = avail.end; /* Ok, try it out.. */ - ret = allocate_resource(r, res, size, min, max, + ret = allocate_resource(r, res, size, min_used, max, align, alignf, alignf_data); if (ret == 0) return 0; @@ -200,7 +202,7 @@ int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, resource_size_t), void *alignf_data) { -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT +#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT int rc; if (res->flags & IORESOURCE_MEM_64) { @@ -256,6 +258,8 @@ bool pci_bus_clip_resource(struct pci_dev *dev, int idx) res->start = start; res->end = end; + res->flags &= ~IORESOURCE_UNSET; + orig_res.flags &= ~IORESOURCE_UNSET; dev_printk(KERN_DEBUG, &dev->dev, "%pR clipped to %pR\n", &orig_res, res); diff --git a/drivers/pci/host/pci-dra7xx.c b/drivers/pci/host/pci-dra7xx.c index 2d57e19a2cd43..b5ae685aec610 100644 --- a/drivers/pci/host/pci-dra7xx.c +++ b/drivers/pci/host/pci-dra7xx.c @@ -289,7 +289,8 @@ static int __init dra7xx_add_pcie_port(struct dra7xx_pcie *dra7xx, } ret = devm_request_irq(&pdev->dev, pp->irq, - dra7xx_pcie_msi_irq_handler, IRQF_SHARED, + dra7xx_pcie_msi_irq_handler, + IRQF_SHARED | IRQF_NO_THREAD, "dra7-pcie-msi", pp); if (ret) { dev_err(&pdev->dev, "failed to request irq\n"); diff --git a/drivers/pci/host/pci-exynos.c b/drivers/pci/host/pci-exynos.c index c139237e0e523..5b2b83cb67ad8 100644 --- a/drivers/pci/host/pci-exynos.c +++ b/drivers/pci/host/pci-exynos.c @@ -527,7 +527,8 @@ static int __init exynos_add_pcie_port(struct pcie_port *pp, ret = devm_request_irq(&pdev->dev, pp->msi_irq, exynos_pcie_msi_irq_handler, - IRQF_SHARED, "exynos-pcie", pp); + IRQF_SHARED | IRQF_NO_THREAD, + "exynos-pcie", pp); if (ret) { dev_err(&pdev->dev, "failed to request msi irq\n"); return ret; diff --git a/drivers/pci/host/pci-imx6.c b/drivers/pci/host/pci-imx6.c index fdb95367721e9..ebcb0ac8512b5 100644 --- a/drivers/pci/host/pci-imx6.c +++ b/drivers/pci/host/pci-imx6.c @@ -534,7 +534,8 @@ static int __init imx6_add_pcie_port(struct pcie_port *pp, ret = devm_request_irq(&pdev->dev, pp->msi_irq, imx6_pcie_msi_handler, - IRQF_SHARED, "mx6-pcie-msi", pp); + IRQF_SHARED | IRQF_NO_THREAD, + "mx6-pcie-msi", pp); if (ret) { dev_err(&pdev->dev, "failed to request MSI irq\n"); return -ENODEV; diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c index 00e92720d7f79..d9789d6ba47d5 100644 --- a/drivers/pci/host/pci-tegra.c +++ b/drivers/pci/host/pci-tegra.c @@ -1304,7 +1304,7 @@ static int tegra_pcie_enable_msi(struct tegra_pcie *pcie) msi->irq = err; - err = request_irq(msi->irq, tegra_pcie_msi_irq, 0, + err = request_irq(msi->irq, tegra_pcie_msi_irq, IRQF_NO_THREAD, tegra_msi_irq_chip.name, pcie); if (err < 0) { dev_err(&pdev->dev, "failed to request IRQ: %d\n", err); diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c index c086210f2ffd1..56ce5640d91ae 100644 --- a/drivers/pci/host/pcie-rcar.c +++ b/drivers/pci/host/pcie-rcar.c @@ -695,14 +695,16 @@ static int rcar_pcie_enable_msi(struct rcar_pcie *pcie) /* Two irqs are for MSI, but they are also 
used for non-MSI irqs */ err = devm_request_irq(&pdev->dev, msi->irq1, rcar_pcie_msi_irq, - IRQF_SHARED, rcar_msi_irq_chip.name, pcie); + IRQF_SHARED | IRQF_NO_THREAD, + rcar_msi_irq_chip.name, pcie); if (err < 0) { dev_err(&pdev->dev, "failed to request IRQ: %d\n", err); goto err; } err = devm_request_irq(&pdev->dev, msi->irq2, rcar_pcie_msi_irq, - IRQF_SHARED, rcar_msi_irq_chip.name, pcie); + IRQF_SHARED | IRQF_NO_THREAD, + rcar_msi_irq_chip.name, pcie); if (err < 0) { dev_err(&pdev->dev, "failed to request IRQ: %d\n", err); goto err; diff --git a/drivers/pci/host/pcie-spear13xx.c b/drivers/pci/host/pcie-spear13xx.c index 020d788907191..4ea793eaa2bd5 100644 --- a/drivers/pci/host/pcie-spear13xx.c +++ b/drivers/pci/host/pcie-spear13xx.c @@ -281,7 +281,8 @@ static int spear13xx_add_pcie_port(struct pcie_port *pp, return -ENODEV; } ret = devm_request_irq(dev, pp->irq, spear13xx_pcie_irq_handler, - IRQF_SHARED, "spear1340-pcie", pp); + IRQF_SHARED | IRQF_NO_THREAD, + "spear1340-pcie", pp); if (ret) { dev_err(dev, "failed to request irq %d\n", pp->irq); return ret; diff --git a/drivers/pci/host/pcie-xilinx.c b/drivers/pci/host/pcie-xilinx.c index f1a06a091ccb5..577fe5b2f6175 100644 --- a/drivers/pci/host/pcie-xilinx.c +++ b/drivers/pci/host/pcie-xilinx.c @@ -776,7 +776,8 @@ static int xilinx_pcie_parse_dt(struct xilinx_pcie_port *port) port->irq = irq_of_parse_and_map(node, 0); err = devm_request_irq(dev, port->irq, xilinx_pcie_intr_handler, - IRQF_SHARED, "xilinx-pcie", port); + IRQF_SHARED | IRQF_NO_THREAD, + "xilinx-pcie", port); if (err) { dev_err(dev, "unable to request irq %d\n", port->irq); return err; diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index bcb90e4888dd8..b60309ee80ed0 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -954,8 +954,10 @@ int acpiphp_enable_slot(struct acpiphp_slot *slot) { pci_lock_rescan_remove(); - if (slot->flags & SLOT_IS_GOING_AWAY) + if (slot->flags & SLOT_IS_GOING_AWAY) { + pci_unlock_rescan_remove(); return -ENODEV; + } /* configure all functions */ if (!(slot->flags & SLOT_ENABLED)) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 0ebf754fc1775..6d6868811e56e 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -176,20 +176,17 @@ static void pcie_wait_cmd(struct controller *ctrl) jiffies_to_msecs(jiffies - ctrl->cmd_started)); } -/** - * pcie_write_cmd - Issue controller command - * @ctrl: controller to which the command is issued - * @cmd: command value written to slot control register - * @mask: bitmask of slot control register to be modified - */ -static void pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) +static void pcie_do_write_cmd(struct controller *ctrl, u16 cmd, + u16 mask, bool wait) { struct pci_dev *pdev = ctrl_dev(ctrl); u16 slot_ctrl; mutex_lock(&ctrl->ctrl_lock); - /* Wait for any previous command that might still be in progress */ + /* + * Always wait for any previous command that might still be in progress + */ pcie_wait_cmd(ctrl); pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl); @@ -201,9 +198,33 @@ static void pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) ctrl->cmd_started = jiffies; ctrl->slot_ctrl = slot_ctrl; + /* + * Optionally wait for the hardware to be ready for a new command, + * indicating completion of the above issued command. 
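+ * Callers that must see the command take effect use the waiting
+ * variant; the LED and notification helpers use the _nowait variant
+ * added below.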
+ */ + if (wait) + pcie_wait_cmd(ctrl); + mutex_unlock(&ctrl->ctrl_lock); } +/** + * pcie_write_cmd - Issue controller command + * @ctrl: controller to which the command is issued + * @cmd: command value written to slot control register + * @mask: bitmask of slot control register to be modified + */ +static void pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) +{ + pcie_do_write_cmd(ctrl, cmd, mask, true); +} + +/* Same as above without waiting for the hardware to latch */ +static void pcie_write_cmd_nowait(struct controller *ctrl, u16 cmd, u16 mask) +{ + pcie_do_write_cmd(ctrl, cmd, mask, false); +} + bool pciehp_check_link_active(struct controller *ctrl) { struct pci_dev *pdev = ctrl_dev(ctrl); @@ -422,7 +443,7 @@ void pciehp_set_attention_status(struct slot *slot, u8 value) default: return; } - pcie_write_cmd(ctrl, slot_cmd, PCI_EXP_SLTCTL_AIC); + pcie_write_cmd_nowait(ctrl, slot_cmd, PCI_EXP_SLTCTL_AIC); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd); } @@ -434,7 +455,8 @@ void pciehp_green_led_on(struct slot *slot) if (!PWR_LED(ctrl)) return; - pcie_write_cmd(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON, PCI_EXP_SLTCTL_PIC); + pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON, + PCI_EXP_SLTCTL_PIC); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, PCI_EXP_SLTCTL_PWR_IND_ON); @@ -447,7 +469,8 @@ void pciehp_green_led_off(struct slot *slot) if (!PWR_LED(ctrl)) return; - pcie_write_cmd(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF, PCI_EXP_SLTCTL_PIC); + pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF, + PCI_EXP_SLTCTL_PIC); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, PCI_EXP_SLTCTL_PWR_IND_OFF); @@ -460,7 +483,8 @@ void pciehp_green_led_blink(struct slot *slot) if (!PWR_LED(ctrl)) return; - pcie_write_cmd(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK, PCI_EXP_SLTCTL_PIC); + pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK, + PCI_EXP_SLTCTL_PIC); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, PCI_EXP_SLTCTL_PWR_IND_BLINK); @@ -613,7 +637,7 @@ void pcie_enable_notification(struct controller *ctrl) PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE | PCI_EXP_SLTCTL_DLLSCE); - pcie_write_cmd(ctrl, cmd, mask); + pcie_write_cmd_nowait(ctrl, cmd, mask); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd); } @@ -664,7 +688,7 @@ int pciehp_reset_slot(struct slot *slot, int probe) pci_reset_bridge_secondary_bus(ctrl->pcie->port); pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, stat_mask); - pcie_write_cmd(ctrl, ctrl_mask, ctrl_mask); + pcie_write_cmd_nowait(ctrl, ctrl_mask, ctrl_mask); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, ctrl_mask); if (pciehp_poll_mode) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 312f23a8429cd..92618686604cb 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -216,7 +216,7 @@ static ssize_t numa_node_store(struct device *dev, if (ret) return ret; - if (!node_online(node)) + if (node >= MAX_NUMNODES || !node_online(node)) return -EINVAL; add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index acc4b6ef78c43..c44393f26fd37 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4324,6 +4324,17 @@ bool 
pci_device_is_present(struct pci_dev *pdev) } EXPORT_SYMBOL_GPL(pci_device_is_present); +void pci_ignore_hotplug(struct pci_dev *dev) +{ + struct pci_dev *bridge = dev->bus->self; + + dev->ignore_hotplug = 1; + /* Propagate the "ignore hotplug" setting to the parent bridge. */ + if (bridge) + bridge->ignore_hotplug = 1; +} +EXPORT_SYMBOL_GPL(pci_ignore_hotplug); + #define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0}; static DEFINE_SPINLOCK(resource_alignment_lock); diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 0bf82a20a0fb4..48d21e0edd568 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -262,7 +262,6 @@ static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev) rpc->rpd = dev; INIT_WORK(&rpc->dpc_handler, aer_isr); mutex_init(&rpc->rpc_mutex); - init_waitqueue_head(&rpc->wait_release); /* Use PCIe bus function to store rpc into PCIe device */ set_service_data(dev, rpc); @@ -285,8 +284,7 @@ static void aer_remove(struct pcie_device *dev) if (rpc->isr) free_irq(dev->irq, dev); - wait_event(rpc->wait_release, rpc->prod_idx == rpc->cons_idx); - + flush_work(&rpc->dpc_handler); aer_disable_rootport(rpc); kfree(rpc); set_service_data(dev, NULL); diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 84420b7c9456e..945c939a86c5c 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -72,7 +72,6 @@ struct aer_rpc { * recovery on the same * root port hierarchy */ - wait_queue_head_t wait_release; }; struct aer_broadcast_data { diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 5653ea94547fc..b60a325234c52 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -784,8 +784,6 @@ void aer_isr(struct work_struct *work) while (get_e_source(rpc, &e_src)) aer_isr_one_error(p_device, &e_src); mutex_unlock(&rpc->rpc_mutex); - - wake_up(&rpc->wait_release); } /** diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 6675a7a1b9fc6..c91185721345b 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -254,8 +254,8 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, } if (res->flags & IORESOURCE_MEM_64) { - if ((sizeof(dma_addr_t) < 8 || sizeof(resource_size_t) < 8) && - sz64 > 0x100000000ULL) { + if ((sizeof(pci_bus_addr_t) < 8 || sizeof(resource_size_t) < 8) + && sz64 > 0x100000000ULL) { res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED; res->start = 0; res->end = 0; @@ -264,7 +264,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, goto out; } - if ((sizeof(dma_addr_t) < 8) && l) { + if ((sizeof(pci_bus_addr_t) < 8) && l) { /* Above 32-bit boundary; try to reallocate */ res->flags |= IORESOURCE_UNSET; res->start = 0; @@ -399,7 +399,7 @@ static void pci_read_bridge_mmio_pref(struct pci_bus *child) struct pci_dev *dev = child->self; u16 mem_base_lo, mem_limit_lo; u64 base64, limit64; - dma_addr_t base, limit; + pci_bus_addr_t base, limit; struct pci_bus_region region; struct resource *res; @@ -426,8 +426,8 @@ static void pci_read_bridge_mmio_pref(struct pci_bus *child) } } - base = (dma_addr_t) base64; - limit = (dma_addr_t) limit64; + base = (pci_bus_addr_t) base64; + limit = (pci_bus_addr_t) limit64; if (base != base64) { dev_err(&dev->dev, "can't handle bridge window above 4GB (bus address %#010llx)\n", diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 
c6dc1dfd25d55..4a6933f02cd01 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1576,6 +1576,18 @@ DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB3 #endif +static void quirk_jmicron_async_suspend(struct pci_dev *dev) +{ + if (dev->multifunction) { + device_disable_async_suspend(&dev->dev); + dev_info(&dev->dev, "async suspend disabled to avoid multi-function power-on ordering issue\n"); + } +} +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE, 8, quirk_jmicron_async_suspend); +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_CLASS_STORAGE_SATA_AHCI, 0, quirk_jmicron_async_suspend); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_JMICRON, 0x2362, quirk_jmicron_async_suspend); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_JMICRON, 0x236f, quirk_jmicron_async_suspend); + #ifdef CONFIG_X86_IO_APIC static void quirk_alder_ioapic(struct pci_dev *pdev) { @@ -1903,6 +1915,31 @@ static void quirk_netmos(struct pci_dev *dev) DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_NETMOS, PCI_ANY_ID, PCI_CLASS_COMMUNICATION_SERIAL, 8, quirk_netmos); +/* + * Quirk non-zero PCI functions to route VPD access through function 0 for + * devices that share VPD resources between functions. The functions are + * expected to be identical devices. + */ +static void quirk_f0_vpd_link(struct pci_dev *dev) +{ + struct pci_dev *f0; + + if (!PCI_FUNC(dev->devfn)) + return; + + f0 = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); + if (!f0) + return; + + if (f0->vpd && dev->class == f0->class && + dev->vendor == f0->vendor && dev->device == f0->device) + dev->dev_flags |= PCI_DEV_FLAGS_VPD_REF_F0; + + pci_dev_put(f0); +} +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, + PCI_CLASS_NETWORK_ETHERNET, 8, quirk_f0_vpd_link); + static void quirk_e100_interrupt(struct pci_dev *dev) { u16 command, pmcsr; @@ -2838,12 +2875,15 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x3c28, vtd_mask_spec_errors); static void fixup_ti816x_class(struct pci_dev *dev) { + u32 class = dev->class; + /* TI 816x devices do not have class code set when in PCIe boot mode */ - dev_info(&dev->dev, "Setting PCI class for 816x PCIe device\n"); - dev->class = PCI_CLASS_MULTIMEDIA_VIDEO; + dev->class = PCI_CLASS_MULTIMEDIA_VIDEO << 8; + dev_info(&dev->dev, "PCI class overridden (%#08x -> %#08x)\n", + class, dev->class); } DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_TI, 0xb800, - PCI_CLASS_NOT_DEFINED, 0, fixup_ti816x_class); + PCI_CLASS_NOT_DEFINED, 0, fixup_ti816x_class); /* Some PCIe devices do not work reliably with the claimed maximum * payload size supported. diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 7cfd2db02deb3..914655e896770 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -52,7 +52,7 @@ struct pcifront_device { }; struct pcifront_sd { - int domain; + struct pci_sysdata sd; struct pcifront_device *pdev; }; @@ -66,7 +66,9 @@ static inline void pcifront_init_sd(struct pcifront_sd *sd, unsigned int domain, unsigned int bus, struct pcifront_device *pdev) { - sd->domain = domain; + /* Because we do not expose that information via XenBus. 
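+ * Fall back to the first online NUMA node for every frontend bus.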
*/ + sd->sd.node = first_online_node; + sd->sd.domain = domain; sd->pdev = pdev; } @@ -464,8 +466,8 @@ static int pcifront_scan_root(struct pcifront_device *pdev, dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n", domain, bus); - bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL); - sd = kmalloc(sizeof(*sd), GFP_KERNEL); + bus_entry = kzalloc(sizeof(*bus_entry), GFP_KERNEL); + sd = kzalloc(sizeof(*sd), GFP_KERNEL); if (!bus_entry || !sd) { err = -ENOMEM; goto err_out; diff --git a/drivers/pcmcia/sa1100_generic.c b/drivers/pcmcia/sa1100_generic.c index 803945259da80..42861cc701580 100644 --- a/drivers/pcmcia/sa1100_generic.c +++ b/drivers/pcmcia/sa1100_generic.c @@ -93,7 +93,6 @@ static int sa11x0_drv_pcmcia_remove(struct platform_device *dev) for (i = 0; i < sinfo->nskt; i++) soc_pcmcia_remove_one(&sinfo->skt[i]); - clk_put(sinfo->clk); kfree(sinfo); return 0; } diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c index cf6de2c2b3293..553d70a67f808 100644 --- a/drivers/pcmcia/sa11xx_base.c +++ b/drivers/pcmcia/sa11xx_base.c @@ -222,7 +222,7 @@ int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, int i, ret = 0; struct clk *clk; - clk = clk_get(dev, NULL); + clk = devm_clk_get(dev, NULL); if (IS_ERR(clk)) return PTR_ERR(clk); @@ -251,7 +251,6 @@ int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, if (ret) { while (--i >= 0) soc_pcmcia_remove_one(&sinfo->skt[i]); - clk_put(clk); kfree(sinfo); } else { dev_set_drvdata(dev, sinfo); diff --git a/drivers/pcmcia/topic.h b/drivers/pcmcia/topic.h index 615a45a8fe867..582688fe75054 100644 --- a/drivers/pcmcia/topic.h +++ b/drivers/pcmcia/topic.h @@ -104,6 +104,9 @@ #define TOPIC_EXCA_IF_CONTROL 0x3e /* 8 bit */ #define TOPIC_EXCA_IFC_33V_ENA 0x01 +#define TOPIC_PCI_CFG_PPBCN 0x3e /* 16-bit */ +#define TOPIC_PCI_CFG_PPBCN_WBEN 0x0400 + static void topic97_zoom_video(struct pcmcia_socket *sock, int onoff) { struct yenta_socket *socket = container_of(sock, struct yenta_socket, socket); @@ -138,6 +141,7 @@ static int topic97_override(struct yenta_socket *socket) static int topic95_override(struct yenta_socket *socket) { u8 fctrl; + u16 ppbcn; /* enable 3.3V support for 16bit cards */ fctrl = exca_readb(socket, TOPIC_EXCA_IF_CONTROL); @@ -146,6 +150,18 @@ static int topic95_override(struct yenta_socket *socket) /* tell yenta to use exca registers to power 16bit cards */ socket->flags |= YENTA_16BIT_POWER_EXCA | YENTA_16BIT_POWER_DF; + /* Disable write buffers to prevent lockups under load with numerous + Cardbus cards, observed on Tecra 500CDT and reported elsewhere on the + net. This is not a power-on default according to the datasheet + but some BIOSes seem to set it. 
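+ Only ToPIC95 revisions <= 7 that actually have the bit set are
+ touched below.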
*/ + if (pci_read_config_word(socket->dev, TOPIC_PCI_CFG_PPBCN, &ppbcn) == 0 + && socket->dev->revision <= 7 + && (ppbcn & TOPIC_PCI_CFG_PPBCN_WBEN)) { + ppbcn &= ~TOPIC_PCI_CFG_PPBCN_WBEN; + pci_write_config_word(socket->dev, TOPIC_PCI_CFG_PPBCN, ppbcn); + dev_info(&socket->dev->dev, "Disabled ToPIC95 Cardbus write buffers.\n"); + } + return 0; } diff --git a/drivers/phy/phy-berlin-usb.c b/drivers/phy/phy-berlin-usb.c index c6fc95b530835..ab54f28644510 100644 --- a/drivers/phy/phy-berlin-usb.c +++ b/drivers/phy/phy-berlin-usb.c @@ -106,8 +106,8 @@ static const u32 phy_berlin_pll_dividers[] = { /* Berlin 2 */ CLK_REF_DIV(0xc) | FEEDBACK_CLK_DIV(0x54), - /* Berlin 2CD */ - CLK_REF_DIV(0x6) | FEEDBACK_CLK_DIV(0x55), + /* Berlin 2CD/Q */ + CLK_REF_DIV(0xc) | FEEDBACK_CLK_DIV(0x54), }; struct phy_berlin_usb_priv { diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index 63bc12d7a73e5..153e0a27c7ee9 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -275,20 +275,21 @@ EXPORT_SYMBOL_GPL(phy_exit); int phy_power_on(struct phy *phy) { - int ret; + int ret = 0; if (!phy) - return 0; + goto out; if (phy->pwr) { ret = regulator_enable(phy->pwr); if (ret) - return ret; + goto out; } ret = phy_pm_runtime_get_sync(phy); if (ret < 0 && ret != -ENOTSUPP) - return ret; + goto err_pm_sync; + ret = 0; /* Override possible ret == -ENOTSUPP */ mutex_lock(&phy->mutex); @@ -296,19 +297,20 @@ int phy_power_on(struct phy *phy) ret = phy->ops->power_on(phy); if (ret < 0) { dev_err(&phy->dev, "phy poweron failed --> %d\n", ret); - goto out; + goto err_pwr_on; } } ++phy->power_count; mutex_unlock(&phy->mutex); return 0; -out: +err_pwr_on: mutex_unlock(&phy->mutex); phy_pm_runtime_put_sync(phy); +err_pm_sync: if (phy->pwr) regulator_disable(phy->pwr); - +out: return ret; } EXPORT_SYMBOL_GPL(phy_power_on); diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c index bc42d6a8939f4..fb9e30ed80184 100644 --- a/drivers/phy/phy-twl4030-usb.c +++ b/drivers/phy/phy-twl4030-usb.c @@ -144,6 +144,16 @@ #define PMBR1 0x0D #define GPIO_USB_4PIN_ULPI_2430C (3 << 0) +/* + * If VBUS is valid or ID is ground, then we know a + * cable is present and we need to be runtime-enabled + */ +static inline bool cable_present(enum omap_musb_vbus_id_status stat) +{ + return stat == OMAP_MUSB_VBUS_VALID || + stat == OMAP_MUSB_ID_GROUND; +} + struct twl4030_usb { struct usb_phy phy; struct device *dev; @@ -536,8 +546,10 @@ static irqreturn_t twl4030_usb_irq(int irq, void *_twl) mutex_lock(&twl->lock); if (status >= 0 && status != twl->linkstat) { + status_changed = + cable_present(twl->linkstat) != + cable_present(status); twl->linkstat = status; - status_changed = true; } mutex_unlock(&twl->lock); @@ -553,15 +565,11 @@ static irqreturn_t twl4030_usb_irq(int irq, void *_twl) * USB_LINK_VBUS state. musb_hdrc won't care until it * starts to handle softconnect right. 
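+ * The runtime PM reference taken when a cable appears (get_sync
+ * below) is balanced by the autosuspend put once it goes away.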
*/ - if ((status == OMAP_MUSB_VBUS_VALID) || - (status == OMAP_MUSB_ID_GROUND)) { - if (pm_runtime_suspended(twl->dev)) - pm_runtime_get_sync(twl->dev); + if (cable_present(status)) { + pm_runtime_get_sync(twl->dev); } else { - if (pm_runtime_active(twl->dev)) { - pm_runtime_mark_last_busy(twl->dev); - pm_runtime_put_autosuspend(twl->dev); - } + pm_runtime_mark_last_busy(twl->dev); + pm_runtime_put_autosuspend(twl->dev); } omap_musb_mailbox(status); } @@ -747,6 +755,7 @@ static int twl4030_usb_remove(struct platform_device *pdev) struct twl4030_usb *twl = platform_get_drvdata(pdev); int val; + usb_remove_phy(&twl->phy); pm_runtime_get_sync(twl->dev); cancel_delayed_work(&twl->id_workaround_work); device_remove_file(twl->dev, &dev_attr_vbus); @@ -754,6 +763,13 @@ static int twl4030_usb_remove(struct platform_device *pdev) /* set transceiver mode to power on defaults */ twl4030_usb_set_mode(twl, -1); + /* idle ulpi before powering off */ + if (cable_present(twl->linkstat)) + pm_runtime_put_noidle(twl->dev); + pm_runtime_mark_last_busy(twl->dev); + pm_runtime_put_sync_suspend(twl->dev); + pm_runtime_disable(twl->dev); + /* autogate 60MHz ULPI clock, * clear dpll clock request for i2c access, * disable 32KHz @@ -767,8 +783,6 @@ static int twl4030_usb_remove(struct platform_device *pdev) /* disable complete OTG block */ twl4030_usb_clear_bits(twl, POWER_CTRL, POWER_CTRL_OTG_ENAB); - pm_runtime_mark_last_busy(twl->dev); - pm_runtime_put(twl->dev); return 0; } diff --git a/drivers/pinctrl/freescale/pinctrl-imx1-core.c b/drivers/pinctrl/freescale/pinctrl-imx1-core.c index 5ac59fbb2440f..d3a3be7476e19 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx1-core.c +++ b/drivers/pinctrl/freescale/pinctrl-imx1-core.c @@ -403,14 +403,13 @@ static int imx1_pinconf_set(struct pinctrl_dev *pctldev, unsigned num_configs) { struct imx1_pinctrl *ipctl = pinctrl_dev_get_drvdata(pctldev); - const struct imx1_pinctrl_soc_info *info = ipctl->info; int i; for (i = 0; i != num_configs; ++i) { imx1_write_bit(ipctl, pin_id, configs[i] & 0x01, MX1_PUEN); dev_dbg(ipctl->dev, "pinconf set pullup pin %s\n", - info->pins[pin_id].name); + pin_desc_get(pctldev, pin_id)->name); } return 0; diff --git a/drivers/pinctrl/freescale/pinctrl-imx25.c b/drivers/pinctrl/freescale/pinctrl-imx25.c index faf635654312a..293ed4381cc0e 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx25.c +++ b/drivers/pinctrl/freescale/pinctrl-imx25.c @@ -26,7 +26,8 @@ #include "pinctrl-imx.h" enum imx25_pads { - MX25_PAD_RESERVE0 = 1, + MX25_PAD_RESERVE0 = 0, + MX25_PAD_RESERVE1 = 1, MX25_PAD_A10 = 2, MX25_PAD_A13 = 3, MX25_PAD_A14 = 4, @@ -169,6 +170,7 @@ enum imx25_pads { /* Pad names for the pinmux subsystem */ static const struct pinctrl_pin_desc imx25_pinctrl_pads[] = { IMX_PINCTRL_PIN(MX25_PAD_RESERVE0), + IMX_PINCTRL_PIN(MX25_PAD_RESERVE1), IMX_PINCTRL_PIN(MX25_PAD_A10), IMX_PINCTRL_PIN(MX25_PAD_A13), IMX_PINCTRL_PIN(MX25_PAD_A14), diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 2062c224e32fb..b2602210784dc 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -146,7 +146,7 @@ struct byt_gpio_pin_context { struct byt_gpio { struct gpio_chip chip; struct platform_device *pdev; - spinlock_t lock; + raw_spinlock_t lock; void __iomem *reg_base; struct pinctrl_gpio_range *range; struct byt_gpio_pin_context *saved_context; @@ -174,11 +174,11 @@ static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned offset) unsigned long flags; u32 value; - 
spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&vg->lock, flags); value = readl(reg); value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); writel(value, reg); - spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&vg->lock, flags); } static u32 byt_get_gpio_mux(struct byt_gpio *vg, unsigned offset) @@ -201,6 +201,9 @@ static int byt_gpio_request(struct gpio_chip *chip, unsigned offset) struct byt_gpio *vg = to_byt_gpio(chip); void __iomem *reg = byt_gpio_reg(chip, offset, BYT_CONF0_REG); u32 value, gpio_mux; + unsigned long flags; + + raw_spin_lock_irqsave(&vg->lock, flags); /* * In most cases, func pin mux 000 means GPIO function. @@ -214,18 +217,16 @@ static int byt_gpio_request(struct gpio_chip *chip, unsigned offset) value = readl(reg) & BYT_PIN_MUX; gpio_mux = byt_get_gpio_mux(vg, offset); if (WARN_ON(gpio_mux != value)) { - unsigned long flags; - - spin_lock_irqsave(&vg->lock, flags); value = readl(reg) & ~BYT_PIN_MUX; value |= gpio_mux; writel(value, reg); - spin_unlock_irqrestore(&vg->lock, flags); dev_warn(&vg->pdev->dev, "pin %u forcibly re-configured as GPIO\n", offset); } + raw_spin_unlock_irqrestore(&vg->lock, flags); + pm_runtime_get(&vg->pdev->dev); return 0; @@ -250,7 +251,7 @@ static int byt_irq_type(struct irq_data *d, unsigned type) if (offset >= vg->chip.ngpio) return -EINVAL; - spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&vg->lock, flags); value = readl(reg); WARN(value & BYT_DIRECT_IRQ_EN, @@ -269,7 +270,7 @@ static int byt_irq_type(struct irq_data *d, unsigned type) else if (type & IRQ_TYPE_LEVEL_MASK) __irq_set_handler_locked(d->irq, handle_level_irq); - spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&vg->lock, flags); return 0; } @@ -277,7 +278,15 @@ static int byt_irq_type(struct irq_data *d, unsigned type) static int byt_gpio_get(struct gpio_chip *chip, unsigned offset) { void __iomem *reg = byt_gpio_reg(chip, offset, BYT_VAL_REG); - return readl(reg) & BYT_LEVEL; + struct byt_gpio *vg = to_byt_gpio(chip); + unsigned long flags; + u32 val; + + raw_spin_lock_irqsave(&vg->lock, flags); + val = readl(reg); + raw_spin_unlock_irqrestore(&vg->lock, flags); + + return val & BYT_LEVEL; } static void byt_gpio_set(struct gpio_chip *chip, unsigned offset, int value) @@ -287,7 +296,7 @@ static void byt_gpio_set(struct gpio_chip *chip, unsigned offset, int value) unsigned long flags; u32 old_val; - spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&vg->lock, flags); old_val = readl(reg); @@ -296,7 +305,7 @@ static void byt_gpio_set(struct gpio_chip *chip, unsigned offset, int value) else writel(old_val & ~BYT_LEVEL, reg); - spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&vg->lock, flags); } static int byt_gpio_direction_input(struct gpio_chip *chip, unsigned offset) @@ -306,13 +315,13 @@ static int byt_gpio_direction_input(struct gpio_chip *chip, unsigned offset) unsigned long flags; u32 value; - spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&vg->lock, flags); value = readl(reg) | BYT_DIR_MASK; value &= ~BYT_INPUT_EN; /* active low */ writel(value, reg); - spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&vg->lock, flags); return 0; } @@ -326,7 +335,7 @@ static int byt_gpio_direction_output(struct gpio_chip *chip, unsigned long flags; u32 reg_val; - spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&vg->lock, flags); /* * Before making any direction modifications, do a check if gpio @@ -345,7 +354,7 @@ static int 
byt_gpio_direction_output(struct gpio_chip *chip, else writel(reg_val & ~BYT_LEVEL, reg); - spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&vg->lock, flags); return 0; } @@ -354,18 +363,19 @@ static void byt_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) { struct byt_gpio *vg = to_byt_gpio(chip); int i; - unsigned long flags; u32 conf0, val, offs; - spin_lock_irqsave(&vg->lock, flags); - for (i = 0; i < vg->chip.ngpio; i++) { const char *pull_str = NULL; const char *pull = NULL; + unsigned long flags; const char *label; offs = vg->range->pins[i] * 16; + + raw_spin_lock_irqsave(&vg->lock, flags); conf0 = readl(vg->reg_base + offs + BYT_CONF0_REG); val = readl(vg->reg_base + offs + BYT_VAL_REG); + raw_spin_unlock_irqrestore(&vg->lock, flags); label = gpiochip_is_requested(chip, i); if (!label) @@ -418,7 +428,6 @@ static void byt_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) seq_puts(s, "\n"); } - spin_unlock_irqrestore(&vg->lock, flags); } static void byt_gpio_irq_handler(unsigned irq, struct irq_desc *desc) @@ -450,8 +459,10 @@ static void byt_irq_ack(struct irq_data *d) unsigned offset = irqd_to_hwirq(d); void __iomem *reg; + raw_spin_lock(&vg->lock); reg = byt_gpio_reg(&vg->chip, offset, BYT_INT_STAT_REG); writel(BIT(offset % 32), reg); + raw_spin_unlock(&vg->lock); } static void byt_irq_unmask(struct irq_data *d) @@ -463,9 +474,9 @@ static void byt_irq_unmask(struct irq_data *d) void __iomem *reg; u32 value; - spin_lock_irqsave(&vg->lock, flags); - reg = byt_gpio_reg(&vg->chip, offset, BYT_CONF0_REG); + + raw_spin_lock_irqsave(&vg->lock, flags); value = readl(reg); switch (irqd_get_trigger_type(d)) { @@ -486,7 +497,7 @@ static void byt_irq_unmask(struct irq_data *d) writel(value, reg); - spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&vg->lock, flags); } static void byt_irq_mask(struct irq_data *d) @@ -578,7 +589,7 @@ static int byt_gpio_probe(struct platform_device *pdev) if (IS_ERR(vg->reg_base)) return PTR_ERR(vg->reg_base); - spin_lock_init(&vg->lock); + raw_spin_lock_init(&vg->lock); gc = &vg->chip; gc->label = dev_name(&pdev->dev); diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-370.c b/drivers/pinctrl/mvebu/pinctrl-armada-370.c index 03aa58c4cb85b..1eb084c3b0c96 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-370.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-370.c @@ -370,11 +370,11 @@ static struct mvebu_mpp_mode mv88f6710_mpp_modes[] = { MPP_MODE(64, MPP_FUNCTION(0x0, "gpio", NULL), MPP_FUNCTION(0x1, "spi0", "miso"), - MPP_FUNCTION(0x2, "spi0-1", "cs1")), + MPP_FUNCTION(0x2, "spi0", "cs1")), MPP_MODE(65, MPP_FUNCTION(0x0, "gpio", NULL), MPP_FUNCTION(0x1, "spi0", "mosi"), - MPP_FUNCTION(0x2, "spi0-1", "cs2")), + MPP_FUNCTION(0x2, "spi0", "cs2")), }; static struct mvebu_pinctrl_soc_info armada_370_pinctrl_info; diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-375.c b/drivers/pinctrl/mvebu/pinctrl-armada-375.c index ca1e7571fedb5..203291bde6088 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-375.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-375.c @@ -92,19 +92,17 @@ static struct mvebu_mpp_mode mv88f6720_mpp_modes[] = { MPP_FUNCTION(0x5, "nand", "io1")), MPP_MODE(8, MPP_FUNCTION(0x0, "gpio", NULL), - MPP_FUNCTION(0x1, "dev ", "bootcs"), + MPP_FUNCTION(0x1, "dev", "bootcs"), MPP_FUNCTION(0x2, "spi0", "cs0"), MPP_FUNCTION(0x3, "spi1", "cs0"), MPP_FUNCTION(0x5, "nand", "ce")), MPP_MODE(9, MPP_FUNCTION(0x0, "gpio", NULL), - MPP_FUNCTION(0x1, "nf", "wen"), MPP_FUNCTION(0x2, "spi0", "sck"), MPP_FUNCTION(0x3, 
"spi1", "sck"), MPP_FUNCTION(0x5, "nand", "we")), MPP_MODE(10, MPP_FUNCTION(0x0, "gpio", NULL), - MPP_FUNCTION(0x1, "nf", "ren"), MPP_FUNCTION(0x2, "dram", "vttctrl"), MPP_FUNCTION(0x3, "led", "c1"), MPP_FUNCTION(0x5, "nand", "re"), diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-38x.c b/drivers/pinctrl/mvebu/pinctrl-armada-38x.c index 83bbcc72be1f8..ff411a53b5a45 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-38x.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-38x.c @@ -94,37 +94,39 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "rxd0", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "pcie0", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(3, "pcie1", "rstout", V_88F6820_PLUS), MPP_VAR_FUNCTION(4, "spi0", "cs1", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "dev", "ad14", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(5, "dev", "ad14", V_88F6810_PLUS), + MPP_VAR_FUNCTION(6, "pcie3", "clkreq", V_88F6810_PLUS)), MPP_MODE(13, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "rxd1", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "pcie0", "clkreq", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "pcie1", "clkreq", V_88F6820_PLUS), MPP_VAR_FUNCTION(4, "spi0", "cs2", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "dev", "ad15", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(5, "dev", "ad15", V_88F6810_PLUS), + MPP_VAR_FUNCTION(6, "pcie2", "clkreq", V_88F6810_PLUS)), MPP_MODE(14, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "rxd2", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "ptp", "clk", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "m", "vtt_ctrl", V_88F6810_PLUS), MPP_VAR_FUNCTION(4, "spi0", "cs3", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "dev", "wen1", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(5, "dev", "wen1", V_88F6810_PLUS), + MPP_VAR_FUNCTION(6, "pcie3", "clkreq", V_88F6810_PLUS)), MPP_MODE(15, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "rxd3", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "ge", "mdc slave", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "pcie0", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(4, "spi0", "mosi", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "pcie1", "rstout", V_88F6820_PLUS)), + MPP_VAR_FUNCTION(4, "spi0", "mosi", V_88F6810_PLUS)), MPP_MODE(16, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "rxctl", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "ge", "mdio slave", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "m", "decc_err", V_88F6810_PLUS), MPP_VAR_FUNCTION(4, "spi0", "miso", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "pcie0", "clkreq", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(5, "pcie0", "clkreq", V_88F6810_PLUS), + MPP_VAR_FUNCTION(6, "pcie1", "clkreq", V_88F6820_PLUS)), MPP_MODE(17, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "rxclk", V_88F6810_PLUS), @@ -137,13 +139,12 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_VAR_FUNCTION(1, "ge0", "rxerr", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "ptp", "trig_gen", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "ua1", "txd", V_88F6810_PLUS), - MPP_VAR_FUNCTION(4, "spi0", "cs0", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "pcie1", "rstout", V_88F6820_PLUS)), + MPP_VAR_FUNCTION(4, "spi0", "cs0", V_88F6810_PLUS)), MPP_MODE(19, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "col", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "ptp", "event_req", V_88F6810_PLUS), - MPP_VAR_FUNCTION(3, "pcie0", "clkreq", V_88F6810_PLUS), + MPP_VAR_FUNCTION(3, "ge0", "txerr", V_88F6810_PLUS), 
MPP_VAR_FUNCTION(4, "sata1", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "ua0", "cts", V_88F6810_PLUS), MPP_VAR_FUNCTION(6, "ua1", "rxd", V_88F6810_PLUS)), @@ -151,7 +152,6 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "txclk", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "ptp", "clk", V_88F6810_PLUS), - MPP_VAR_FUNCTION(3, "pcie1", "rstout", V_88F6820_PLUS), MPP_VAR_FUNCTION(4, "sata0", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "ua0", "rts", V_88F6810_PLUS), MPP_VAR_FUNCTION(6, "ua1", "txd", V_88F6810_PLUS)), @@ -277,35 +277,27 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_VAR_FUNCTION(1, "pcie0", "clkreq", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "m", "vtt_ctrl", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "m", "decc_err", V_88F6810_PLUS), - MPP_VAR_FUNCTION(4, "pcie0", "rstout", V_88F6810_PLUS), + MPP_VAR_FUNCTION(4, "spi1", "cs2", V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "dev", "clkout", V_88F6810_PLUS)), MPP_MODE(44, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "sata0", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "sata1", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "sata2", "prsnt", V_88F6828), - MPP_VAR_FUNCTION(4, "sata3", "prsnt", V_88F6828), - MPP_VAR_FUNCTION(5, "pcie0", "rstout", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(4, "sata3", "prsnt", V_88F6828)), MPP_MODE(45, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ref", "clk_out0", V_88F6810_PLUS), - MPP_VAR_FUNCTION(2, "pcie0", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(3, "pcie1", "rstout", V_88F6820_PLUS), - MPP_VAR_FUNCTION(4, "pcie2", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "pcie3", "rstout", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(2, "pcie0", "rstout", V_88F6810_PLUS)), MPP_MODE(46, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ref", "clk_out1", V_88F6810_PLUS), - MPP_VAR_FUNCTION(2, "pcie0", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(3, "pcie1", "rstout", V_88F6820_PLUS), - MPP_VAR_FUNCTION(4, "pcie2", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "pcie3", "rstout", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(2, "pcie0", "rstout", V_88F6810_PLUS)), MPP_MODE(47, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "sata0", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "sata1", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "sata2", "prsnt", V_88F6828), - MPP_VAR_FUNCTION(4, "spi1", "cs2", V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "sata3", "prsnt", V_88F6828)), MPP_MODE(48, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), @@ -313,18 +305,19 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_VAR_FUNCTION(2, "m", "vtt_ctrl", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "tdm2c", "pclk", V_88F6810_PLUS), MPP_VAR_FUNCTION(4, "audio", "mclk", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "sd0", "d4", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(5, "sd0", "d4", V_88F6810_PLUS), + MPP_VAR_FUNCTION(6, "pcie0", "clkreq", V_88F6810_PLUS)), MPP_MODE(49, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "sata2", "prsnt", V_88F6828), MPP_VAR_FUNCTION(2, "sata3", "prsnt", V_88F6828), MPP_VAR_FUNCTION(3, "tdm2c", "fsync", V_88F6810_PLUS), MPP_VAR_FUNCTION(4, "audio", "lrclk", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "sd0", "d5", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(5, "sd0", "d5", V_88F6810_PLUS), + MPP_VAR_FUNCTION(6, "pcie1", "clkreq", V_88F6820_PLUS)), MPP_MODE(50, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "pcie0", 
"rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(2, "pcie1", "rstout", V_88F6820_PLUS), MPP_VAR_FUNCTION(3, "tdm2c", "drx", V_88F6810_PLUS), MPP_VAR_FUNCTION(4, "audio", "extclk", V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "sd0", "cmd", V_88F6810_PLUS)), @@ -336,7 +329,6 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_MODE(52, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "pcie0", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(2, "pcie1", "rstout", V_88F6820_PLUS), MPP_VAR_FUNCTION(3, "tdm2c", "intn", V_88F6810_PLUS), MPP_VAR_FUNCTION(4, "audio", "sdi", V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "sd0", "d6", V_88F6810_PLUS)), @@ -352,7 +344,7 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_VAR_FUNCTION(1, "sata0", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "sata1", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "pcie0", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(4, "pcie1", "rstout", V_88F6820_PLUS), + MPP_VAR_FUNCTION(4, "ge0", "txerr", V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "sd0", "d3", V_88F6810_PLUS)), MPP_MODE(55, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), @@ -382,7 +374,6 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "pcie0", "rstout", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "i2c1", "sda", V_88F6810_PLUS), - MPP_VAR_FUNCTION(3, "pcie1", "rstout", V_88F6820_PLUS), MPP_VAR_FUNCTION(4, "spi1", "cs0", V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "sd0", "d2", V_88F6810_PLUS)), }; @@ -411,7 +402,7 @@ static struct mvebu_mpp_ctrl armada_38x_mpp_controls[] = { static struct pinctrl_gpio_range armada_38x_mpp_gpio_ranges[] = { MPP_GPIO_RANGE(0, 0, 0, 32), - MPP_GPIO_RANGE(1, 32, 32, 27), + MPP_GPIO_RANGE(1, 32, 32, 28), }; static int armada_38x_pinctrl_probe(struct platform_device *pdev) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-39x.c b/drivers/pinctrl/mvebu/pinctrl-armada-39x.c index 42491624d660d..2dcf9b41e01e8 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-39x.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-39x.c @@ -380,7 +380,7 @@ static struct mvebu_mpp_ctrl armada_39x_mpp_controls[] = { static struct pinctrl_gpio_range armada_39x_mpp_gpio_ranges[] = { MPP_GPIO_RANGE(0, 0, 0, 32), - MPP_GPIO_RANGE(1, 32, 32, 27), + MPP_GPIO_RANGE(1, 32, 32, 28), }; static int armada_39x_pinctrl_probe(struct platform_device *pdev) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c index 578db9f033b23..d7cdb146f44d0 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c @@ -14,10 +14,7 @@ * available: mv78230, mv78260 and mv78460. From a pin muxing * perspective, the mv78230 has 49 MPP pins. The mv78260 and mv78460 * both have 67 MPP pins (more GPIOs and address lines for the memory - * bus mainly). The only difference between the mv78260 and the - * mv78460 in terms of pin muxing is the addition of two functions on - * pins 43 and 56 to access the VDD of the CPU2 and 3 (mv78260 has two - * cores, mv78460 has four cores). + * bus mainly). 
*/ #include @@ -172,20 +169,17 @@ static struct mvebu_mpp_mode armada_xp_mpp_modes[] = { MPP_MODE(24, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "sata1", "prsnt", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x2, "nf", "bootcs-re", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x3, "tdm", "rst", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x4, "lcd", "hsync", V_MV78230_PLUS)), MPP_MODE(25, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "sata0", "prsnt", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x2, "nf", "bootcs-we", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x3, "tdm", "pclk", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x4, "lcd", "vsync", V_MV78230_PLUS)), MPP_MODE(26, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x3, "tdm", "fsync", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x4, "lcd", "clk", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x5, "vdd", "cpu1-pd", V_MV78230_PLUS)), + MPP_VAR_FUNCTION(0x4, "lcd", "clk", V_MV78230_PLUS)), MPP_MODE(27, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "ptp", "trig", V_MV78230_PLUS), @@ -200,8 +194,7 @@ static struct mvebu_mpp_mode armada_xp_mpp_modes[] = { MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "ptp", "clk", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x3, "tdm", "int0", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x4, "lcd", "ref-clk", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x5, "vdd", "cpu0-pd", V_MV78230_PLUS)), + MPP_VAR_FUNCTION(0x4, "lcd", "ref-clk", V_MV78230_PLUS)), MPP_MODE(30, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "sd0", "clk", V_MV78230_PLUS), @@ -209,13 +202,11 @@ static struct mvebu_mpp_mode armada_xp_mpp_modes[] = { MPP_MODE(31, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "sd0", "cmd", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x3, "tdm", "int2", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x5, "vdd", "cpu0-pd", V_MV78230_PLUS)), + MPP_VAR_FUNCTION(0x3, "tdm", "int2", V_MV78230_PLUS)), MPP_MODE(32, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "sd0", "d0", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x3, "tdm", "int3", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x5, "vdd", "cpu1-pd", V_MV78230_PLUS)), + MPP_VAR_FUNCTION(0x3, "tdm", "int3", V_MV78230_PLUS)), MPP_MODE(33, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "sd0", "d1", V_MV78230_PLUS), @@ -247,7 +238,6 @@ static struct mvebu_mpp_mode armada_xp_mpp_modes[] = { MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "spi", "cs1", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x2, "uart2", "cts", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x3, "vdd", "cpu1-pd", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x4, "lcd", "vga-hsync", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x5, "pcie", "clkreq0", V_MV78230_PLUS)), MPP_MODE(41, @@ -262,15 +252,13 @@ static struct mvebu_mpp_mode armada_xp_mpp_modes[] = { MPP_VAR_FUNCTION(0x1, "uart2", "rxd", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x2, "uart0", "cts", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x3, "tdm", "int7", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x4, "tdm-1", "timer", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x5, "vdd", "cpu0-pd", V_MV78230_PLUS)), + MPP_VAR_FUNCTION(0x4, "tdm-1", "timer", V_MV78230_PLUS)), MPP_MODE(43, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "uart2", "txd", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x2, "uart0", "rts", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x3, "spi", "cs3", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x4, "pcie", "rstout", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x5, 
"vdd", "cpu2-3-pd", V_MV78460)), + MPP_VAR_FUNCTION(0x4, "pcie", "rstout", V_MV78230_PLUS)), MPP_MODE(44, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "uart2", "cts", V_MV78230_PLUS), @@ -299,7 +287,7 @@ static struct mvebu_mpp_mode armada_xp_mpp_modes[] = { MPP_VAR_FUNCTION(0x5, "pcie", "clkreq3", V_MV78230_PLUS)), MPP_MODE(48, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x1, "tclk", NULL, V_MV78230_PLUS), + MPP_VAR_FUNCTION(0x1, "dev", "clkout", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x2, "dev", "burst/last", V_MV78230_PLUS)), MPP_MODE(49, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78260_PLUS), @@ -321,16 +309,13 @@ static struct mvebu_mpp_mode armada_xp_mpp_modes[] = { MPP_VAR_FUNCTION(0x1, "dev", "ad19", V_MV78260_PLUS)), MPP_MODE(55, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78260_PLUS), - MPP_VAR_FUNCTION(0x1, "dev", "ad20", V_MV78260_PLUS), - MPP_VAR_FUNCTION(0x2, "vdd", "cpu0-pd", V_MV78260_PLUS)), + MPP_VAR_FUNCTION(0x1, "dev", "ad20", V_MV78260_PLUS)), MPP_MODE(56, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78260_PLUS), - MPP_VAR_FUNCTION(0x1, "dev", "ad21", V_MV78260_PLUS), - MPP_VAR_FUNCTION(0x2, "vdd", "cpu1-pd", V_MV78260_PLUS)), + MPP_VAR_FUNCTION(0x1, "dev", "ad21", V_MV78260_PLUS)), MPP_MODE(57, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78260_PLUS), - MPP_VAR_FUNCTION(0x1, "dev", "ad22", V_MV78260_PLUS), - MPP_VAR_FUNCTION(0x2, "vdd", "cpu2-3-pd", V_MV78460)), + MPP_VAR_FUNCTION(0x1, "dev", "ad22", V_MV78260_PLUS)), MPP_MODE(58, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78260_PLUS), MPP_VAR_FUNCTION(0x1, "dev", "ad23", V_MV78260_PLUS)), diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 2f797cb7e2050..7747814508854 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -320,6 +320,9 @@ static const struct pinctrl_ops at91_pctrl_ops = { static void __iomem *pin_to_controller(struct at91_pinctrl *info, unsigned int bank) { + if (!gpio_chips[bank]) + return NULL; + return gpio_chips[bank]->regbase; } @@ -729,6 +732,10 @@ static int at91_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, pin = &pins_conf[i]; at91_pin_dbg(info->dev, pin); pio = pin_to_controller(info, pin->bank); + + if (!pio) + continue; + mask = pin_to_mask(pin->pin); at91_mux_disable_interrupt(pio, mask); switch (pin->mux) { @@ -848,6 +855,10 @@ static int at91_pinconf_get(struct pinctrl_dev *pctldev, *config = 0; dev_dbg(info->dev, "%s:%d, pin_id=%d", __func__, __LINE__, pin_id); pio = pin_to_controller(info, pin_to_bank(pin_id)); + + if (!pio) + return -EINVAL; + pin = pin_id % MAX_NB_GPIO_PER_BANK; if (at91_mux_get_multidrive(pio, pin)) @@ -889,6 +900,10 @@ static int at91_pinconf_set(struct pinctrl_dev *pctldev, "%s:%d, pin_id=%d, config=0x%lx", __func__, __LINE__, pin_id, config); pio = pin_to_controller(info, pin_to_bank(pin_id)); + + if (!pio) + return -EINVAL; + pin = pin_id % MAX_NB_GPIO_PER_BANK; mask = pin_to_mask(pin); diff --git a/drivers/pinctrl/pinctrl-zynq.c b/drivers/pinctrl/pinctrl-zynq.c index 22280bddb9e26..8c51a3c65513a 100644 --- a/drivers/pinctrl/pinctrl-zynq.c +++ b/drivers/pinctrl/pinctrl-zynq.c @@ -714,12 +714,13 @@ static const char * const gpio0_groups[] = {"gpio0_0_grp", .mux_val = mval, \ } -#define DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(fname, mval, mux, mask, shift) \ +#define DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(fname, mval, offset, mask, shift)\ [ZYNQ_PMUX_##fname] = { \ .name = #fname, \ .groups = fname##_groups, \ .ngroups = ARRAY_SIZE(fname##_groups), \ .mux_val = mval, \ + .mux 
= offset, \ .mux_mask = mask, \ .mux_shift = shift, \ } @@ -744,15 +745,15 @@ static const struct zynq_pinmux_function zynq_pmux_functions[] = { DEFINE_ZYNQ_PINMUX_FUNCTION(spi1, 0x50), DEFINE_ZYNQ_PINMUX_FUNCTION(sdio0, 0x40), DEFINE_ZYNQ_PINMUX_FUNCTION(sdio0_pc, 0xc), - DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio0_wp, 0, 130, ZYNQ_SDIO_WP_MASK, + DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio0_wp, 0, 0x130, ZYNQ_SDIO_WP_MASK, ZYNQ_SDIO_WP_SHIFT), - DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio0_cd, 0, 130, ZYNQ_SDIO_CD_MASK, + DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio0_cd, 0, 0x130, ZYNQ_SDIO_CD_MASK, ZYNQ_SDIO_CD_SHIFT), DEFINE_ZYNQ_PINMUX_FUNCTION(sdio1, 0x40), DEFINE_ZYNQ_PINMUX_FUNCTION(sdio1_pc, 0xc), - DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio1_wp, 0, 134, ZYNQ_SDIO_WP_MASK, + DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio1_wp, 0, 0x134, ZYNQ_SDIO_WP_MASK, ZYNQ_SDIO_WP_SHIFT), - DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio1_cd, 0, 134, ZYNQ_SDIO_CD_MASK, + DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio1_cd, 0, 0x134, ZYNQ_SDIO_CD_MASK, ZYNQ_SDIO_CD_SHIFT), DEFINE_ZYNQ_PINMUX_FUNCTION(smc0_nor, 4), DEFINE_ZYNQ_PINMUX_FUNCTION(smc0_nor_cs1, 8), diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index d688d806a8a51..2c1d5f5432a9a 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -305,7 +305,6 @@ static const struct dmi_system_id dell_quirks[] __initconst = { }; static struct calling_interface_buffer *buffer; -static struct page *bufferpage; static DEFINE_MUTEX(buffer_mutex); static int hwswitch_state; @@ -1896,12 +1895,11 @@ static int __init dell_init(void) * Allocate buffer below 4GB for SMI data--only 32-bit physical addr * is passed to SMI handler. */ - bufferpage = alloc_page(GFP_KERNEL | GFP_DMA32); - if (!bufferpage) { + buffer = (void *)__get_free_page(GFP_KERNEL | GFP_DMA32); + if (!buffer) { ret = -ENOMEM; goto fail_buffer; } - buffer = page_address(bufferpage); ret = dell_setup_rfkill(); @@ -1965,7 +1963,7 @@ static int __init dell_init(void) cancel_delayed_work_sync(&dell_rfkill_work); dell_cleanup_rfkill(); fail_rfkill: - free_page((unsigned long)bufferpage); + free_page((unsigned long)buffer); fail_buffer: platform_device_del(platform_device); fail_platform_device2: diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 06697315a0887..fb4dd7b3ee711 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -54,8 +54,9 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4"); #define HPWMI_HARDWARE_QUERY 0x4 #define HPWMI_WIRELESS_QUERY 0x5 #define HPWMI_BIOS_QUERY 0x9 +#define HPWMI_FEATURE_QUERY 0xb #define HPWMI_HOTKEY_QUERY 0xc -#define HPWMI_FEATURE_QUERY 0xd +#define HPWMI_FEATURE2_QUERY 0xd #define HPWMI_WIRELESS2_QUERY 0x1b #define HPWMI_POSTCODEERROR_QUERY 0x2a @@ -295,25 +296,33 @@ static int hp_wmi_tablet_state(void) return (state & 0x4) ? 1 : 0; } -static int __init hp_wmi_bios_2009_later(void) +static int __init hp_wmi_bios_2008_later(void) { int state = 0; int ret = hp_wmi_perform_query(HPWMI_FEATURE_QUERY, 0, &state, sizeof(state), sizeof(state)); - if (ret) - return ret; + if (!ret) + return 1; - return (state & 0x10) ? 1 : 0; + return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 
0 : -ENXIO; } -static int hp_wmi_enable_hotkeys(void) +static int __init hp_wmi_bios_2009_later(void) { - int ret; - int query = 0x6e; + int state = 0; + int ret = hp_wmi_perform_query(HPWMI_FEATURE2_QUERY, 0, &state, + sizeof(state), sizeof(state)); + if (!ret) + return 1; - ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, 1, &query, sizeof(query), - 0); + return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 0 : -ENXIO; +} +static int __init hp_wmi_enable_hotkeys(void) +{ + int value = 0x6e; + int ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, 1, &value, + sizeof(value), 0); if (ret) return -EINVAL; return 0; @@ -663,7 +672,7 @@ static int __init hp_wmi_input_setup(void) hp_wmi_tablet_state()); input_sync(hp_wmi_input_dev); - if (hp_wmi_bios_2009_later() == 4) + if (!hp_wmi_bios_2009_later() && hp_wmi_bios_2008_later()) hp_wmi_enable_hotkeys(); status = wmi_install_notify_handler(HPWMI_EVENT_GUID, hp_wmi_notify, NULL); diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index b496db87bc050..9a92d13e39178 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -464,8 +464,9 @@ static const struct ideapad_rfk_data ideapad_rfk_data[] = { static int ideapad_rfk_set(void *data, bool blocked) { struct ideapad_rfk_priv *priv = data; + int opcode = ideapad_rfk_data[priv->dev].opcode; - return write_ec_cmd(priv->priv->adev->handle, priv->dev, !blocked); + return write_ec_cmd(priv->priv->adev->handle, opcode, !blocked); } static struct rfkill_ops ideapad_rfk_ops = { @@ -836,6 +837,20 @@ static const struct dmi_system_id no_hw_rfkill_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo G40-30"), }, }, + { + .ident = "Lenovo G50-30", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo G50-30"), + }, + }, + { + .ident = "Lenovo ideapad Y700-17ISK", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad Y700-17ISK"), + }, + }, { .ident = "Lenovo Yoga 2 11 / 13 / Pro", .matches = { @@ -843,6 +858,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Yoga 2"), }, }, + { + .ident = "Lenovo Yoga 3 14", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Yoga 3 14"), + }, + }, { .ident = "Lenovo Yoga 3 Pro 1370", .matches = { @@ -850,6 +872,20 @@ static const struct dmi_system_id no_hw_rfkill_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 3 Pro-1370"), }, }, + { + .ident = "Lenovo Yoga 700", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 700"), + }, + }, + { + .ident = "Lenovo Yoga 900", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 900"), + }, + }, {} }; diff --git a/drivers/platform/x86/intel_scu_ipcutil.c b/drivers/platform/x86/intel_scu_ipcutil.c index 02bc5a6343c3f..aa454241489c9 100644 --- a/drivers/platform/x86/intel_scu_ipcutil.c +++ b/drivers/platform/x86/intel_scu_ipcutil.c @@ -49,7 +49,7 @@ struct scu_ipc_data { static int scu_reg_access(u32 cmd, struct scu_ipc_data *data) { - int count = data->count; + unsigned int count = data->count; if (count == 0 || count == 3 || count > 4) return -EINVAL; diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 9956b9902bb40..35882dd690a64 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -2525,11 +2525,9 @@ static int 
toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev) if (error) return error; - error = toshiba_hotkey_event_type_get(dev, &events_type); - if (error) { - pr_err("Unable to query Hotkey Event Type\n"); - return error; - } + if (toshiba_hotkey_event_type_get(dev, &events_type)) + pr_notice("Unable to query Hotkey Event Type\n"); + dev->hotkey_event_type = events_type; dev->hotkey_dev = input_allocate_device(); @@ -2766,6 +2764,7 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev) ret = toshiba_function_keys_get(dev, &special_functions); dev->kbd_function_keys_supported = !ret; + dev->hotkey_event_type = 0; if (toshiba_acpi_setup_keyboard(dev)) pr_info("Unable to activate hotkeys\n"); diff --git a/drivers/power/avs/Kconfig b/drivers/power/avs/Kconfig index 7f3d389bd601e..a67eeace6a89b 100644 --- a/drivers/power/avs/Kconfig +++ b/drivers/power/avs/Kconfig @@ -13,7 +13,7 @@ menuconfig POWER_AVS config ROCKCHIP_IODOMAIN tristate "Rockchip IO domain support" - depends on ARCH_ROCKCHIP && OF + depends on POWER_AVS && ARCH_ROCKCHIP && OF help Say y here to enable support for io domains on Rockchip SoCs. It is necessary for the io domain setting of the SoC to match the diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index 2ed4a4a6b3c5c..4bc0c7f459a52 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -30,6 +30,8 @@ EXPORT_SYMBOL_GPL(power_supply_notifier); static struct device_type power_supply_dev_type; +#define POWER_SUPPLY_DEFERRED_REGISTER_TIME msecs_to_jiffies(10) + static bool __power_supply_is_supplied_by(struct power_supply *supplier, struct power_supply *supply) { @@ -121,6 +123,30 @@ void power_supply_changed(struct power_supply *psy) } EXPORT_SYMBOL_GPL(power_supply_changed); +/* + * Notify that a power supply was registered, once its parent has finished + * probing. + * + * Often a power supply is registered from a driver's probe function. + * However, calling power_supply_changed() directly from + * power_supply_register() would lead to execution of the driver-provided + * get_property() function too early - before the probe ends. + * + * Avoid that by waiting on the parent's mutex. 
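+ *
+ * A rough sketch of the race being avoided (hypothetical driver; all
+ * names below are illustrative only):
+ *
+ *	static int foo_probe(struct platform_device *pdev)
+ *	{
+ *		foo->psy = power_supply_register(&pdev->dev,
+ *						 &foo_desc, &cfg);
+ *		...
+ *		foo->state = FOO_READY;	(get_property() must not
+ *		return 0;		 run before this point)
+ *	}
+ *
+ * Waiting on the parent's mutex works because the driver core holds
+ * the device lock for the whole of probe(), so the deferred work can
+ * only run once foo_probe() has returned.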
+ */ +static void power_supply_deferred_register_work(struct work_struct *work) +{ + struct power_supply *psy = container_of(work, struct power_supply, + deferred_register_work.work); + + if (psy->dev.parent) + mutex_lock(&psy->dev.parent->mutex); + + power_supply_changed(psy); + + if (psy->dev.parent) + mutex_unlock(&psy->dev.parent->mutex); +} + #ifdef CONFIG_OF #include @@ -645,6 +671,10 @@ __power_supply_register(struct device *parent, struct power_supply *psy; int rc; + if (!parent) + pr_warn("%s: Expected proper parent device for '%s'\n", + __func__, desc->name); + psy = kzalloc(sizeof(*psy), GFP_KERNEL); if (!psy) return ERR_PTR(-ENOMEM); @@ -659,7 +689,6 @@ __power_supply_register(struct device *parent, dev->release = power_supply_dev_release; dev_set_drvdata(dev, psy); psy->desc = desc; - atomic_inc(&psy->use_cnt); if (cfg) { psy->drv_data = cfg->drv_data; psy->of_node = cfg->of_node; @@ -672,6 +701,8 @@ __power_supply_register(struct device *parent, goto dev_set_name_failed; INIT_WORK(&psy->changed_work, power_supply_changed_work); + INIT_DELAYED_WORK(&psy->deferred_register_work, + power_supply_deferred_register_work); rc = power_supply_check_supplies(psy); if (rc) { @@ -700,7 +731,20 @@ __power_supply_register(struct device *parent, if (rc) goto create_triggers_failed; - power_supply_changed(psy); + /* + * Update use_cnt after any uevents (most notably from device_add()). + * We are still inside the driver's probe here, but + * power_supply_uevent() calls back into the driver's get_property + * method, so: + * 1. the driver has not yet assigned the returned struct power_supply, + * 2. the driver has not finished its initialization (anything in its + * probe after calling power_supply_register()). + */ + atomic_inc(&psy->use_cnt); + + queue_delayed_work(system_power_efficient_wq, + &psy->deferred_register_work, + POWER_SUPPLY_DEFERRED_REGISTER_TIME); return psy; @@ -720,7 +764,8 @@ __power_supply_register(struct device *parent, /** * power_supply_register() - Register new power supply - * @parent: Device to be a parent of power supply's device + * @parent: Device to be a parent of power supply's device, usually + * the device which probe function calls this * @desc: Description of power supply, must be valid through whole * lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, @@ -741,7 +786,8 @@ EXPORT_SYMBOL_GPL(power_supply_register); /** * power_supply_register_no_ws() - Register new non-waking-source power supply - * @parent: Device to be a parent of power supply's device + * @parent: Device to be a parent of power supply's device, usually + * the device which probe function calls this * @desc: Description of power supply, must be valid through whole * lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, @@ -770,7 +816,8 @@ static void devm_power_supply_release(struct device *dev, void *res) /** * devm_power_supply_register() - Register managed power supply - * @parent: Device to be a parent of power supply's device + * @parent: Device to be a parent of power supply's device, usually + * the device which probe function calls this * @desc: Description of power supply, must be valid through whole * lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, @@ -805,7 +852,8 @@ EXPORT_SYMBOL_GPL(devm_power_supply_register); /** * devm_power_supply_register_no_ws() - Register managed non-waking-source power supply - * @parent: Device to be 
a parent of power supply's device, usually + * the device which probe function calls this * @desc: Description of power supply, must be valid through whole * lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, @@ -849,6 +897,7 @@ void power_supply_unregister(struct power_supply *psy) { WARN_ON(atomic_dec_return(&psy->use_cnt)); cancel_work_sync(&psy->changed_work); + cancel_delayed_work_sync(&psy->deferred_register_work); sysfs_remove_link(&psy->dev.kobj, "powers"); power_supply_remove_triggers(psy); psy_unregister_cooler(psy); diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 443eaab933fcf..8a28116b58058 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -779,7 +779,7 @@ static int suspend_prepare(struct regulator_dev *rdev, suspend_state_t state) static void print_constraints(struct regulator_dev *rdev) { struct regulation_constraints *constraints = rdev->constraints; - char buf[80] = ""; + char buf[160] = ""; int count = 0; int ret; diff --git a/drivers/regulator/max77686.c b/drivers/regulator/max77686.c index 15fb1416bfbde..c064e32fb3b92 100644 --- a/drivers/regulator/max77686.c +++ b/drivers/regulator/max77686.c @@ -88,7 +88,7 @@ enum max77686_ramp_rate { }; struct max77686_data { - u64 gpio_enabled:MAX77686_REGULATORS; + DECLARE_BITMAP(gpio_enabled, MAX77686_REGULATORS); /* Array indexed by regulator id */ unsigned int opmode[MAX77686_REGULATORS]; @@ -121,7 +121,7 @@ static unsigned int max77686_map_normal_mode(struct max77686_data *max77686, case MAX77686_BUCK8: case MAX77686_BUCK9: case MAX77686_LDO20 ... MAX77686_LDO22: - if (max77686->gpio_enabled & (1 << id)) + if (test_bit(id, max77686->gpio_enabled)) return MAX77686_GPIO_CONTROL; } @@ -277,7 +277,7 @@ static int max77686_of_parse_cb(struct device_node *np, } if (gpio_is_valid(config->ena_gpio)) { - max77686->gpio_enabled |= (1 << desc->id); + set_bit(desc->id, max77686->gpio_enabled); return regmap_update_bits(config->regmap, desc->enable_reg, desc->enable_mask, diff --git a/drivers/regulator/pbias-regulator.c b/drivers/regulator/pbias-regulator.c index bd2b75c0d1d12..4fa7bcaf454e8 100644 --- a/drivers/regulator/pbias-regulator.c +++ b/drivers/regulator/pbias-regulator.c @@ -30,6 +30,7 @@ struct pbias_reg_info { u32 enable; u32 enable_mask; + u32 disable_val; u32 vmode; unsigned int enable_time; char *name; @@ -62,6 +63,7 @@ static const struct pbias_reg_info pbias_mmc_omap2430 = { .enable = BIT(1), .enable_mask = BIT(1), .vmode = BIT(0), + .disable_val = 0, .enable_time = 100, .name = "pbias_mmc_omap2430" }; @@ -77,6 +79,7 @@ static const struct pbias_reg_info pbias_sim_omap3 = { static const struct pbias_reg_info pbias_mmc_omap4 = { .enable = BIT(26) | BIT(22), .enable_mask = BIT(26) | BIT(25) | BIT(22), + .disable_val = BIT(25), .vmode = BIT(21), .enable_time = 100, .name = "pbias_mmc_omap4" @@ -85,6 +88,7 @@ static const struct pbias_reg_info pbias_mmc_omap4 = { static const struct pbias_reg_info pbias_mmc_omap5 = { .enable = BIT(27) | BIT(26), .enable_mask = BIT(27) | BIT(25) | BIT(26), + .disable_val = BIT(25), .vmode = BIT(21), .enable_time = 100, .name = "pbias_mmc_omap5" @@ -159,6 +163,7 @@ static int pbias_regulator_probe(struct platform_device *pdev) drvdata[data_idx].desc.enable_reg = res->start; drvdata[data_idx].desc.enable_mask = info->enable_mask; drvdata[data_idx].desc.enable_val = info->enable; + drvdata[data_idx].desc.disable_val = info->disable_val; cfg.init_data = pbias_matches[idx].init_data; cfg.driver_data = 
&drvdata[data_idx]; diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c index ff828117798fd..8de135174e820 100644 --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -34,6 +34,8 @@ #include #include +/* The highest number of possible regulators for supported devices. */ +#define S2MPS_REGULATOR_MAX S2MPS13_REGULATOR_MAX struct s2mps11_info { unsigned int rdev_num; int ramp_delay2; @@ -49,7 +51,7 @@ struct s2mps11_info { * One bit for each S2MPS13/S2MPS14/S2MPU02 regulator whether * the suspend mode was enabled. */ - unsigned long long s2mps14_suspend_state:50; + DECLARE_BITMAP(suspend_state, S2MPS_REGULATOR_MAX); /* Array of size rdev_num with GPIO-s for external sleep control */ int *ext_control_gpio; @@ -500,7 +502,7 @@ static int s2mps14_regulator_enable(struct regulator_dev *rdev) switch (s2mps11->dev_type) { case S2MPS13X: case S2MPS14X: - if (s2mps11->s2mps14_suspend_state & (1 << rdev_get_id(rdev))) + if (test_bit(rdev_get_id(rdev), s2mps11->suspend_state)) val = S2MPS14_ENABLE_SUSPEND; else if (gpio_is_valid(s2mps11->ext_control_gpio[rdev_get_id(rdev)])) val = S2MPS14_ENABLE_EXT_CONTROL; @@ -508,7 +510,7 @@ static int s2mps14_regulator_enable(struct regulator_dev *rdev) val = rdev->desc->enable_mask; break; case S2MPU02: - if (s2mps11->s2mps14_suspend_state & (1 << rdev_get_id(rdev))) + if (test_bit(rdev_get_id(rdev), s2mps11->suspend_state)) val = S2MPU02_ENABLE_SUSPEND; else val = rdev->desc->enable_mask; @@ -562,7 +564,7 @@ static int s2mps14_regulator_set_suspend_disable(struct regulator_dev *rdev) if (ret < 0) return ret; - s2mps11->s2mps14_suspend_state |= (1 << rdev_get_id(rdev)); + set_bit(rdev_get_id(rdev), s2mps11->suspend_state); /* * Don't enable suspend mode if regulator is already disabled because * this would effectively for a short time turn on the regulator after @@ -960,18 +962,22 @@ static int s2mps11_pmic_probe(struct platform_device *pdev) case S2MPS11X: s2mps11->rdev_num = ARRAY_SIZE(s2mps11_regulators); regulators = s2mps11_regulators; + BUILD_BUG_ON(S2MPS_REGULATOR_MAX < s2mps11->rdev_num); break; case S2MPS13X: s2mps11->rdev_num = ARRAY_SIZE(s2mps13_regulators); regulators = s2mps13_regulators; + BUILD_BUG_ON(S2MPS_REGULATOR_MAX < s2mps11->rdev_num); break; case S2MPS14X: s2mps11->rdev_num = ARRAY_SIZE(s2mps14_regulators); regulators = s2mps14_regulators; + BUILD_BUG_ON(S2MPS_REGULATOR_MAX < s2mps11->rdev_num); break; case S2MPU02: s2mps11->rdev_num = ARRAY_SIZE(s2mpu02_regulators); regulators = s2mpu02_regulators; + BUILD_BUG_ON(S2MPS_REGULATOR_MAX < s2mps11->rdev_num); break; default: dev_err(&pdev->dev, "Invalid device type: %u\n", diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c index 4337c3bc6acef..afea84c7a155c 100644 --- a/drivers/rtc/rtc-abx80x.c +++ b/drivers/rtc/rtc-abx80x.c @@ -28,7 +28,7 @@ #define ABX8XX_REG_WD 0x07 #define ABX8XX_REG_CTRL1 0x10 -#define ABX8XX_CTRL_WRITE BIT(1) +#define ABX8XX_CTRL_WRITE BIT(0) #define ABX8XX_CTRL_12_24 BIT(6) #define ABX8XX_REG_CFG_KEY 0x1f diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 76cbad7a99d34..c5a2523b0185c 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -39,6 +39,7 @@ struct s3c_rtc { void __iomem *base; struct clk *rtc_clk; struct clk *rtc_src_clk; + bool clk_disabled; struct s3c_rtc_data *data; @@ -71,9 +72,12 @@ static void s3c_rtc_enable_clk(struct s3c_rtc *info) unsigned long irq_flags; spin_lock_irqsave(&info->alarm_clk_lock, irq_flags); - clk_enable(info->rtc_clk); - if (info->data->needs_src_clk) - 
clk_enable(info->rtc_src_clk); + if (info->clk_disabled) { + clk_enable(info->rtc_clk); + if (info->data->needs_src_clk) + clk_enable(info->rtc_src_clk); + info->clk_disabled = false; + } spin_unlock_irqrestore(&info->alarm_clk_lock, irq_flags); } @@ -82,9 +86,12 @@ static void s3c_rtc_disable_clk(struct s3c_rtc *info) unsigned long irq_flags; spin_lock_irqsave(&info->alarm_clk_lock, irq_flags); - if (info->data->needs_src_clk) - clk_disable(info->rtc_src_clk); - clk_disable(info->rtc_clk); + if (!info->clk_disabled) { + if (info->data->needs_src_clk) + clk_disable(info->rtc_src_clk); + clk_disable(info->rtc_clk); + info->clk_disabled = true; + } spin_unlock_irqrestore(&info->alarm_clk_lock, irq_flags); } @@ -128,6 +135,11 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled) s3c_rtc_disable_clk(info); + if (enabled) + s3c_rtc_enable_clk(info); + else + s3c_rtc_disable_clk(info); + return 0; } diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c index 8c70d785ba739..ab60287ee72d6 100644 --- a/drivers/rtc/rtc-s5m.c +++ b/drivers/rtc/rtc-s5m.c @@ -635,6 +635,16 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info) case S2MPS13X: data[0] = (0 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT); ret = regmap_write(info->regmap, info->regs->ctrl, data[0]); + if (ret < 0) + break; + + /* + * We should set the WUDR & (RUDR or AUDR) bits high after writing + * the RTC_CTRL register, just as when writing the Alarm registers. + * We could not find this described in the datasheet, but the vendor + * code really does it. + */ + ret = s5m8767_rtc_set_alarm_reg(info); break; default: diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c index 0479e807a776a..d87a85cefb665 100644 --- a/drivers/rtc/rtc-snvs.c +++ b/drivers/rtc/rtc-snvs.c @@ -322,6 +322,13 @@ static int snvs_rtc_suspend(struct device *dev) if (device_may_wakeup(dev)) enable_irq_wake(data->irq); + return 0; +} + +static int snvs_rtc_suspend_noirq(struct device *dev) +{ + struct snvs_rtc_data *data = dev_get_drvdata(dev); + if (data->clk) clk_disable_unprepare(data->clk); @@ -331,23 +338,28 @@ static int snvs_rtc_suspend(struct device *dev) static int snvs_rtc_resume(struct device *dev) { struct snvs_rtc_data *data = dev_get_drvdata(dev); - int ret; if (device_may_wakeup(dev)) - disable_irq_wake(data->irq); + return disable_irq_wake(data->irq); - if (data->clk) { - ret = clk_prepare_enable(data->clk); - if (ret) - return ret; - } + return 0; +} + +static int snvs_rtc_resume_noirq(struct device *dev) +{ + struct snvs_rtc_data *data = dev_get_drvdata(dev); + + if (data->clk) + return clk_prepare_enable(data->clk); return 0; } static const struct dev_pm_ops snvs_rtc_pm_ops = { - .suspend_noirq = snvs_rtc_suspend, - .resume_noirq = snvs_rtc_resume, + .suspend = snvs_rtc_suspend, + .suspend_noirq = snvs_rtc_suspend_noirq, + .resume = snvs_rtc_resume, + .resume_noirq = snvs_rtc_resume_noirq, }; #define SNVS_RTC_PM_OPS (&snvs_rtc_pm_ops) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index a2597e683e790..6a64e86e8ccd1 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -264,8 +264,10 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) spin_unlock_irqrestore(&lcu->lock, flags); cancel_work_sync(&lcu->suc_data.worker); spin_lock_irqsave(&lcu->lock, flags); - if (device == lcu->suc_data.device) + if (device == lcu->suc_data.device) { + dasd_put_device(device); lcu->suc_data.device = NULL; + } } was_pending = 0; if (device == lcu->ruac_data.device) { @@ -273,8 +275,10 
@@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) was_pending = 1; cancel_delayed_work_sync(&lcu->ruac_data.dwork); spin_lock_irqsave(&lcu->lock, flags); - if (device == lcu->ruac_data.device) + if (device == lcu->ruac_data.device) { + dasd_put_device(device); lcu->ruac_data.device = NULL; + } } private->lcu = NULL; spin_unlock_irqrestore(&lcu->lock, flags); @@ -549,8 +553,10 @@ static void lcu_update_work(struct work_struct *work) if ((rc && (rc != -EOPNOTSUPP)) || (lcu->flags & NEED_UAC_UPDATE)) { DBF_DEV_EVENT(DBF_WARNING, device, "could not update" " alias data in lcu (rc = %d), retry later", rc); - schedule_delayed_work(&lcu->ruac_data.dwork, 30*HZ); + if (!schedule_delayed_work(&lcu->ruac_data.dwork, 30*HZ)) + dasd_put_device(device); } else { + dasd_put_device(device); lcu->ruac_data.device = NULL; lcu->flags &= ~UPDATE_PENDING; } @@ -593,8 +599,10 @@ static int _schedule_lcu_update(struct alias_lcu *lcu, */ if (!usedev) return -EINVAL; + dasd_get_device(usedev); lcu->ruac_data.device = usedev; - schedule_delayed_work(&lcu->ruac_data.dwork, 0); + if (!schedule_delayed_work(&lcu->ruac_data.dwork, 0)) + dasd_put_device(usedev); return 0; } @@ -722,7 +730,7 @@ static int reset_summary_unit_check(struct alias_lcu *lcu, ASCEBC((char *) &cqr->magic, 4); ccw = cqr->cpaddr; ccw->cmd_code = DASD_ECKD_CCW_RSCK; - ccw->flags = 0 ; + ccw->flags = CCW_FLAG_SLI; ccw->count = 16; ccw->cda = (__u32)(addr_t) cqr->data; ((char *)cqr->data)[0] = reason; @@ -926,6 +934,7 @@ static void summary_unit_check_handling_work(struct work_struct *work) /* 3. read new alias configuration */ _schedule_lcu_update(lcu, device); lcu->suc_data.device = NULL; + dasd_put_device(device); spin_unlock_irqrestore(&lcu->lock, flags); } @@ -985,6 +994,8 @@ void dasd_alias_handle_summary_unit_check(struct dasd_device *device, } lcu->suc_data.reason = reason; lcu->suc_data.device = device; + dasd_get_device(device); spin_unlock(&lcu->lock); - schedule_work(&lcu->suc_data.worker); + if (!schedule_work(&lcu->suc_data.worker)) + dasd_put_device(device); }; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 1efa4fdb7fe21..f45cd0cb1b32f 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -7,6 +7,7 @@ #define KMSG_COMPONENT "sclp_early" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include #include #include #include diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index 6f1fa1773e764..f8d8fdb26b72a 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c @@ -65,6 +65,7 @@ struct virtio_ccw_device { bool is_thinint; bool going_away; bool device_lost; + unsigned int config_ready; void *airq_info; }; @@ -833,8 +834,11 @@ static void virtio_ccw_get_config(struct virtio_device *vdev, if (ret) goto out_free; - memcpy(vcdev->config, config_area, sizeof(vcdev->config)); - memcpy(buf, &vcdev->config[offset], len); + memcpy(vcdev->config, config_area, offset + len); + if (buf) + memcpy(buf, &vcdev->config[offset], len); + if (vcdev->config_ready < offset + len) + vcdev->config_ready = offset + len; out_free: kfree(config_area); @@ -857,6 +861,9 @@ static void virtio_ccw_set_config(struct virtio_device *vdev, if (!config_area) goto out_free; + /* Make sure we don't overwrite fields. */ + if (vcdev->config_ready < offset) + virtio_ccw_get_config(vdev, 0, NULL, offset); memcpy(&vcdev->config[offset], buf, len); /* Write the config area to the host. 
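 * A worked example of the config_ready guard above (offsets are
 * illustrative): if the driver's very first config access is a 2-byte
 * write at offset 4, config_ready is still 0, so set_config() first does
 *
 *	virtio_ccw_get_config(vdev, 0, NULL, 4);
 *
 * to pull bytes 0..3 from the host into vcdev->config, and only then
 * merges the new bytes and writes the whole area back. Without the
 * guard, the full-area write below would push stale zeroes over live
 * host-side fields that were never read.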
*/ memcpy(config_area, vcdev->config, sizeof(vcdev->config)); diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index add419d6ff349..a56a7b243e91f 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -212,6 +212,17 @@ static const struct file_operations twa_fops = { .llseek = noop_llseek, }; +/* + * The controllers use an inline buffer instead of a mapped SGL for small, + * single entry buffers. Note that we treat a zero-length transfer like + * a mapped SGL. + */ +static bool twa_command_mapped(struct scsi_cmnd *cmd) +{ + return scsi_sg_count(cmd) != 1 || + scsi_bufflen(cmd) >= TW_MIN_SGL_LENGTH; +} + /* This function will complete an aen request from the isr */ static int twa_aen_complete(TW_Device_Extension *tw_dev, int request_id) { @@ -1339,7 +1350,8 @@ static irqreturn_t twa_interrupt(int irq, void *dev_instance) } /* Now complete the io */ - scsi_dma_unmap(cmd); + if (twa_command_mapped(cmd)) + scsi_dma_unmap(cmd); cmd->scsi_done(cmd); tw_dev->state[request_id] = TW_S_COMPLETED; twa_free_request_id(tw_dev, request_id); @@ -1582,7 +1594,8 @@ static int twa_reset_device_extension(TW_Device_Extension *tw_dev) struct scsi_cmnd *cmd = tw_dev->srb[i]; cmd->result = (DID_RESET << 16); - scsi_dma_unmap(cmd); + if (twa_command_mapped(cmd)) + scsi_dma_unmap(cmd); cmd->scsi_done(cmd); } } @@ -1765,12 +1778,14 @@ static int twa_scsi_queue_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_ retval = twa_scsiop_execute_scsi(tw_dev, request_id, NULL, 0, NULL); switch (retval) { case SCSI_MLQUEUE_HOST_BUSY: - scsi_dma_unmap(SCpnt); + if (twa_command_mapped(SCpnt)) + scsi_dma_unmap(SCpnt); twa_free_request_id(tw_dev, request_id); break; case 1: SCpnt->result = (DID_ERROR << 16); - scsi_dma_unmap(SCpnt); + if (twa_command_mapped(SCpnt)) + scsi_dma_unmap(SCpnt); done(SCpnt); tw_dev->state[request_id] = TW_S_COMPLETED; twa_free_request_id(tw_dev, request_id); @@ -1831,8 +1846,7 @@ static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, /* Map sglist from scsi layer to cmd packet */ if (scsi_sg_count(srb)) { - if ((scsi_sg_count(srb) == 1) && - (scsi_bufflen(srb) < TW_MIN_SGL_LENGTH)) { + if (!twa_command_mapped(srb)) { if (srb->sc_data_direction == DMA_TO_DEVICE || srb->sc_data_direction == DMA_BIDIRECTIONAL) scsi_sg_copy_to_buffer(srb, @@ -1905,7 +1919,7 @@ static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int re { struct scsi_cmnd *cmd = tw_dev->srb[request_id]; - if (scsi_bufflen(cmd) < TW_MIN_SGL_LENGTH && + if (!twa_command_mapped(cmd) && (cmd->sc_data_direction == DMA_FROM_DEVICE || cmd->sc_data_direction == DMA_BIDIRECTIONAL)) { if (scsi_sg_count(cmd) == 1) { diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index b46ace3d4bf0c..dd0c133aa3127 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -568,7 +568,7 @@ static int mode_select_handle_sense(struct scsi_device *sdev, /* * Command Lock contention */ - err = SCSI_DH_RETRY; + err = SCSI_DH_IMM_RETRY; break; default: break; @@ -618,6 +618,8 @@ static void send_mode_select(struct work_struct *work) err = mode_select_handle_sense(sdev, h->sense); if (err == SCSI_DH_RETRY && retry_cnt--) goto retry; + if (err == SCSI_DH_IMM_RETRY) + goto retry; } if (err == SCSI_DH_OK) { h->state = RDAC_STATE_ACTIVE; diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index 26270c351624f..ce129e595b55b 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h 
@@ -39,7 +39,7 @@ #define DRV_NAME "fnic" #define DRV_DESCRIPTION "Cisco FCoE HBA Driver" -#define DRV_VERSION "1.6.0.17" +#define DRV_VERSION "1.6.0.17a" #define PFX DRV_NAME ": " #define DFX DRV_NAME "%d: " diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 155b286f1a9d3..25436cd2860cc 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -425,6 +425,7 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ unsigned long ptr; struct fc_rport_priv *rdata; spinlock_t *io_lock = NULL; + int io_lock_acquired = 0; if (unlikely(fnic_chk_state_flags_locked(fnic, FNIC_FLAGS_IO_BLOCKED))) return SCSI_MLQUEUE_HOST_BUSY; @@ -518,6 +519,7 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ spin_lock_irqsave(io_lock, flags); /* initialize rest of io_req */ + io_lock_acquired = 1; io_req->port_id = rport->port_id; io_req->start_time = jiffies; CMD_STATE(sc) = FNIC_IOREQ_CMD_PENDING; @@ -571,7 +573,7 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ (((u64)CMD_FLAGS(sc) >> 32) | CMD_STATE(sc))); /* if only we issued IO, will we have the io lock */ - if (CMD_FLAGS(sc) & FNIC_IO_INITIALIZED) + if (io_lock_acquired) spin_unlock_irqrestore(io_lock, flags); atomic_dec(&fnic->in_flight); diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 882744852aacb..cccab6188328f 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -599,9 +599,10 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd, { struct ipr_trace_entry *trace_entry; struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; + unsigned int trace_index; - trace_entry = &ioa_cfg->trace[atomic_add_return - (1, &ioa_cfg->trace_index)%IPR_NUM_TRACE_ENTRIES]; + trace_index = atomic_add_return(1, &ioa_cfg->trace_index) & IPR_TRACE_INDEX_MASK; + trace_entry = &ioa_cfg->trace[trace_index]; trace_entry->time = jiffies; trace_entry->op_code = ipr_cmd->ioarcb.cmd_pkt.cdb[0]; trace_entry->type = type; @@ -1051,10 +1052,15 @@ static void ipr_send_blocking_cmd(struct ipr_cmnd *ipr_cmd, static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg) { + unsigned int hrrq; + if (ioa_cfg->hrrq_num == 1) - return 0; - else - return (atomic_add_return(1, &ioa_cfg->hrrq_index) % (ioa_cfg->hrrq_num - 1)) + 1; + hrrq = 0; + else { + hrrq = atomic_add_return(1, &ioa_cfg->hrrq_index); + hrrq = (hrrq % (ioa_cfg->hrrq_num - 1)) + 1; + } + return hrrq; } /** @@ -4548,7 +4554,7 @@ static ssize_t ipr_store_raw_mode(struct device *dev, spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); res = (struct ipr_resource_entry *)sdev->hostdata; if (res) { - if (ioa_cfg->sis64 && ipr_is_af_dasd_device(res)) { + if (ipr_is_af_dasd_device(res)) { res->raw_mode = simple_strtoul(buf, NULL, 10); len = strlen(buf); if (res->sdev) @@ -6263,21 +6269,23 @@ static void ipr_scsi_done(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); - unsigned long hrrq_flags; + unsigned long lock_flags; scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->s.ioasa.hdr.residual_data_len)); if (likely(IPR_IOASC_SENSE_KEY(ioasc) == 0)) { scsi_dma_unmap(scsi_cmd); - spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); + spin_lock_irqsave(ipr_cmd->hrrq->lock, lock_flags); list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); - spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); + 
spin_unlock_irqrestore(ipr_cmd->hrrq->lock, lock_flags); } else { - spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + spin_lock(&ipr_cmd->hrrq->_lock); ipr_erp_start(ioa_cfg, ipr_cmd); - spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); + spin_unlock(&ipr_cmd->hrrq->_lock); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); } } diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index 47412cf4eaac5..6b97ee45c7b46 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -272,7 +272,7 @@ #define IPR_RUNTIME_RESET 0x40000000 #define IPR_IPL_INIT_MIN_STAGE_TIME 5 -#define IPR_IPL_INIT_DEFAULT_STAGE_TIME 15 +#define IPR_IPL_INIT_DEFAULT_STAGE_TIME 30 #define IPR_IPL_INIT_STAGE_UNKNOWN 0x0 #define IPR_IPL_INIT_STAGE_TRANSOP 0xB0000000 #define IPR_IPL_INIT_STAGE_MASK 0xff000000 @@ -1486,6 +1486,7 @@ struct ipr_ioa_cfg { #define IPR_NUM_TRACE_INDEX_BITS 8 #define IPR_NUM_TRACE_ENTRIES (1 << IPR_NUM_TRACE_INDEX_BITS) +#define IPR_TRACE_INDEX_MASK (IPR_NUM_TRACE_ENTRIES - 1) #define IPR_TRACE_SIZE (sizeof(struct ipr_trace_entry) * IPR_NUM_TRACE_ENTRIES) char trace_start[8]; #define IPR_TRACE_START_LABEL "trace" diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 1b3a094734522..30f9ef0c0d4f8 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -733,8 +733,6 @@ static bool fc_invoke_resp(struct fc_exch *ep, struct fc_seq *sp, if (resp) { resp(sp, fp, arg); res = true; - } else if (!IS_ERR(fp)) { - fc_frame_free(fp); } spin_lock_bh(&ep->ex_lock); @@ -1596,7 +1594,8 @@ static void fc_exch_recv_seq_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) * If the new exch resp handler is valid then call that * first. */ - fc_invoke_resp(ep, sp, fp); + if (!fc_invoke_resp(ep, sp, fp)) + fc_frame_free(fp); fc_exch_release(ep); return; @@ -1695,7 +1694,8 @@ static void fc_exch_abts_resp(struct fc_exch *ep, struct fc_frame *fp) fc_exch_hold(ep); if (!rc) fc_exch_delete(ep); - fc_invoke_resp(ep, sp, fp); + if (!fc_invoke_resp(ep, sp, fp)) + fc_frame_free(fp); if (has_rec) fc_exch_timer_set(ep, ep->r_a_tov); fc_exch_release(ep); diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index c6795941b45d9..2d5909c4685ca 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -1039,11 +1039,26 @@ static void fc_fcp_cleanup_each_cmd(struct fc_lport *lport, unsigned int id, fc_fcp_pkt_hold(fsp); spin_unlock_irqrestore(&si->scsi_queue_lock, flags); - if (!fc_fcp_lock_pkt(fsp)) { + spin_lock_bh(&fsp->scsi_pkt_lock); + if (!(fsp->state & FC_SRB_COMPL)) { + fsp->state |= FC_SRB_COMPL; + /* + * TODO: dropping scsi_pkt_lock and then reacquiring it + * around fc_fcp_cleanup_cmd() is required, since + * fc_fcp_cleanup_cmd() calls into fc_seq_set_resp() and + * that function gives up the CPU via schedule(). Maybe + * the scheduling should be removed from fc_seq_set_resp() + * instead of unlocking here, to avoid a + * scheduling-while-atomic bug. 
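+ *
+ * In other words, the sequence that must be avoided is, roughly:
+ *
+ *	spin_lock_bh(&fsp->scsi_pkt_lock);
+ *	fc_fcp_cleanup_cmd(fsp, error);	(may end up in schedule())
+ *	spin_unlock_bh(&fsp->scsi_pkt_lock);
+ *
+ * Sleeping with a spinlock held and bottom halves disabled would
+ * trigger a "scheduling while atomic" splat, hence the unlock/relock
+ * dance around fc_fcp_cleanup_cmd() below.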
+ */ + spin_unlock_bh(&fsp->scsi_pkt_lock); + fc_fcp_cleanup_cmd(fsp, error); + + spin_lock_bh(&fsp->scsi_pkt_lock); fc_io_compl(fsp); - fc_fcp_unlock_pkt(fsp); } + spin_unlock_bh(&fsp->scsi_pkt_lock); fc_fcp_pkt_release(fsp); spin_lock_irqsave(&si->scsi_queue_lock, flags); diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 8053f24f03499..98d9bb6ff725f 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2941,10 +2941,10 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_session *session = conn->session; - unsigned long flags; del_timer_sync(&conn->transport_timer); + mutex_lock(&session->eh_mutex); spin_lock_bh(&session->frwd_lock); conn->c_stage = ISCSI_CONN_CLEANUP_WAIT; if (session->leadconn == conn) { @@ -2956,28 +2956,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) } spin_unlock_bh(&session->frwd_lock); - /* - * Block until all in-progress commands for this connection - * time out or fail. - */ - for (;;) { - spin_lock_irqsave(session->host->host_lock, flags); - if (!atomic_read(&session->host->host_busy)) { /* OK for ERL == 0 */ - spin_unlock_irqrestore(session->host->host_lock, flags); - break; - } - spin_unlock_irqrestore(session->host->host_lock, flags); - msleep_interruptible(500); - iscsi_conn_printk(KERN_INFO, conn, "iscsi conn_destroy(): " - "host_busy %d host_failed %d\n", - atomic_read(&session->host->host_busy), - session->host->host_failed); - /* - * force eh_abort() to unblock - */ - wake_up(&conn->ehwait); - } - /* flush queued up work because we free the connection below */ iscsi_suspend_tx(conn); @@ -2994,6 +2972,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) if (session->leadconn == conn) session->leadconn = NULL; spin_unlock_bh(&session->frwd_lock); + mutex_unlock(&session->eh_mutex); iscsi_destroy_conn(cls_conn); } diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index 454536c49315d..9c780740fb829 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -887,6 +887,8 @@ static void mvs_slot_free(struct mvs_info *mvi, u32 rx_desc) static void mvs_slot_task_free(struct mvs_info *mvi, struct sas_task *task, struct mvs_slot_info *slot, u32 slot_idx) { + if (!slot) + return; if (!slot->task) return; if (!sas_protocol_ata(task->task_proto)) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 0e6ee3ca30e66..8b011aef12bd5 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -67,10 +67,10 @@ * | | | 0xd031-0xd0ff | * | | | 0xd101-0xd1fe | * | | | 0xd214-0xd2fe | - * | Target Mode | 0xe079 | | - * | Target Mode Management | 0xf072 | 0xf002 | + * | Target Mode | 0xe080 | | + * | Target Mode Management | 0xf096 | 0xf002 | * | | | 0xf046-0xf049 | - * | Target Mode Task Management | 0x1000b | | + * | Target Mode Task Management | 0x1000d | | * ---------------------------------------------------------------------- */ diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index e86201d3b8c6d..90d926ca12000 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -274,6 +274,7 @@ #define RESPONSE_ENTRY_CNT_FX00 256 /* Number of response entries.*/ struct req_que; +struct qla_tgt_sess; /* * (sd.h is not exported, hence local inclusion) @@ -2026,6 +2027,7 @@ typedef struct fc_port { uint16_t port_id; unsigned long retry_delay_timestamp; + struct qla_tgt_sess *tgt_session; } fc_port_t; #include 
"qla_mr.h" @@ -3579,6 +3581,16 @@ typedef struct scsi_qla_host { uint16_t fcoe_fcf_idx; uint8_t fcoe_vn_port_mac[6]; + /* list of commands waiting on workqueue */ + struct list_head qla_cmd_list; + struct list_head qla_sess_op_cmd_list; + spinlock_t cmd_list_lock; + + /* Counter to detect races between ELS and RSCN events */ + atomic_t generation_tick; + /* Time when global fcport update has been scheduled */ + int total_fcport_update_gen; + uint32_t vp_abort_cnt; struct fc_vport *fc_vport; /* holds fc_vport * for each vport */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 285cb204f3005..60f9651f26432 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -115,6 +115,8 @@ qla2x00_async_iocb_timeout(void *data) QLA_LOGIO_LOGIN_RETRIED : 0; qla2x00_post_async_login_done_work(fcport->vha, fcport, lio->u.logio.data); + } else if (sp->type == SRB_LOGOUT_CMD) { + qlt_logo_completion_handler(fcport, QLA_FUNCTION_TIMEOUT); } } @@ -497,7 +499,10 @@ void qla2x00_async_logout_done(struct scsi_qla_host *vha, fc_port_t *fcport, uint16_t *data) { - qla2x00_mark_device_lost(vha, fcport, 1, 0); + /* Don't re-login in target mode */ + if (!fcport->tgt_session) + qla2x00_mark_device_lost(vha, fcport, 1, 0); + qlt_logo_completion_handler(fcport, data[0]); return; } @@ -2189,7 +2194,7 @@ qla2x00_init_rings(scsi_qla_host_t *vha) /* Clear outstanding commands array. */ for (que = 0; que < ha->max_req_queues; que++) { req = ha->req_q_map[que]; - if (!req) + if (!req || !test_bit(que, ha->req_qid_map)) continue; req->out_ptr = (void *)(req->ring + req->length); *req->out_ptr = 0; @@ -2206,7 +2211,7 @@ qla2x00_init_rings(scsi_qla_host_t *vha) for (que = 0; que < ha->max_rsp_queues; que++) { rsp = ha->rsp_q_map[que]; - if (!rsp) + if (!rsp || !test_bit(que, ha->rsp_qid_map)) continue; rsp->in_ptr = (void *)(rsp->ring + rsp->length); *rsp->in_ptr = 0; @@ -2922,21 +2927,14 @@ qla2x00_rport_del(void *data) { fc_port_t *fcport = data; struct fc_rport *rport; - scsi_qla_host_t *vha = fcport->vha; unsigned long flags; spin_lock_irqsave(fcport->vha->host->host_lock, flags); rport = fcport->drport ? fcport->drport: fcport->rport; fcport->drport = NULL; spin_unlock_irqrestore(fcport->vha->host->host_lock, flags); - if (rport) { + if (rport) fc_remote_port_delete(rport); - /* - * Release the target mode FC NEXUS in qla_target.c code - * if target mod is enabled. - */ - qlt_fc_port_deleted(vha, fcport); - } } /** @@ -3303,6 +3301,7 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport) * Create target mode FC NEXUS in qla_target.c if target mode is * enabled.. */ + qlt_fc_port_added(vha, fcport); spin_lock_irqsave(fcport->vha->host->host_lock, flags); @@ -3375,6 +3374,7 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) LIST_HEAD(new_fcports); struct qla_hw_data *ha = vha->hw; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); + int discovery_gen; /* If FL port exists, then SNS is present */ if (IS_FWI2_CAPABLE(ha)) @@ -3445,6 +3445,14 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) fcport->scan_state = QLA_FCPORT_SCAN; } + /* Mark the time right before querying FW for connected ports. + * This process is long, asynchronous and by the time it's done, + * collected information might not be accurate anymore. E.g. + * disconnected port might have re-connected and a brand new + * session has been created. In this case session's generation + * will be newer than discovery_gen. 
*/ + qlt_do_generation_tick(vha, &discovery_gen); + rval = qla2x00_find_all_fabric_devs(vha, &new_fcports); if (rval != QLA_SUCCESS) break; @@ -3460,20 +3468,44 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) if ((fcport->flags & FCF_FABRIC_DEVICE) == 0) continue; - if (fcport->scan_state == QLA_FCPORT_SCAN && - atomic_read(&fcport->state) == FCS_ONLINE) { - qla2x00_mark_device_lost(vha, fcport, - ql2xplogiabsentdevice, 0); - if (fcport->loop_id != FC_NO_LOOP_ID && - (fcport->flags & FCF_FCP2_DEVICE) == 0 && - fcport->port_type != FCT_INITIATOR && - fcport->port_type != FCT_BROADCAST) { - ha->isp_ops->fabric_logout(vha, - fcport->loop_id, - fcport->d_id.b.domain, - fcport->d_id.b.area, - fcport->d_id.b.al_pa); - qla2x00_clear_loop_id(fcport); + if (fcport->scan_state == QLA_FCPORT_SCAN) { + if (qla_ini_mode_enabled(base_vha) && + atomic_read(&fcport->state) == FCS_ONLINE) { + qla2x00_mark_device_lost(vha, fcport, + ql2xplogiabsentdevice, 0); + if (fcport->loop_id != FC_NO_LOOP_ID && + (fcport->flags & FCF_FCP2_DEVICE) == 0 && + fcport->port_type != FCT_INITIATOR && + fcport->port_type != FCT_BROADCAST) { + ha->isp_ops->fabric_logout(vha, + fcport->loop_id, + fcport->d_id.b.domain, + fcport->d_id.b.area, + fcport->d_id.b.al_pa); + qla2x00_clear_loop_id(fcport); + } + } else if (!qla_ini_mode_enabled(base_vha)) { + /* + * In target mode, explicitly kill + * sessions and log out of devices + * that are gone, so that we don't + * end up with an initiator using the + * wrong ACL (if the fabric recycles + * an FC address and we have a stale + * session around) and so that we don't + * report initiators that are no longer + * on the fabric. + */ + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf077, + "port gone, logging out/killing session: " + "%8phC state 0x%x flags 0x%x fc4_type 0x%x " + "scan_state %d\n", + fcport->port_name, + atomic_read(&fcport->state), + fcport->flags, fcport->fc4_type, + fcport->scan_state); + qlt_fc_port_deleted(vha, fcport, + discovery_gen); } } } @@ -3494,6 +3526,28 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) (fcport->flags & FCF_LOGIN_NEEDED) == 0) continue; + /* + * If we're not an initiator, skip looking for devices + * and logging in. There's no reason for us to do it, + * and it seems to actively cause problems in target + * mode if we race with the initiator logging into us + * (we might get the "port ID used" status back from + * our login command and log out the initiator, which + * seems to cause havoc). + */ + if (!qla_ini_mode_enabled(base_vha)) { + if (fcport->scan_state == QLA_FCPORT_FOUND) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf078, + "port %8phC state 0x%x flags 0x%x fc4_type 0x%x " + "scan_state %d (initiator mode disabled; skipping " + "login)\n", fcport->port_name, + atomic_read(&fcport->state), + fcport->flags, fcport->fc4_type, + fcport->scan_state); + } + continue; + } + if (fcport->loop_id == FC_NO_LOOP_ID) { fcport->loop_id = next_loopid; rval = qla2x00_find_new_loop_id( @@ -3520,16 +3574,38 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) break; - /* Find a new loop ID to use. */ - fcport->loop_id = next_loopid; - rval = qla2x00_find_new_loop_id(base_vha, fcport); - if (rval != QLA_SUCCESS) { - /* Ran out of IDs to use */ - break; - } + /* + * If we're not an initiator, skip looking for devices + * and logging in. 
There's no reason for us to do it, + * and it seems to actively cause problems in target + * mode if we race with the initiator logging into us + * (we might get the "port ID used" status back from + * our login command and log out the initiator, which + * seems to cause havoc). + */ + if (qla_ini_mode_enabled(base_vha)) { + /* Find a new loop ID to use. */ + fcport->loop_id = next_loopid; + rval = qla2x00_find_new_loop_id(base_vha, + fcport); + if (rval != QLA_SUCCESS) { + /* Ran out of IDs to use */ + break; + } - /* Login and update database */ - qla2x00_fabric_dev_login(vha, fcport, &next_loopid); + /* Login and update database */ + qla2x00_fabric_dev_login(vha, fcport, + &next_loopid); + } else { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf079, + "new port %8phC state 0x%x flags 0x%x fc4_type " + "0x%x scan_state %d (initiator mode disabled; " + "skipping login)\n", + fcport->port_name, + atomic_read(&fcport->state), + fcport->flags, fcport->fc4_type, + fcport->scan_state); + } list_move_tail(&fcport->list, &vha->vp_fcports); } @@ -3725,11 +3801,12 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, fcport->fp_speed = new_fcport->fp_speed; /* - * If address the same and state FCS_ONLINE, nothing - * changed. + * If address the same and state FCS_ONLINE + * (or in target mode), nothing changed. */ if (fcport->d_id.b24 == new_fcport->d_id.b24 && - atomic_read(&fcport->state) == FCS_ONLINE) { + (atomic_read(&fcport->state) == FCS_ONLINE || + !qla_ini_mode_enabled(base_vha))) { break; } @@ -3749,6 +3826,22 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, * Log it out if still logged in and mark it for * relogin later. */ + if (!qla_ini_mode_enabled(base_vha)) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf080, + "port changed FC ID, %8phC" + " old %x:%x:%x (loop_id 0x%04x)-> new %x:%x:%x\n", + fcport->port_name, + fcport->d_id.b.domain, + fcport->d_id.b.area, + fcport->d_id.b.al_pa, + fcport->loop_id, + new_fcport->d_id.b.domain, + new_fcport->d_id.b.area, + new_fcport->d_id.b.al_pa); + fcport->d_id.b24 = new_fcport->d_id.b24; + break; + } + fcport->d_id.b24 = new_fcport->d_id.b24; fcport->flags |= FCF_LOGIN_NEEDED; if (fcport->loop_id != FC_NO_LOOP_ID && @@ -3768,6 +3861,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, if (found) continue; /* If device was not in our fcports list, then add it. */ + new_fcport->scan_state = QLA_FCPORT_FOUND; list_add_tail(&new_fcport->list, new_fcports); /* Allocate a new replacement fcport. */ @@ -4188,6 +4282,14 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha) atomic_read(&fcport->state) != FCS_UNCONFIGURED) { spin_unlock_irqrestore(&ha->vport_slock, flags); qla2x00_rport_del(fcport); + + /* + * Release the target mode FC NEXUS in + * qla_target.c, if target mod is enabled. + */ + qlt_fc_port_deleted(vha, fcport, + base_vha->total_fcport_update_gen); + spin_lock_irqsave(&ha->vport_slock, flags); } } @@ -4855,7 +4957,7 @@ qla25xx_init_queues(struct qla_hw_data *ha) for (i = 1; i < ha->max_rsp_queues; i++) { rsp = ha->rsp_q_map[i]; - if (rsp) { + if (rsp && test_bit(i, ha->rsp_qid_map)) { rsp->options &= ~BIT_0; ret = qla25xx_init_rsp_que(base_vha, rsp); if (ret != QLA_SUCCESS) @@ -4870,8 +4972,8 @@ qla25xx_init_queues(struct qla_hw_data *ha) } for (i = 1; i < ha->max_req_queues; i++) { req = ha->req_q_map[i]; - if (req) { - /* Clear outstanding commands array. */ + if (req && test_bit(i, ha->req_qid_map)) { + /* Clear outstanding commands array. 
*/ req->options &= ~BIT_0; ret = qla25xx_init_req_que(base_vha, req); if (ret != QLA_SUCCESS) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index a1ab25fca8742..dc96f31a88315 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -1943,6 +1943,9 @@ qla24xx_logout_iocb(srb_t *sp, struct logio_entry_24xx *logio) logio->entry_type = LOGINOUT_PORT_IOCB_TYPE; logio->control_flags = cpu_to_le16(LCF_COMMAND_LOGO|LCF_IMPL_LOGO); + if (!sp->fcport->tgt_session || + !sp->fcport->tgt_session->keep_nport_handle) + logio->control_flags |= cpu_to_le16(LCF_FREE_NPORT); logio->nport_handle = cpu_to_le16(sp->fcport->loop_id); logio->port_id[0] = sp->fcport->d_id.b.al_pa; logio->port_id[1] = sp->fcport->d_id.b.area; diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 6dc14cd782b2a..1f3991ba75805 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2992,9 +2992,9 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) "MSI-X: Failed to enable support " "-- %d/%d\n Retry with %d vectors.\n", ha->msix_count, ret, ret); + ha->msix_count = ret; + ha->max_rsp_queues = ha->msix_count - 1; } - ha->msix_count = ret; - ha->max_rsp_queues = ha->msix_count - 1; ha->msix_entries = kzalloc(sizeof(struct qla_msix_entry) * ha->msix_count, GFP_KERNEL); if (!ha->msix_entries) { diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index cc94192511cf5..63abed122adf5 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -601,7 +601,7 @@ qla25xx_delete_queues(struct scsi_qla_host *vha) /* Delete request queues */ for (cnt = 1; cnt < ha->max_req_queues; cnt++) { req = ha->req_q_map[cnt]; - if (req) { + if (req && test_bit(cnt, ha->req_qid_map)) { ret = qla25xx_delete_req_que(vha, req); if (ret != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x00ea, @@ -615,7 +615,7 @@ qla25xx_delete_queues(struct scsi_qla_host *vha) /* Delete response queues */ for (cnt = 1; cnt < ha->max_rsp_queues; cnt++) { rsp = ha->rsp_q_map[cnt]; - if (rsp) { + if (rsp && test_bit(cnt, ha->rsp_qid_map)) { ret = qla25xx_delete_rsp_que(vha, rsp); if (ret != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x00eb, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 7462dd70b1506..d007255745770 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -398,6 +398,9 @@ static void qla2x00_free_queues(struct qla_hw_data *ha) int cnt; for (cnt = 0; cnt < ha->max_req_queues; cnt++) { + if (!test_bit(cnt, ha->req_qid_map)) + continue; + req = ha->req_q_map[cnt]; qla2x00_free_req_que(ha, req); } @@ -405,6 +408,9 @@ static void qla2x00_free_queues(struct qla_hw_data *ha) ha->req_q_map = NULL; for (cnt = 0; cnt < ha->max_rsp_queues; cnt++) { + if (!test_bit(cnt, ha->rsp_qid_map)) + continue; + rsp = ha->rsp_q_map[cnt]; qla2x00_free_rsp_que(ha, rsp); } @@ -3229,11 +3235,14 @@ qla2x00_schedule_rport_del(struct scsi_qla_host *vha, fc_port_t *fcport, spin_lock_irqsave(vha->host->host_lock, flags); fcport->drport = rport; spin_unlock_irqrestore(vha->host->host_lock, flags); + qlt_do_generation_tick(vha, &base_vha->total_fcport_update_gen); set_bit(FCPORT_UPDATE_NEEDED, &base_vha->dpc_flags); qla2xxx_wake_dpc(base_vha); } else { + int now; fc_remote_port_delete(rport); - qlt_fc_port_deleted(vha, fcport); + qlt_do_generation_tick(vha, &now); + qlt_fc_port_deleted(vha, fcport, now); } } @@ -3763,8 +3772,11 @@ struct scsi_qla_host 
*qla2x00_create_host(struct scsi_host_template *sht, INIT_LIST_HEAD(&vha->vp_fcports); INIT_LIST_HEAD(&vha->work_list); INIT_LIST_HEAD(&vha->list); + INIT_LIST_HEAD(&vha->qla_cmd_list); + INIT_LIST_HEAD(&vha->qla_sess_op_cmd_list); spin_lock_init(&vha->work_lock); + spin_lock_init(&vha->cmd_list_lock); sprintf(vha->host_str, "%s_%ld", QLA2XXX_DRIVER_NAME, vha->host_no); ql_dbg(ql_dbg_init, vha, 0x0041, diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index fe8a8d157e225..df6193b481773 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -113,6 +113,11 @@ static void qlt_abort_cmd_on_host_reset(struct scsi_qla_host *vha, static void qlt_alloc_qfull_cmd(struct scsi_qla_host *vha, struct atio_from_isp *atio, uint16_t status, int qfull); static void qlt_disable_vha(struct scsi_qla_host *vha); +static void qlt_clear_tgt_db(struct qla_tgt *tgt); +static void qlt_send_notify_ack(struct scsi_qla_host *vha, + struct imm_ntfy_from_isp *ntfy, + uint32_t add_flags, uint16_t resp_code, int resp_code_valid, + uint16_t srr_flags, uint16_t srr_reject_code, uint8_t srr_explan); /* * Global Variables */ @@ -122,6 +127,16 @@ static struct workqueue_struct *qla_tgt_wq; static DEFINE_MUTEX(qla_tgt_mutex); static LIST_HEAD(qla_tgt_glist); +/* This API intentionally takes dest as a parameter, rather than returning + * int value to avoid caller forgetting to issue wmb() after the store */ +void qlt_do_generation_tick(struct scsi_qla_host *vha, int *dest) +{ + scsi_qla_host_t *base_vha = pci_get_drvdata(vha->hw->pdev); + *dest = atomic_inc_return(&base_vha->generation_tick); + /* memory barrier */ + wmb(); +} + /* ha->hardware_lock supposed to be held on entry (to protect tgt->sess_list) */ static struct qla_tgt_sess *qlt_find_sess_by_port_name( struct qla_tgt *tgt, @@ -381,14 +396,73 @@ static void qlt_free_session_done(struct work_struct *work) struct qla_tgt *tgt = sess->tgt; struct scsi_qla_host *vha = sess->vha; struct qla_hw_data *ha = vha->hw; + unsigned long flags; + bool logout_started = false; + fc_port_t fcport; + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf084, + "%s: se_sess %p / sess %p from port %8phC loop_id %#04x" + " s_id %02x:%02x:%02x logout %d keep %d plogi %d\n", + __func__, sess->se_sess, sess, sess->port_name, sess->loop_id, + sess->s_id.b.domain, sess->s_id.b.area, sess->s_id.b.al_pa, + sess->logout_on_delete, sess->keep_nport_handle, + sess->plogi_ack_needed); BUG_ON(!tgt); + + if (sess->logout_on_delete) { + int rc; + + memset(&fcport, 0, sizeof(fcport)); + fcport.loop_id = sess->loop_id; + fcport.d_id = sess->s_id; + memcpy(fcport.port_name, sess->port_name, WWN_SIZE); + fcport.vha = vha; + fcport.tgt_session = sess; + + rc = qla2x00_post_async_logout_work(vha, &fcport, NULL); + if (rc != QLA_SUCCESS) + ql_log(ql_log_warn, vha, 0xf085, + "Schedule logo failed sess %p rc %d\n", + sess, rc); + else + logout_started = true; + } + /* * Release the target session for FC Nexus from fabric module code. 
*/ if (sess->se_sess != NULL) ha->tgt.tgt_ops->free_session(sess); + if (logout_started) { + bool traced = false; + + while (!ACCESS_ONCE(sess->logout_completed)) { + if (!traced) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf086, + "%s: waiting for sess %p logout\n", + __func__, sess); + traced = true; + } + msleep(100); + } + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf087, + "%s: sess %p logout completed\n", + __func__, sess); + } + + spin_lock_irqsave(&ha->hardware_lock, flags); + + if (sess->plogi_ack_needed) + qlt_send_notify_ack(vha, &sess->tm_iocb, + 0, 0, 0, 0, 0, 0); + + list_del(&sess->sess_list_entry); + + spin_unlock_irqrestore(&ha->hardware_lock, flags); + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf001, "Unregistration of sess %p finished\n", sess); @@ -409,9 +483,9 @@ void qlt_unreg_sess(struct qla_tgt_sess *sess) vha->hw->tgt.tgt_ops->clear_nacl_from_fcport_map(sess); - list_del(&sess->sess_list_entry); - if (sess->deleted) - list_del(&sess->del_list_entry); + if (!list_empty(&sess->del_list_entry)) + list_del_init(&sess->del_list_entry); + sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; INIT_WORK(&sess->free_work, qlt_free_session_done); schedule_work(&sess->free_work); @@ -431,10 +505,10 @@ static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) loop_id = le16_to_cpu(n->u.isp24.nport_handle); if (loop_id == 0xFFFF) { -#if 0 /* FIXME: Re-enable Global event handling.. */ /* Global event */ - atomic_inc(&ha->tgt.qla_tgt->tgt_global_resets_count); - qlt_clear_tgt_db(ha->tgt.qla_tgt); + atomic_inc(&vha->vha_tgt.qla_tgt->tgt_global_resets_count); + qlt_clear_tgt_db(vha->vha_tgt.qla_tgt); +#if 0 /* FIXME: do we need to choose a session here? */ if (!list_empty(&ha->tgt.qla_tgt->sess_list)) { sess = list_entry(ha->tgt.qla_tgt->sess_list.next, typeof(*sess), sess_list_entry); @@ -489,27 +563,38 @@ static void qlt_schedule_sess_for_deletion(struct qla_tgt_sess *sess, struct qla_tgt *tgt = sess->tgt; uint32_t dev_loss_tmo = tgt->ha->port_down_retry_count + 5; - if (sess->deleted) - return; + if (sess->deleted) { + /* Upgrade to unconditional deletion in case it was temporary */ + if (immediate && sess->deleted == QLA_SESS_DELETION_PENDING) + list_del(&sess->del_list_entry); + else + return; + } ql_dbg(ql_dbg_tgt, sess->vha, 0xe001, "Scheduling sess %p for deletion\n", sess); - list_add_tail(&sess->del_list_entry, &tgt->del_sess_list); - sess->deleted = 1; - if (immediate) + if (immediate) { dev_loss_tmo = 0; + sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; + list_add(&sess->del_list_entry, &tgt->del_sess_list); + } else { + sess->deleted = QLA_SESS_DELETION_PENDING; + list_add_tail(&sess->del_list_entry, &tgt->del_sess_list); + } sess->expires = jiffies + dev_loss_tmo * HZ; ql_dbg(ql_dbg_tgt, sess->vha, 0xe048, - "qla_target(%d): session for port %8phC (loop ID %d) scheduled for " - "deletion in %u secs (expires: %lu) immed: %d\n", - sess->vha->vp_idx, sess->port_name, sess->loop_id, dev_loss_tmo, - sess->expires, immediate); + "qla_target(%d): session for port %8phC (loop ID %d s_id %02x:%02x:%02x)" + " scheduled for deletion in %u secs (expires: %lu) immed: %d, logout: %d, gen: %#x\n", + sess->vha->vp_idx, sess->port_name, sess->loop_id, + sess->s_id.b.domain, sess->s_id.b.area, sess->s_id.b.al_pa, + dev_loss_tmo, sess->expires, immediate, sess->logout_on_delete, + sess->generation); if (immediate) - schedule_delayed_work(&tgt->sess_del_work, 0); + mod_delayed_work(system_wq, &tgt->sess_del_work, 0); else schedule_delayed_work(&tgt->sess_del_work, sess->expires - jiffies); @@ -578,9 +663,9 @@ static 
int qla24xx_get_loop_id(struct scsi_qla_host *vha, const uint8_t *s_id,
 /* ha->hardware_lock supposed to be held on entry */
 static void qlt_undelete_sess(struct qla_tgt_sess *sess)
 {
-	BUG_ON(!sess->deleted);
+	BUG_ON(sess->deleted != QLA_SESS_DELETION_PENDING);
 
-	list_del(&sess->del_list_entry);
+	list_del_init(&sess->del_list_entry);
 	sess->deleted = 0;
 }
@@ -599,7 +684,9 @@ static void qlt_del_sess_work_fn(struct delayed_work *work)
 		    del_list_entry);
 		elapsed = jiffies;
 		if (time_after_eq(elapsed, sess->expires)) {
-			qlt_undelete_sess(sess);
+			/* No turning back */
+			list_del_init(&sess->del_list_entry);
+			sess->deleted = QLA_SESS_DELETION_IN_PROGRESS;
 
 			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf004,
 			    "Timeout: sess %p about to be deleted\n",
@@ -643,6 +730,13 @@ static struct qla_tgt_sess *qlt_create_sess(
 			    fcport->d_id.b.al_pa, fcport->d_id.b.area,
 			    fcport->loop_id);
 
+			/* Cannot undelete at this point */
+			if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) {
+				spin_unlock_irqrestore(&ha->hardware_lock,
+				    flags);
+				return NULL;
+			}
+
 			if (sess->deleted)
 				qlt_undelete_sess(sess);
 
@@ -652,6 +746,9 @@ static struct qla_tgt_sess *qlt_create_sess(
 
 			if (sess->local && !local)
 				sess->local = 0;
+
+			qlt_do_generation_tick(vha, &sess->generation);
+
 			spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
 			return sess;
@@ -673,6 +770,14 @@ static struct qla_tgt_sess *qlt_create_sess(
 	sess->s_id = fcport->d_id;
 	sess->loop_id = fcport->loop_id;
 	sess->local = local;
+	INIT_LIST_HEAD(&sess->del_list_entry);
+
+	/* Under normal circumstances we want to log out from firmware when
+	 * session eventually ends and release corresponding nport handle.
+	 * In the exception cases (e.g. when new PLOGI is waiting) corresponding
+	 * code will adjust these flags as necessary. */
+	sess->logout_on_delete = 1;
+	sess->keep_nport_handle = 0;
 
 	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf006,
 	    "Adding sess %p to tgt %p via ->check_initiator_node_acl()\n",
@@ -705,6 +810,7 @@ static struct qla_tgt_sess *qlt_create_sess(
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	list_add_tail(&sess->sess_list_entry, &vha->vha_tgt.qla_tgt->sess_list);
 	vha->vha_tgt.qla_tgt->sess_count++;
+	qlt_do_generation_tick(vha, &sess->generation);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
 	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04b,
@@ -718,7 +824,7 @@ static struct qla_tgt_sess *qlt_create_sess(
 }
 
 /*
- * Called from drivers/scsi/qla2xxx/qla_init.c:qla2x00_reg_remote_port()
+ * Called from qla2x00_reg_remote_port()
  */
 void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport)
 {
@@ -750,6 +856,10 @@ void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport)
 		mutex_unlock(&vha->vha_tgt.tgt_mutex);
 
 		spin_lock_irqsave(&ha->hardware_lock, flags);
+	} else if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) {
+		/* Point of no return */
+		spin_unlock_irqrestore(&ha->hardware_lock, flags);
+		return;
 	} else {
 		kref_get(&sess->se_sess->sess_kref);
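
The generation-tick scheme stamped into qlt_create_sess() above is easiest to follow in isolation. Below is a standalone C11 sketch of the same idea — not part of the patch, names illustrative only: a global tick is sampled before a long asynchronous scan, each session records the tick current at creation, and a deletion request whose snapshot is older than the session is discarded. The signed subtraction keeps the comparison correct across counter wraparound, like the max_gen test in qlt_fc_port_deleted() below.

	#include <stdatomic.h>
	#include <stdbool.h>

	static atomic_int generation_tick;	/* bumped on every session change */

	struct session {
		int generation;		/* tick value stamped at creation time */
	};

	/* Sample the tick before starting a long, asynchronous fabric scan.
	 * Written through a pointer (mirroring qlt_do_generation_tick()) so
	 * the caller cannot forget to order the store before using it; the
	 * fence here is a full barrier, stronger than the driver's wmb(). */
	static void snapshot_generation(int *dest)
	{
		*dest = atomic_fetch_add(&generation_tick, 1) + 1;
		atomic_thread_fence(memory_order_seq_cst);
	}

	/* A "port is gone" result from the scan is stale if the session was
	 * (re)created after the snapshot was taken. */
	static bool deletion_is_stale(const struct session *s, int snapshot)
	{
		/* Signed difference survives counter wraparound. */
		return snapshot - s->generation < 0;
	}
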
@@ -780,27 +890,36 @@ void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport)
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 }
 
-void qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport)
+/*
+ * max_gen - specifies maximum session generation
+ * at which this deletion request is still valid
+ */
+void
+qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport, int max_gen)
 {
-	struct qla_hw_data *ha = vha->hw;
 	struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
 	struct qla_tgt_sess *sess;
-	unsigned long flags;
 
 	if (!vha->hw->tgt.tgt_ops)
 		return;
 
-	if (!tgt || (fcport->port_type != FCT_INITIATOR))
+	if (!tgt)
 		return;
 
-	spin_lock_irqsave(&ha->hardware_lock, flags);
 	if (tgt->tgt_stop) {
-		spin_unlock_irqrestore(&ha->hardware_lock, flags);
 		return;
 	}
 	sess = qlt_find_sess_by_port_name(tgt, fcport->port_name);
 	if (!sess) {
-		spin_unlock_irqrestore(&ha->hardware_lock, flags);
+		return;
+	}
+
+	if (max_gen - sess->generation < 0) {
+		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf092,
+		    "Ignoring stale deletion request for se_sess %p / sess %p"
+		    " for port %8phC, req_gen %d, sess_gen %d\n",
+		    sess->se_sess, sess, sess->port_name, max_gen,
+		    sess->generation);
 		return;
 	}
 
@@ -808,7 +927,6 @@ void qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport)
 	sess->local = 1;
 	qlt_schedule_sess_for_deletion(sess, false);
-	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 }
 
 static inline int test_tgt_sess_count(struct qla_tgt *tgt)
@@ -1175,6 +1293,70 @@ static void qlt_24xx_retry_term_exchange(struct scsi_qla_host *vha,
 	    FCP_TMF_CMPL, true);
 }
 
+static int abort_cmd_for_tag(struct scsi_qla_host *vha, uint32_t tag)
+{
+	struct qla_tgt_sess_op *op;
+	struct qla_tgt_cmd *cmd;
+
+	spin_lock(&vha->cmd_list_lock);
+
+	list_for_each_entry(op, &vha->qla_sess_op_cmd_list, cmd_list) {
+		if (tag == op->atio.u.isp24.exchange_addr) {
+			op->aborted = true;
+			spin_unlock(&vha->cmd_list_lock);
+			return 1;
+		}
+	}
+
+	list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) {
+		if (tag == cmd->atio.u.isp24.exchange_addr) {
+			cmd->state = QLA_TGT_STATE_ABORTED;
+			spin_unlock(&vha->cmd_list_lock);
+			return 1;
+		}
+	}
+
+	spin_unlock(&vha->cmd_list_lock);
+	return 0;
+}
+
+/* drop cmds for the given lun
+ * XXX only looks for cmds on the port through which lun reset was received
+ * XXX does not go through the list of other ports (which may have cmds
+ * for the same lun)
+ */
+static void abort_cmds_for_lun(struct scsi_qla_host *vha,
+				uint32_t lun, uint8_t *s_id)
+{
+	struct qla_tgt_sess_op *op;
+	struct qla_tgt_cmd *cmd;
+	uint32_t key;
+
+	key = sid_to_key(s_id);
+	spin_lock(&vha->cmd_list_lock);
+	list_for_each_entry(op, &vha->qla_sess_op_cmd_list, cmd_list) {
+		uint32_t op_key;
+		uint32_t op_lun;
+
+		op_key = sid_to_key(op->atio.u.isp24.fcp_hdr.s_id);
+		op_lun = scsilun_to_int(
+			(struct scsi_lun *)&op->atio.u.isp24.fcp_cmnd.lun);
+		if (op_key == key && op_lun == lun)
+			op->aborted = true;
+	}
+	list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) {
+		uint32_t cmd_key;
+		uint32_t cmd_lun;
+
+		cmd_key = sid_to_key(cmd->atio.u.isp24.fcp_hdr.s_id);
+		cmd_lun = scsilun_to_int(
+			(struct scsi_lun *)&cmd->atio.u.isp24.fcp_cmnd.lun);
+		if (cmd_key == key && cmd_lun == lun)
+			cmd->state = QLA_TGT_STATE_ABORTED;
+	}
+	spin_unlock(&vha->cmd_list_lock);
+}
+
 /* ha->hardware_lock supposed to be held on entry */
 static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha,
 	struct abts_recv_from_24xx *abts, struct qla_tgt_sess *sess)
@@ -1199,8 +1381,19 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha,
 	}
 	spin_unlock(&se_sess->sess_cmd_lock);
 
-	if (!found_lun)
-		return -ENOENT;
+	/* cmd not in LIO lists, look in qla list */
+	if (!found_lun) {
+		if (abort_cmd_for_tag(vha, abts->exchange_addr_to_abort)) {
+			/* send TASK_ABORT response immediately */
+			qlt_24xx_send_abts_resp(vha, abts, FCP_TMF_CMPL, false);
+			return 0;
+		} else {
+			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf081,
+			    "unable to find cmd in driver or LIO for tag 0x%x\n",
+			    abts->exchange_addr_to_abort);
+			return -ENOENT;
+		}
+	}
 
 	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf00f,
 	    "qla_target(%d): task abort (tag=%d)\n",
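
Both abort paths added above key queued commands by the initiator's FC source ID. A standalone sketch of that packing (plain C; the helper names here are illustrative, though the first mirrors the sid_to_key() helper this series adds to qla_target.h): the three S_ID bytes — domain, area, al_pa — collapse into one 24-bit integer, and for a LUN reset a command is marked aborted only when both that key and the LUN agree.

	#include <stdint.h>

	/* Pack a 3-byte FC S_ID (domain, area, al_pa) into a 24-bit key. */
	static uint32_t sid_key(const uint8_t s_id[3])
	{
		return ((uint32_t)s_id[0] << 16) |
		       ((uint32_t)s_id[1] << 8) |
			(uint32_t)s_id[2];
	}

	/* LUN reset: drop a queued command only when both the initiator
	 * key and the LUN match, as abort_cmds_for_lun() does above. */
	static int cmd_matches(uint32_t cmd_key, uint32_t cmd_lun,
			       uint32_t key, uint32_t lun)
	{
		return cmd_key == key && cmd_lun == lun;
	}
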
@@ -1284,6 +1477,11 @@ static void qlt_24xx_handle_abts(struct scsi_qla_host *vha,
 		return;
 	}
 
+	if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) {
+		qlt_24xx_send_abts_resp(vha, abts, FCP_TMF_REJECTED, false);
+		return;
+	}
+
 	rc = __qlt_24xx_handle_abts(vha, abts, sess);
 	if (rc != 0) {
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf054,
@@ -1726,21 +1924,6 @@ static int qlt_pre_xmit_response(struct qla_tgt_cmd *cmd,
 	struct qla_hw_data *ha = vha->hw;
 	struct se_cmd *se_cmd = &cmd->se_cmd;
 
-	if (unlikely(cmd->aborted)) {
-		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014,
-		    "qla_target(%d): terminating exchange "
-		    "for aborted cmd=%p (se_cmd=%p, tag=%d)", vha->vp_idx, cmd,
-		    se_cmd, cmd->tag);
-
-		cmd->state = QLA_TGT_STATE_ABORTED;
-		cmd->cmd_flags |= BIT_6;
-
-		qlt_send_term_exchange(vha, cmd, &cmd->atio, 0);
-
-		/* !! At this point cmd could be already freed !! */
-		return QLA_TGT_PRE_XMIT_RESP_CMD_ABORTED;
-	}
-
 	prm->cmd = cmd;
 	prm->tgt = tgt;
 	prm->rq_result = scsi_status;
@@ -2303,6 +2486,19 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
 	unsigned long flags = 0;
 	int res;
 
+	spin_lock_irqsave(&ha->hardware_lock, flags);
+	if (cmd->sess && cmd->sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) {
+		cmd->state = QLA_TGT_STATE_PROCESSED;
+		if (cmd->sess->logout_completed)
+			/* no need to terminate. FW already freed exchange. */
+			qlt_abort_cmd_on_host_reset(cmd->vha, cmd);
+		else
+			qlt_send_term_exchange(vha, cmd, &cmd->atio, 1);
+		spin_unlock_irqrestore(&ha->hardware_lock, flags);
+		return 0;
+	}
+	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
 	memset(&prm, 0, sizeof(prm));
 	qlt_check_srr_debug(cmd, &xmit_type);
@@ -2315,9 +2511,6 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
 	res = qlt_pre_xmit_response(cmd, &prm, xmit_type, scsi_status,
 	    &full_req_cnt);
 	if (unlikely(res != 0)) {
-		if (res == QLA_TGT_PRE_XMIT_RESP_CMD_ABORTED)
-			return 0;
-
 		return res;
 	}
@@ -2347,9 +2540,10 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
 		res = qlt_build_ctio_crc2_pkt(&prm, vha);
 	else
 		res = qlt_24xx_build_ctio_pkt(&prm, vha);
-	if (unlikely(res != 0))
+	if (unlikely(res != 0)) {
+		vha->req->cnt += full_req_cnt;
 		goto out_unmap_unlock;
-
+	}
 
 	pkt = (struct ctio7_to_24xx *)prm.pkt;
@@ -2463,7 +2657,8 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd)
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 
-	if (qla2x00_reset_active(vha) || cmd->reset_count != ha->chip_reset) {
+	if (qla2x00_reset_active(vha) || (cmd->reset_count != ha->chip_reset) ||
+	    (cmd->sess && cmd->sess->deleted == QLA_SESS_DELETION_IN_PROGRESS)) {
 		/*
 		 * Either a chip reset is active or this request was from
 		 * previous life, just abort the processing.
 		 */
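
qlt_xmit_response() and qlt_rdy_to_xfer() now apply the same guard before touching the hardware: a command left over from a previous chip life, or one whose session is mid-logout, must be dropped or terminated rather than submitted. A condensed, self-contained restatement of that predicate — simplified stand-in types, not the driver's structures:

	#include <stdbool.h>

	struct hw   { int chip_reset; bool reset_active; };
	struct sess { int deleted; };	/* nonzero: logout in progress */
	struct cmd  { int reset_count; struct sess *sess; };

	/* True when the command belongs to a previous chip life or to a
	 * session that is being logged out; such a command must never be
	 * handed to the hardware. Checked under the lock in the driver. */
	static bool cmd_is_stale(const struct cmd *c, const struct hw *hw)
	{
		return hw->reset_active ||
		       c->reset_count != hw->chip_reset ||
		       (c->sess && c->sess->deleted);
	}
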
@@ -2487,8 +2682,11 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd)
 	else
 		res = qlt_24xx_build_ctio_pkt(&prm, vha);
 
-	if (unlikely(res != 0))
+	if (unlikely(res != 0)) {
+		vha->req->cnt += prm.req_cnt;
 		goto out_unlock_free_unmap;
+	}
+
 	pkt = (struct ctio7_to_24xx *)prm.pkt;
 	pkt->u.status0.flags |= __constant_cpu_to_le16(CTIO7_FLAGS_DATA_OUT |
 	    CTIO7_FLAGS_STATUS_MODE_0);
@@ -2651,6 +2849,89 @@ qlt_handle_dif_error(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd,
 }
 
+/* If hardware_lock held on entry, might drop it, then reacquire */
+/* This function sends the appropriate CTIO to ISP 2xxx or 24xx */
+static int __qlt_send_term_imm_notif(struct scsi_qla_host *vha,
+	struct imm_ntfy_from_isp *ntfy)
+{
+	struct nack_to_isp *nack;
+	struct qla_hw_data *ha = vha->hw;
+	request_t *pkt;
+	int ret = 0;
+
+	ql_dbg(ql_dbg_tgt_tmr, vha, 0xe01c,
+	    "Sending TERM ELS CTIO (ha=%p)\n", ha);
+
+	pkt = (request_t *)qla2x00_alloc_iocbs_ready(vha, NULL);
+	if (pkt == NULL) {
+		ql_dbg(ql_dbg_tgt, vha, 0xe080,
+		    "qla_target(%d): %s failed: unable to allocate "
+		    "request packet\n", vha->vp_idx, __func__);
+		return -ENOMEM;
+	}
+
+	pkt->entry_type = NOTIFY_ACK_TYPE;
+	pkt->entry_count = 1;
+	pkt->handle = QLA_TGT_SKIP_HANDLE | CTIO_COMPLETION_HANDLE_MARK;
+
+	nack = (struct nack_to_isp *)pkt;
+	nack->ox_id = ntfy->ox_id;
+
+	nack->u.isp24.nport_handle = ntfy->u.isp24.nport_handle;
+	if (le16_to_cpu(ntfy->u.isp24.status) == IMM_NTFY_ELS) {
+		nack->u.isp24.flags = ntfy->u.isp24.flags &
+			__constant_cpu_to_le32(NOTIFY24XX_FLAGS_PUREX_IOCB);
+	}
+
+	/* terminate */
+	nack->u.isp24.flags |=
+		__constant_cpu_to_le16(NOTIFY_ACK_FLAGS_TERMINATE);
+
+	nack->u.isp24.srr_rx_id = ntfy->u.isp24.srr_rx_id;
+	nack->u.isp24.status = ntfy->u.isp24.status;
+	nack->u.isp24.status_subcode = ntfy->u.isp24.status_subcode;
+	nack->u.isp24.fw_handle = ntfy->u.isp24.fw_handle;
+	nack->u.isp24.exchange_address = ntfy->u.isp24.exchange_address;
+	nack->u.isp24.srr_rel_offs = ntfy->u.isp24.srr_rel_offs;
+	nack->u.isp24.srr_ui = ntfy->u.isp24.srr_ui;
+	nack->u.isp24.vp_index = ntfy->u.isp24.vp_index;
+
+	qla2x00_start_iocbs(vha, vha->req);
+	return ret;
+}
+
+static void qlt_send_term_imm_notif(struct scsi_qla_host *vha,
+	struct imm_ntfy_from_isp *imm, int ha_locked)
+{
+	unsigned long flags = 0;
+	int rc;
+
+	if (qlt_issue_marker(vha, ha_locked) < 0)
+		return;
+
+	if (ha_locked) {
+		rc = __qlt_send_term_imm_notif(vha, imm);
+
+#if 0	/* Todo */
+		if (rc == -ENOMEM)
+			qlt_alloc_qfull_cmd(vha, imm, 0, 0);
+#endif
+		goto done;
+	}
+
+	spin_lock_irqsave(&vha->hw->hardware_lock, flags);
+	rc = __qlt_send_term_imm_notif(vha, imm);
+
+#if 0	/* Todo */
+	if (rc == -ENOMEM)
+		qlt_alloc_qfull_cmd(vha, imm, 0, 0);
+#endif
+
+done:
+	if (!ha_locked)
+		spin_unlock_irqrestore(&vha->hw->hardware_lock, flags);
+}
+
 /* If hardware_lock held on entry, might drop it, then reaquire */
 /* This function sends the appropriate CTIO to ISP 2xxx or 24xx */
 static int __qlt_send_term_exchange(struct scsi_qla_host *vha,
@@ -2717,7 +2998,7 @@ static int __qlt_send_term_exchange(struct scsi_qla_host *vha,
 static void qlt_send_term_exchange(struct scsi_qla_host *vha,
 	struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked)
 {
-	unsigned long flags;
+	unsigned long flags = 0;
 	int rc;
 
 	if (qlt_issue_marker(vha, ha_locked) < 0)
@@ -2733,17 +3014,18 @@ static void qlt_send_term_exchange(struct scsi_qla_host *vha,
 	rc = __qlt_send_term_exchange(vha, cmd, atio);
 	if (rc == -ENOMEM)
 		qlt_alloc_qfull_cmd(vha, atio, 0, 0);
-	spin_unlock_irqrestore(&vha->hw->hardware_lock,
flags); done: if (cmd && ((cmd->state != QLA_TGT_STATE_ABORTED) || !cmd->cmd_sent_to_fw)) { - if (!ha_locked && !in_interrupt()) - msleep(250); /* just in case */ - - qlt_unmap_sg(vha, cmd); + if (cmd->sg_mapped) + qlt_unmap_sg(vha, cmd); vha->hw->tgt.tgt_ops->free_cmd(cmd); } + + if (!ha_locked) + spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); + return; } @@ -2794,6 +3076,24 @@ static void qlt_chk_exch_leak_thresh_hold(struct scsi_qla_host *vha) } +void qlt_abort_cmd(struct qla_tgt_cmd *cmd) +{ + struct qla_tgt *tgt = cmd->tgt; + struct scsi_qla_host *vha = tgt->vha; + struct se_cmd *se_cmd = &cmd->se_cmd; + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014, + "qla_target(%d): terminating exchange for aborted cmd=%p " + "(se_cmd=%p, tag=%llu)", vha->vp_idx, cmd, &cmd->se_cmd, + cmd->tag); + + cmd->state = QLA_TGT_STATE_ABORTED; + cmd->cmd_flags |= BIT_6; + + qlt_send_term_exchange(vha, cmd, &cmd->atio, 0); +} +EXPORT_SYMBOL(qlt_abort_cmd); + void qlt_free_cmd(struct qla_tgt_cmd *cmd) { struct qla_tgt_sess *sess = cmd->sess; @@ -3265,6 +3565,13 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) if (tgt->tgt_stop) goto out_term; + if (cmd->state == QLA_TGT_STATE_ABORTED) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf082, + "cmd with tag %u is aborted\n", + cmd->atio.u.isp24.exchange_addr); + goto out_term; + } + cdb = &atio->u.isp24.fcp_cmnd.cdb[0]; cmd->tag = atio->u.isp24.exchange_addr; cmd->unpacked_lun = scsilun_to_int( @@ -3318,6 +3625,12 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) static void qlt_do_work(struct work_struct *work) { struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work); + scsi_qla_host_t *vha = cmd->vha; + unsigned long flags; + + spin_lock_irqsave(&vha->cmd_list_lock, flags); + list_del(&cmd->cmd_list); + spin_unlock_irqrestore(&vha->cmd_list_lock, flags); __qlt_do_work(cmd); } @@ -3347,6 +3660,11 @@ static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha, cmd->loop_id = sess->loop_id; cmd->conf_compl_supported = sess->conf_compl_supported; + cmd->cmd_flags = 0; + cmd->jiffies_at_alloc = get_jiffies_64(); + + cmd->reset_count = vha->hw->chip_reset; + return cmd; } @@ -3364,14 +3682,25 @@ static void qlt_create_sess_from_atio(struct work_struct *work) unsigned long flags; uint8_t *s_id = op->atio.u.isp24.fcp_hdr.s_id; + spin_lock_irqsave(&vha->cmd_list_lock, flags); + list_del(&op->cmd_list); + spin_unlock_irqrestore(&vha->cmd_list_lock, flags); + + if (op->aborted) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf083, + "sess_op with tag %u is aborted\n", + op->atio.u.isp24.exchange_addr); + goto out_term; + } + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf022, - "qla_target(%d): Unable to find wwn login" - " (s_id %x:%x:%x), trying to create it manually\n", - vha->vp_idx, s_id[0], s_id[1], s_id[2]); + "qla_target(%d): Unable to find wwn login" + " (s_id %x:%x:%x), trying to create it manually\n", + vha->vp_idx, s_id[0], s_id[1], s_id[2]); if (op->atio.u.raw.entry_count > 1) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf023, - "Dropping multy entry atio %p\n", &op->atio); + "Dropping multy entry atio %p\n", &op->atio); goto out_term; } @@ -3436,10 +3765,25 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, memcpy(&op->atio, atio, sizeof(*atio)); op->vha = vha; + + spin_lock(&vha->cmd_list_lock); + list_add_tail(&op->cmd_list, &vha->qla_sess_op_cmd_list); + spin_unlock(&vha->cmd_list_lock); + INIT_WORK(&op->work, qlt_create_sess_from_atio); queue_work(qla_tgt_wq, &op->work); return 0; } + + /* Another WWN used to have our s_id. 
Our PLOGI scheduled its + * session deletion, but it's still in sess_del_work wq */ + if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + ql_dbg(ql_dbg_io, vha, 0x3061, + "New command while old session %p is being deleted\n", + sess); + return -EFAULT; + } + /* * Do kref_get() before returning + dropping qla_hw_data->hardware_lock. */ @@ -3453,13 +3797,13 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, return -ENOMEM; } - cmd->cmd_flags = 0; - cmd->jiffies_at_alloc = get_jiffies_64(); - - cmd->reset_count = vha->hw->chip_reset; - cmd->cmd_in_wq = 1; cmd->cmd_flags |= BIT_0; + + spin_lock(&vha->cmd_list_lock); + list_add_tail(&cmd->cmd_list, &vha->qla_cmd_list); + spin_unlock(&vha->cmd_list_lock); + INIT_WORK(&cmd->work, qlt_do_work); queue_work(qla_tgt_wq, &cmd->work); return 0; @@ -3473,6 +3817,7 @@ static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, struct scsi_qla_host *vha = sess->vha; struct qla_hw_data *ha = vha->hw; struct qla_tgt_mgmt_cmd *mcmd; + struct atio_from_isp *a = (struct atio_from_isp *)iocb; int res; uint8_t tmr_func; @@ -3513,6 +3858,7 @@ static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, ql_dbg(ql_dbg_tgt_tmr, vha, 0x10002, "qla_target(%d): LUN_RESET received\n", sess->vha->vp_idx); tmr_func = TMR_LUN_RESET; + abort_cmds_for_lun(vha, lun, a->u.isp24.fcp_hdr.s_id); break; case QLA_TGT_CLEAR_TS: @@ -3601,6 +3947,9 @@ static int qlt_handle_task_mgmt(struct scsi_qla_host *vha, void *iocb) sizeof(struct atio_from_isp)); } + if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) + return -EFAULT; + return qlt_issue_task_mgmt(sess, unpacked_lun, fn, iocb, 0); } @@ -3666,22 +4015,280 @@ static int qlt_abort_task(struct scsi_qla_host *vha, return __qlt_abort_task(vha, iocb, sess); } +void qlt_logo_completion_handler(fc_port_t *fcport, int rc) +{ + if (fcport->tgt_session) { + if (rc != MBS_COMMAND_COMPLETE) { + ql_dbg(ql_dbg_tgt_mgt, fcport->vha, 0xf093, + "%s: se_sess %p / sess %p from" + " port %8phC loop_id %#04x s_id %02x:%02x:%02x" + " LOGO failed: %#x\n", + __func__, + fcport->tgt_session->se_sess, + fcport->tgt_session, + fcport->port_name, fcport->loop_id, + fcport->d_id.b.domain, fcport->d_id.b.area, + fcport->d_id.b.al_pa, rc); + } + + fcport->tgt_session->logout_completed = 1; + } +} + +static void qlt_swap_imm_ntfy_iocb(struct imm_ntfy_from_isp *a, + struct imm_ntfy_from_isp *b) +{ + struct imm_ntfy_from_isp tmp; + memcpy(&tmp, a, sizeof(struct imm_ntfy_from_isp)); + memcpy(a, b, sizeof(struct imm_ntfy_from_isp)); + memcpy(b, &tmp, sizeof(struct imm_ntfy_from_isp)); +} + +/* +* ha->hardware_lock supposed to be held on entry (to protect tgt->sess_list) +* +* Schedules sessions with matching port_id/loop_id but different wwn for +* deletion. Returns existing session with matching wwn if present. +* Null otherwise. 
+*/ +static struct qla_tgt_sess * +qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, + port_id_t port_id, uint16_t loop_id) +{ + struct qla_tgt_sess *sess = NULL, *other_sess; + uint64_t other_wwn; + + list_for_each_entry(other_sess, &tgt->sess_list, sess_list_entry) { + + other_wwn = wwn_to_u64(other_sess->port_name); + + if (wwn == other_wwn) { + WARN_ON(sess); + sess = other_sess; + continue; + } + + /* find other sess with nport_id collision */ + if (port_id.b24 == other_sess->s_id.b24) { + if (loop_id != other_sess->loop_id) { + ql_dbg(ql_dbg_tgt_tmr, tgt->vha, 0x1000c, + "Invalidating sess %p loop_id %d wwn %llx.\n", + other_sess, other_sess->loop_id, other_wwn); + + /* + * logout_on_delete is set by default, but another + * session that has the same s_id/loop_id combo + * might have cleared it when requested this session + * deletion, so don't touch it + */ + qlt_schedule_sess_for_deletion(other_sess, true); + } else { + /* + * Another wwn used to have our s_id/loop_id + * combo - kill the session, but don't log out + */ + sess->logout_on_delete = 0; + qlt_schedule_sess_for_deletion(other_sess, + true); + } + continue; + } + + /* find other sess with nport handle collision */ + if (loop_id == other_sess->loop_id) { + ql_dbg(ql_dbg_tgt_tmr, tgt->vha, 0x1000d, + "Invalidating sess %p loop_id %d wwn %llx.\n", + other_sess, other_sess->loop_id, other_wwn); + + /* Same loop_id but different s_id + * Ok to kill and logout */ + qlt_schedule_sess_for_deletion(other_sess, true); + } + } + + return sess; +} + +/* Abort any commands for this s_id waiting on qla_tgt_wq workqueue */ +static int abort_cmds_for_s_id(struct scsi_qla_host *vha, port_id_t *s_id) +{ + struct qla_tgt_sess_op *op; + struct qla_tgt_cmd *cmd; + uint32_t key; + int count = 0; + + key = (((u32)s_id->b.domain << 16) | + ((u32)s_id->b.area << 8) | + ((u32)s_id->b.al_pa)); + + spin_lock(&vha->cmd_list_lock); + list_for_each_entry(op, &vha->qla_sess_op_cmd_list, cmd_list) { + uint32_t op_key = sid_to_key(op->atio.u.isp24.fcp_hdr.s_id); + if (op_key == key) { + op->aborted = true; + count++; + } + } + list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) { + uint32_t cmd_key = sid_to_key(cmd->atio.u.isp24.fcp_hdr.s_id); + if (cmd_key == key) { + cmd->state = QLA_TGT_STATE_ABORTED; + count++; + } + } + spin_unlock(&vha->cmd_list_lock); + + return count; +} + /* * ha->hardware_lock supposed to be held on entry. Might drop it, then reaquire */ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, struct imm_ntfy_from_isp *iocb) { + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + struct qla_hw_data *ha = vha->hw; + struct qla_tgt_sess *sess = NULL; + uint64_t wwn; + port_id_t port_id; + uint16_t loop_id; + uint16_t wd3_lo; int res = 0; + wwn = wwn_to_u64(iocb->u.isp24.port_name); + + port_id.b.domain = iocb->u.isp24.port_id[2]; + port_id.b.area = iocb->u.isp24.port_id[1]; + port_id.b.al_pa = iocb->u.isp24.port_id[0]; + port_id.b.rsvd_1 = 0; + + loop_id = le16_to_cpu(iocb->u.isp24.nport_handle); + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf026, "qla_target(%d): Port ID: 0x%3phC ELS opcode: 0x%02x\n", vha->vp_idx, iocb->u.isp24.port_id, iocb->u.isp24.status_subcode); + /* res = 1 means ack at the end of thread + * res = 0 means ack async/later. 
+	 */
 	switch (iocb->u.isp24.status_subcode) {
 	case ELS_PLOGI:
-	case ELS_FLOGI:
+
+		/* Mark all stale commands in qla_tgt_wq for deletion */
+		abort_cmds_for_s_id(vha, &port_id);
+
+		if (wwn)
+			sess = qlt_find_sess_invalidate_other(tgt, wwn,
+			    port_id, loop_id);
+
+		if (!sess || IS_SW_RESV_ADDR(sess->s_id)) {
+			res = 1;
+			break;
+		}
+
+		if (sess->plogi_ack_needed) {
+			/*
+			 * Initiator sent another PLOGI before last PLOGI could
+			 * finish. Swap plogi iocbs and terminate old one
+			 * without acking, new one will get acked when session
+			 * deletion completes.
+			 */
+			ql_log(ql_log_warn, sess->vha, 0xf094,
+			    "sess %p received double plogi.\n", sess);
+
+			qlt_swap_imm_ntfy_iocb(iocb, &sess->tm_iocb);
+
+			qlt_send_term_imm_notif(vha, iocb, 1);
+
+			res = 0;
+			break;
+		}
+
+		res = 0;
+
+		/*
+		 * Save immediate Notif IOCB for Ack when sess is done
+		 * and being deleted.
+		 */
+		memcpy(&sess->tm_iocb, iocb, sizeof(sess->tm_iocb));
+		sess->plogi_ack_needed = 1;
+
+		/*
+		 * Under normal circumstances we want to release nport handle
+		 * during LOGO process to avoid nport handle leaks inside FW.
+		 * The exception is when LOGO is done while another PLOGI with
+		 * the same nport handle is waiting as might be the case here.
+		 * Note: there is always a possibility of a race where session
+		 * deletion has already started for other reasons (e.g. ACL
+		 * removal) and now PLOGI arrives:
+		 * 1. if PLOGI arrived in FW after nport handle has been freed,
+		 *    FW must have assigned this PLOGI a new/same handle and we
+		 *    can proceed ACK'ing it as usual when session deletion
+		 *    completes.
+		 * 2. if PLOGI arrived in FW before LOGO with LCF_FREE_NPORT
+		 *    bit reached it, the handle has now been released. We'll
+		 *    get an error when we ACK this PLOGI. Nothing will be sent
+		 *    back to initiator. Initiator should eventually retry
+		 *    PLOGI and situation will correct itself.
+		 */
+		sess->keep_nport_handle = ((sess->loop_id == loop_id) &&
+		    (sess->s_id.b24 == port_id.b24));
+
+		qlt_schedule_sess_for_deletion(sess, true);
+		break;
+
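
The keep_nport_handle rule set in the PLOGI case above is subtle enough to restate on its own: the firmware nport handle is kept only when the incoming PLOGI reuses both the loop ID and the FC address of the session being torn down; in every other case the LOGO is allowed to free the handle (the LCF_FREE_NPORT path in qla24xx_logout_iocb()). A standalone sketch of the rule — illustrative names, not driver code:

	#include <stdbool.h>
	#include <stdint.h>

	/* Keep the firmware nport handle only when the new PLOGI describes
	 * exactly the same initiator/handle pairing as the dying session;
	 * otherwise let the LOGO free the handle in firmware. */
	static bool keep_nport_handle(uint16_t sess_loop_id, uint32_t sess_s_id,
				      uint16_t plogi_loop_id, uint32_t plogi_s_id)
	{
		return sess_loop_id == plogi_loop_id && sess_s_id == plogi_s_id;
	}
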
 	case ELS_PRLI:
+		wd3_lo = le16_to_cpu(iocb->u.isp24.u.prli.wd3_lo);
+
+		if (wwn)
+			sess = qlt_find_sess_invalidate_other(tgt, wwn, port_id,
+			    loop_id);
+
+		if (sess != NULL) {
+			if (sess->deleted) {
+				/*
+				 * Impatient initiator sent PRLI before last
+				 * PLOGI could finish. Will force it to re-try,
+				 * while last one finishes.
+				 */
+				ql_log(ql_log_warn, sess->vha, 0xf095,
+				    "sess %p PRLI received, before plogi ack.\n",
+				    sess);
+				qlt_send_term_imm_notif(vha, iocb, 1);
+				res = 0;
+				break;
+			}
+
+			/*
+			 * This shouldn't happen under normal circumstances,
+			 * since we have deleted the old session during PLOGI
+			 */
+			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf096,
+			    "PRLI (loop_id %#04x) for existing sess %p (loop_id %#04x)\n",
+			    sess->loop_id, sess, iocb->u.isp24.nport_handle);
+
+			sess->local = 0;
+			sess->loop_id = loop_id;
+			sess->s_id = port_id;
+
+			if (wd3_lo & BIT_7)
+				sess->conf_compl_supported = 1;
+
+		}
+		res = 1; /* send notify ack */
+
+		/* Make session global (not used in fabric mode) */
+		if (ha->current_topology != ISP_CFG_F) {
+			set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
+			set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
+			qla2xxx_wake_dpc(vha);
+		} else {
+			/* todo: else - create sess here.
+			 */
+			res = 1; /* send notify ack */
+		}
+
+		break;
+
 	case ELS_LOGO:
 	case ELS_PRLO:
 		res = qlt_reset(vha, iocb, QLA_TGT_NEXUS_LOSS_SESS);
@@ -3699,6 +4306,7 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
 		break;
 	}
 
+	case ELS_FLOGI:	/* should never happen */
 	default:
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf061,
 		    "qla_target(%d): Unsupported ELS command %x "
@@ -5016,6 +5624,11 @@ static void qlt_abort_work(struct qla_tgt *tgt,
 		if (!sess)
 			goto out_term;
 	} else {
+		if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) {
+			sess = NULL;
+			goto out_term;
+		}
+
 		kref_get(&sess->se_sess->sess_kref);
 	}
 
@@ -5070,6 +5683,11 @@ static void qlt_tmr_work(struct qla_tgt *tgt,
 		if (!sess)
 			goto out_term;
 	} else {
+		if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) {
+			sess = NULL;
+			goto out_term;
+		}
+
 		kref_get(&sess->se_sess->sess_kref);
 	}
 
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index 332086776dfe9..d30c60a1d522a 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -167,7 +167,24 @@ struct imm_ntfy_from_isp {
 			uint32_t srr_rel_offs;
 			uint16_t srr_ui;
 			uint16_t srr_ox_id;
-			uint8_t reserved_4[19];
+			union {
+				struct {
+					uint8_t node_name[8];
+				} plogi; /* PLOGI/ADISC/PDISC */
+				struct {
+					/* PRLI word 3 bit 0-15 */
+					uint16_t wd3_lo;
+					uint8_t resv0[6];
+				} prli;
+				struct {
+					uint8_t port_id[3];
+					uint8_t resv1;
+					uint16_t nport_handle;
+					uint16_t resv2;
+				} req_els;
+			} u;
+			uint8_t port_name[8];
+			uint8_t resv3[3];
 			uint8_t vp_index;
 			uint32_t reserved_5;
 			uint8_t port_id[3];
@@ -234,6 +251,7 @@ struct nack_to_isp {
 	uint8_t reserved[2];
 	uint16_t ox_id;
 } __packed;
+#define NOTIFY_ACK_FLAGS_TERMINATE	BIT_3
 #define NOTIFY_ACK_SRR_FLAGS_ACCEPT	0
 #define NOTIFY_ACK_SRR_FLAGS_REJECT	1
@@ -790,13 +808,6 @@ int qla2x00_wait_for_hba_online(struct scsi_qla_host *);
 #define FC_TM_REJECT	4
 #define FC_TM_FAILED	5
 
-/*
- * Error code of qlt_pre_xmit_response() meaning that cmd's exchange was
- * terminated, so no more actions is needed and success should be returned
- * to target.
- */ -#define QLA_TGT_PRE_XMIT_RESP_CMD_ABORTED 0x1717 - #if (BITS_PER_LONG > 32) || defined(CONFIG_HIGHMEM64G) #define pci_dma_lo32(a) (a & 0xffffffff) #define pci_dma_hi32(a) ((((a) >> 16)>>16) & 0xffffffff) @@ -874,6 +885,15 @@ struct qla_tgt_sess_op { struct scsi_qla_host *vha; struct atio_from_isp atio; struct work_struct work; + struct list_head cmd_list; + bool aborted; +}; + +enum qla_sess_deletion { + QLA_SESS_DELETION_NONE = 0, + QLA_SESS_DELETION_PENDING = 1, /* hopefully we can get rid of + * this one */ + QLA_SESS_DELETION_IN_PROGRESS = 2, }; /* @@ -884,8 +904,15 @@ struct qla_tgt_sess { port_id_t s_id; unsigned int conf_compl_supported:1; - unsigned int deleted:1; + unsigned int deleted:2; unsigned int local:1; + unsigned int logout_on_delete:1; + unsigned int plogi_ack_needed:1; + unsigned int keep_nport_handle:1; + + unsigned char logout_completed; + + int generation; struct se_session *se_sess; struct scsi_qla_host *vha; @@ -897,6 +924,10 @@ struct qla_tgt_sess { uint8_t port_name[WWN_SIZE]; struct work_struct free_work; + + union { + struct imm_ntfy_from_isp tm_iocb; + }; }; struct qla_tgt_cmd { @@ -912,7 +943,6 @@ struct qla_tgt_cmd { unsigned int conf_compl_supported:1; unsigned int sg_mapped:1; unsigned int free_sg:1; - unsigned int aborted:1; /* Needed in case of SRR */ unsigned int write_data_transferred:1; unsigned int ctx_dsd_alloced:1; unsigned int q_full:1; @@ -1027,6 +1057,10 @@ struct qla_tgt_srr_ctio { struct qla_tgt_cmd *cmd; }; +/* Check for Switch reserved address */ +#define IS_SW_RESV_ADDR(_s_id) \ + ((_s_id.b.domain == 0xff) && (_s_id.b.area == 0xfc)) + #define QLA_TGT_XMIT_DATA 1 #define QLA_TGT_XMIT_STATUS 2 #define QLA_TGT_XMIT_ALL (QLA_TGT_XMIT_STATUS|QLA_TGT_XMIT_DATA) @@ -1044,7 +1078,7 @@ extern int qlt_lport_register(void *, u64, u64, u64, extern void qlt_lport_deregister(struct scsi_qla_host *); extern void qlt_unreg_sess(struct qla_tgt_sess *); extern void qlt_fc_port_added(struct scsi_qla_host *, fc_port_t *); -extern void qlt_fc_port_deleted(struct scsi_qla_host *, fc_port_t *); +extern void qlt_fc_port_deleted(struct scsi_qla_host *, fc_port_t *, int); extern int __init qlt_init(void); extern void qlt_exit(void); extern void qlt_update_vp_map(struct scsi_qla_host *, int); @@ -1074,12 +1108,23 @@ static inline void qla_reverse_ini_mode(struct scsi_qla_host *ha) ha->host->active_mode |= MODE_INITIATOR; } +static inline uint32_t sid_to_key(const uint8_t *s_id) +{ + uint32_t key; + + key = (((unsigned long)s_id[0] << 16) | + ((unsigned long)s_id[1] << 8) | + (unsigned long)s_id[2]); + return key; +} + /* * Exported symbols from qla_target.c LLD logic used by qla2xxx code.. 
*/ extern void qlt_response_pkt_all_vps(struct scsi_qla_host *, response_t *); extern int qlt_rdy_to_xfer(struct qla_tgt_cmd *); extern int qlt_xmit_response(struct qla_tgt_cmd *, int, uint8_t); +extern void qlt_abort_cmd(struct qla_tgt_cmd *); extern void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *); extern void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *); extern void qlt_free_cmd(struct qla_tgt_cmd *cmd); @@ -1110,5 +1155,7 @@ extern void qlt_stop_phase2(struct qla_tgt *); extern irqreturn_t qla83xx_msix_atio_q(int, void *); extern void qlt_83xx_iospace_config(struct qla_hw_data *); extern int qlt_free_qfull_cmds(struct scsi_qla_host *); +extern void qlt_logo_completion_handler(fc_port_t *, int); +extern void qlt_do_generation_tick(struct scsi_qla_host *, int *); #endif /* __QLA_TARGET_H */ diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index 962cb89fe0ae9..af806fdb0dbcd 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -395,6 +395,10 @@ qla27xx_fwdt_entry_t263(struct scsi_qla_host *vha, if (ent->t263.queue_type == T263_QUEUE_TYPE_REQ) { for (i = 0; i < vha->hw->max_req_queues; i++) { struct req_que *req = vha->hw->req_q_map[i]; + + if (!test_bit(i, vha->hw->req_qid_map)) + continue; + if (req || !buf) { length = req ? req->length : REQUEST_ENTRY_CNT_24XX; @@ -408,6 +412,10 @@ qla27xx_fwdt_entry_t263(struct scsi_qla_host *vha, } else if (ent->t263.queue_type == T263_QUEUE_TYPE_RSP) { for (i = 0; i < vha->hw->max_rsp_queues; i++) { struct rsp_que *rsp = vha->hw->rsp_q_map[i]; + + if (!test_bit(i, vha->hw->rsp_qid_map)) + continue; + if (rsp || !buf) { length = rsp ? rsp->length : RESPONSE_ENTRY_CNT_MQ; @@ -634,6 +642,10 @@ qla27xx_fwdt_entry_t274(struct scsi_qla_host *vha, if (ent->t274.queue_type == T274_QUEUE_TYPE_REQ_SHAD) { for (i = 0; i < vha->hw->max_req_queues; i++) { struct req_que *req = vha->hw->req_q_map[i]; + + if (!test_bit(i, vha->hw->req_qid_map)) + continue; + if (req || !buf) { qla27xx_insert16(i, buf, len); qla27xx_insert16(1, buf, len); @@ -645,6 +657,10 @@ qla27xx_fwdt_entry_t274(struct scsi_qla_host *vha, } else if (ent->t274.queue_type == T274_QUEUE_TYPE_RSP_SHAD) { for (i = 0; i < vha->hw->max_rsp_queues; i++) { struct rsp_que *rsp = vha->hw->rsp_q_map[i]; + + if (!test_bit(i, vha->hw->rsp_qid_map)) + continue; + if (rsp || !buf) { qla27xx_insert16(i, buf, len); qla27xx_insert16(1, buf, len); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 5c9e680aa375a..fdad875ca7778 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -429,7 +429,7 @@ static int tcm_qla2xxx_check_stop_free(struct se_cmd *se_cmd) cmd->cmd_flags |= BIT_14; } - return target_put_sess_cmd(se_cmd->se_sess, se_cmd); + return target_put_sess_cmd(se_cmd); } /* tcm_qla2xxx_release_cmd - Callback from TCM Core to release underlying @@ -669,7 +669,6 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) cmd->cmd_flags |= BIT_4; cmd->bufflen = se_cmd->data_length; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); - cmd->aborted = (se_cmd->transport_state & CMD_T_ABORTED); cmd->sg_cnt = se_cmd->t_data_nents; cmd->sg = se_cmd->t_data_sg; @@ -699,7 +698,6 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd) cmd->sg_cnt = 0; cmd->offset = 0; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); - cmd->aborted = (se_cmd->transport_state & CMD_T_ABORTED); if (cmd->cmd_flags & BIT_5) { pr_crit("Bit_5 already set for cmd = %p.\n", 
cmd); dump_stack(); @@ -764,14 +762,7 @@ static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - struct scsi_qla_host *vha = cmd->vha; - struct qla_hw_data *ha = vha->hw; - - if (!cmd->sg_mapped) - return; - - pci_unmap_sg(ha->pdev, cmd->sg, cmd->sg_cnt, cmd->dma_data_direction); - cmd->sg_mapped = 0; + qlt_abort_cmd(cmd); } static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *, @@ -1323,9 +1314,7 @@ static struct qla_tgt_sess *tcm_qla2xxx_find_sess_by_s_id( return NULL; } - key = (((unsigned long)s_id[0] << 16) | - ((unsigned long)s_id[1] << 8) | - (unsigned long)s_id[2]); + key = sid_to_key(s_id); pr_debug("find_sess_by_s_id: 0x%06x\n", key); se_nacl = btree_lookup32(&lport->lport_fcport_map, key); @@ -1360,9 +1349,7 @@ static void tcm_qla2xxx_set_sess_by_s_id( void *slot; int rc; - key = (((unsigned long)s_id[0] << 16) | - ((unsigned long)s_id[1] << 8) | - (unsigned long)s_id[2]); + key = sid_to_key(s_id); pr_debug("set_sess_by_s_id: %06x\n", key); slot = btree_lookup32(&lport->lport_fcport_map, key); @@ -1718,6 +1705,10 @@ static void tcm_qla2xxx_update_sess(struct qla_tgt_sess *sess, port_id_t s_id, } sess->conf_compl_supported = conf_compl_supported; + + /* Reset logout parameters to default */ + sess->logout_on_delete = 1; + sess->keep_nport_handle = 0; } /* diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index 9f77d23239a26..ac418e73536d7 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -205,6 +205,7 @@ static struct { {"Intel", "Multi-Flex", NULL, BLIST_NO_RSOC}, {"iRiver", "iFP Mass Driver", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36}, {"LASOUND", "CDX7405", "3.10", BLIST_MAX5LUN | BLIST_SINGLELUN}, + {"Marvell", "Console", NULL, BLIST_SKIP_VPD_PAGES}, {"MATSHITA", "PD-1", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"MATSHITA", "DMC-LC5", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36}, {"MATSHITA", "DMC-LC40", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36}, @@ -227,6 +228,7 @@ static struct { {"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC}, {"Promise", "", NULL, BLIST_SPARSELUN}, {"QNAP", "iSCSI Storage", NULL, BLIST_MAX_1024}, + {"SYNOLOGY", "iSCSI Storage", NULL, BLIST_MAX_1024}, {"QUANTUM", "XP34301", "1071", BLIST_NOTQ}, {"REGAL", "CDC-4X", NULL, BLIST_MAX5LUN | BLIST_SINGLELUN}, {"SanDisk", "ImageMate CF-SD1", NULL, BLIST_FORCELUN}, diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index c95a4e943fc68..c6b93d273799c 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -944,7 +943,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, scmd->sdb.length); scmd->sdb.table.sgl = &ses->sense_sgl; scmd->sc_data_direction = DMA_FROM_DEVICE; - scmd->sdb.table.nents = 1; + scmd->sdb.table.nents = scmd->sdb.table.orig_nents = 1; scmd->cmnd[0] = REQUEST_SENSE; scmd->cmnd[4] = scmd->sdb.length; scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); @@ -2170,8 +2169,17 @@ int scsi_error_handler(void *data) * We never actually get interrupted because kthread_run * disables signal delivery for the created thread. */ - while (!kthread_should_stop()) { + while (true) { + /* + * The sequence in kthread_stop() sets the stop flag first + * then wakes the process. 
To avoid missed wakeups, the task + * should always be in a non running state before the stop + * flag is checked + */ set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) || shost->host_failed != atomic_read(&shost->host_busy)) { SCSI_LOG_ERROR_RECOVERY(1, @@ -2587,33 +2595,3 @@ void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq) } } EXPORT_SYMBOL(scsi_build_sense_buffer); - -/** - * scsi_set_sense_information - set the information field in a - * formatted sense data buffer - * @buf: Where to build sense data - * @info: 64-bit information value to be set - * - **/ -void scsi_set_sense_information(u8 *buf, u64 info) -{ - if ((buf[0] & 0x7f) == 0x72) { - u8 *ucp, len; - - len = buf[7]; - ucp = (char *)scsi_sense_desc_find(buf, len + 8, 0); - if (!ucp) { - buf[7] = len + 0xa; - ucp = buf + 8 + len; - } - ucp[0] = 0; - ucp[1] = 0xa; - ucp[2] = 0x80; /* Valid bit */ - ucp[3] = 0; - put_unaligned_be64(info, &ucp[4]); - } else if ((buf[0] & 0x7f) == 0x70) { - buf[0] |= 0x80; - put_unaligned_be64(info, &buf[3]); - } -} -EXPORT_SYMBOL(scsi_set_sense_information); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index b1a263137a233..448ebdaa3d694 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -583,7 +583,7 @@ static struct scatterlist *scsi_sg_alloc(unsigned int nents, gfp_t gfp_mask) static void scsi_free_sgtable(struct scsi_data_buffer *sdb, bool mq) { - if (mq && sdb->table.nents <= SCSI_MAX_SG_SEGMENTS) + if (mq && sdb->table.orig_nents <= SCSI_MAX_SG_SEGMENTS) return; __sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS, mq, scsi_sg_free); } @@ -597,8 +597,8 @@ static int scsi_alloc_sgtable(struct scsi_data_buffer *sdb, int nents, bool mq) if (mq) { if (nents <= SCSI_MAX_SG_SEGMENTS) { - sdb->table.nents = nents; - sg_init_table(sdb->table.sgl, sdb->table.nents); + sdb->table.nents = sdb->table.orig_nents = nents; + sg_init_table(sdb->table.sgl, nents); return 0; } first_chunk = sdb->table.sgl; diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c index 9e43ae1d2163d..e4b7998379485 100644 --- a/drivers/scsi/scsi_pm.c +++ b/drivers/scsi/scsi_pm.c @@ -217,15 +217,15 @@ static int sdev_runtime_suspend(struct device *dev) { const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; struct scsi_device *sdev = to_scsi_device(dev); - int err; + int err = 0; - err = blk_pre_runtime_suspend(sdev->request_queue); - if (err) - return err; - if (pm && pm->runtime_suspend) + if (pm && pm->runtime_suspend) { + err = blk_pre_runtime_suspend(sdev->request_queue); + if (err) + return err; err = pm->runtime_suspend(dev); - blk_post_runtime_suspend(sdev->request_queue, err); - + blk_post_runtime_suspend(sdev->request_queue, err); + } return err; } @@ -248,11 +248,11 @@ static int sdev_runtime_resume(struct device *dev) const struct dev_pm_ops *pm = dev->driver ? 
dev->driver->pm : NULL; int err = 0; - blk_pre_runtime_resume(sdev->request_queue); - if (pm && pm->runtime_resume) + if (pm && pm->runtime_resume) { + blk_pre_runtime_resume(sdev->request_queue); err = pm->runtime_resume(dev); - blk_post_runtime_resume(sdev->request_queue, err); - + blk_post_runtime_resume(sdev->request_queue, err); + } return err; } diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 1ac38e73df7ee..9ad41168d26df 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -859,7 +859,7 @@ sdev_store_queue_depth(struct device *dev, struct device_attribute *attr, depth = simple_strtoul(buf, NULL, 0); - if (depth < 1 || depth > sht->can_queue) + if (depth < 1 || depth > sdev->host->can_queue) return -EINVAL; retval = sht->change_queue_depth(sdev, depth); diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index ae45bd99baed7..f115f67a6ba58 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -396,6 +396,36 @@ static void srp_reconnect_work(struct work_struct *work) } } +/** + * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn() + * @shost: SCSI host for which to count the number of scsi_request_fn() callers. + * + * To do: add support for scsi-mq in this function. + */ +static int scsi_request_fn_active(struct Scsi_Host *shost) +{ + struct scsi_device *sdev; + struct request_queue *q; + int request_fn_active = 0; + + shost_for_each_device(sdev, shost) { + q = sdev->request_queue; + + spin_lock_irq(q->queue_lock); + request_fn_active += q->request_fn_active; + spin_unlock_irq(q->queue_lock); + } + + return request_fn_active; +} + +/* Wait until ongoing shost->hostt->queuecommand() calls have finished. */ +static void srp_wait_for_queuecommand(struct Scsi_Host *shost) +{ + while (scsi_request_fn_active(shost)) + msleep(20); +} + static void __rport_fail_io_fast(struct srp_rport *rport) { struct Scsi_Host *shost = rport_to_shost(rport); @@ -409,8 +439,10 @@ static void __rport_fail_io_fast(struct srp_rport *rport) /* Involve the LLD if possible to terminate all I/O on the rport. */ i = to_srp_internal(shost->transportt); - if (i->f->terminate_rport_io) + if (i->f->terminate_rport_io) { + srp_wait_for_queuecommand(shost); i->f->terminate_rport_io(rport); + } } /** @@ -503,27 +535,6 @@ void srp_start_tl_fail_timers(struct srp_rport *rport) } EXPORT_SYMBOL(srp_start_tl_fail_timers); -/** - * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn() - * @shost: SCSI host for which to count the number of scsi_request_fn() callers. - */ -static int scsi_request_fn_active(struct Scsi_Host *shost) -{ - struct scsi_device *sdev; - struct request_queue *q; - int request_fn_active = 0; - - shost_for_each_device(sdev, shost) { - q = sdev->request_queue; - - spin_lock_irq(q->queue_lock); - request_fn_active += q->request_fn_active; - spin_unlock_irq(q->queue_lock); - } - - return request_fn_active; -} - /** * srp_reconnect_rport() - reconnect to an SRP target port * @rport: SRP target port. @@ -559,8 +570,7 @@ int srp_reconnect_rport(struct srp_rport *rport) if (res) goto out; scsi_target_block(&shost->shost_gendev); - while (scsi_request_fn_active(shost)) - msleep(20); + srp_wait_for_queuecommand(shost); res = rport->state != SRP_RPORT_LOST ? 
i->f->reconnect(rport) : -ENODEV; pr_debug("%s (state %d): transport.reconnect() returned %d\n", dev_name(&shost->shost_gendev), rport->state, res); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 7f9d65fe4fd9a..c66fd23b3c13d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2770,9 +2770,9 @@ static int sd_revalidate_disk(struct gendisk *disk) max_xfer = sdkp->max_xfer_blocks; max_xfer <<= ilog2(sdp->sector_size) - 9; - max_xfer = min_not_zero(queue_max_hw_sectors(sdkp->disk->queue), - max_xfer); - blk_queue_max_hw_sectors(sdkp->disk->queue, max_xfer); + sdkp->disk->queue->limits.max_sectors = + min_not_zero(queue_max_hw_sectors(sdkp->disk->queue), max_xfer); + set_capacity(disk, sdkp->capacity); sd_config_write_same(sdkp); kfree(buffer); @@ -3141,8 +3141,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret = 0; - if (!sdkp) - return 0; /* this can happen */ + if (!sdkp) /* E.g.: runtime suspend following sd_remove() */ + return 0; if (sdkp->WCE && sdkp->media_present) { sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); @@ -3181,6 +3181,9 @@ static int sd_resume(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); + if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ + return 0; + if (!sdkp->device->manage_start_stop) return 0; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 9d7b7db75e4b9..3bbf4853733cd 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1255,7 +1255,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) } sfp->mmap_called = 1; - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = sfp; vma->vm_ops = &sg_mmap_vm_ops; return 0; diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 8bd54a64efd6a..64c867405ad4f 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -144,6 +144,9 @@ static int sr_runtime_suspend(struct device *dev) { struct scsi_cd *cd = dev_get_drvdata(dev); + if (!cd) /* E.g.: runtime suspend following sr_remove() */ + return 0; + if (cd->media_present) return -EBUSY; else @@ -985,6 +988,7 @@ static int sr_remove(struct device *dev) scsi_autopm_get_device(cd->device); del_gendisk(cd->disk); + dev_set_drvdata(dev, NULL); mutex_lock(&sr_ref_mutex); kref_put(&cd->kref, sr_kref_release); diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 9a1c34205254f..525ab4c1f306c 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -1274,9 +1274,9 @@ static int st_open(struct inode *inode, struct file *filp) spin_lock(&st_use_lock); STp->in_use = 0; spin_unlock(&st_use_lock); - scsi_tape_put(STp); if (resumed) scsi_autopm_put_device(STp->device); + scsi_tape_put(STp); return retval; } diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index c956395cf46f9..c89bada875f85 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -732,12 +732,12 @@ void tegra_pmc_init_tsense_reset(struct tegra_pmc *pmc) u32 value, checksum; if (!pmc->soc->has_tsense_reset) - goto out; + return; np = of_find_node_by_name(pmc->dev->of_node, "i2c-thermtrip"); if (!np) { dev_warn(dev, "i2c-thermtrip node not found, %s.\n", disabled); - goto out; + return; } if (of_property_read_u32(np, "nvidia,i2c-controller-id", &ctrl_id)) { diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index 37875cf942f7b..a5067739ee938 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -257,13 +257,11 @@ static int 
bcm2835_spi_transfer_one(struct spi_master *master, spi_used_hz = cdiv ? (clk_hz / cdiv) : (clk_hz / 65536); bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv); - /* handle all the modes */ + /* handle the 3-wire mode */ if ((spi->mode & SPI_3WIRE) && (tfr->rx_buf)) cs |= BCM2835_SPI_CS_REN; - if (spi->mode & SPI_CPOL) - cs |= BCM2835_SPI_CS_CPOL; - if (spi->mode & SPI_CPHA) - cs |= BCM2835_SPI_CS_CPHA; + else + cs &= ~BCM2835_SPI_CS_REN; /* for gpio_cs set dummy CS so that no HW-CS get changed * we can not run this in bcm2835_spi_set_cs, as it does @@ -291,6 +289,25 @@ static int bcm2835_spi_transfer_one(struct spi_master *master, return bcm2835_spi_transfer_one_irq(master, spi, tfr, cs); } +static int bcm2835_spi_prepare_message(struct spi_master *master, + struct spi_message *msg) +{ + struct spi_device *spi = msg->spi; + struct bcm2835_spi *bs = spi_master_get_devdata(master); + u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS); + + cs &= ~(BCM2835_SPI_CS_CPOL | BCM2835_SPI_CS_CPHA); + + if (spi->mode & SPI_CPOL) + cs |= BCM2835_SPI_CS_CPOL; + if (spi->mode & SPI_CPHA) + cs |= BCM2835_SPI_CS_CPHA; + + bcm2835_wr(bs, BCM2835_SPI_CS, cs); + + return 0; +} + static void bcm2835_spi_handle_err(struct spi_master *master, struct spi_message *msg) { @@ -429,6 +446,7 @@ static int bcm2835_spi_probe(struct platform_device *pdev) master->set_cs = bcm2835_spi_set_cs; master->transfer_one = bcm2835_spi_transfer_one; master->handle_err = bcm2835_spi_handle_err; + master->prepare_message = bcm2835_spi_prepare_message; master->dev.of_node = pdev->dev.of_node; bs = spi_master_get_devdata(master); diff --git a/drivers/spi/spi-bitbang-txrx.h b/drivers/spi/spi-bitbang-txrx.h index 06b34e5bcfa37..47bb9b898dfdc 100644 --- a/drivers/spi/spi-bitbang-txrx.h +++ b/drivers/spi/spi-bitbang-txrx.h @@ -49,7 +49,7 @@ bitbang_txrx_be_cpha0(struct spi_device *spi, { /* if (cpol == 0) this is SPI_MODE_0; else this is SPI_MODE_2 */ - bool oldbit = !(word & 1); + u32 oldbit = (!(word & (1<<(bits-1)))) << 31; /* clock starts at inactive polarity */ for (word <<= (32 - bits); likely(bits); bits--) { @@ -81,7 +81,7 @@ bitbang_txrx_be_cpha1(struct spi_device *spi, { /* if (cpol == 0) this is SPI_MODE_1; else this is SPI_MODE_3 */ - bool oldbit = !(word & (1 << 31)); + u32 oldbit = (!(word & (1<<(bits-1)))) << 31; /* clock starts at inactive polarity */ for (word <<= (32 - bits); likely(bits); bits--) { diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c index eb03e1215195b..7edede6e024ba 100644 --- a/drivers/spi/spi-dw-mmio.c +++ b/drivers/spi/spi-dw-mmio.c @@ -74,6 +74,9 @@ static int dw_spi_mmio_probe(struct platform_device *pdev) dws->max_freq = clk_get_rate(dwsmmio->clk); + of_property_read_u32(pdev->dev.of_node, "reg-io-width", + &dws->reg_io_width); + num_cs = 4; if (pdev->dev.of_node) diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c index 8d67d03c71ebc..4fbfcdc5cb244 100644 --- a/drivers/spi/spi-dw.c +++ b/drivers/spi/spi-dw.c @@ -194,7 +194,7 @@ static void dw_writer(struct dw_spi *dws) else txw = *(u16 *)(dws->tx); } - dw_writel(dws, DW_SPI_DR, txw); + dw_write_io_reg(dws, DW_SPI_DR, txw); dws->tx += dws->n_bytes; } } @@ -205,7 +205,7 @@ static void dw_reader(struct dw_spi *dws) u16 rxw; while (max--) { - rxw = dw_readl(dws, DW_SPI_DR); + rxw = dw_read_io_reg(dws, DW_SPI_DR); /* Care rx only if the transfer's original "rx" is not null */ if (dws->rx_end - dws->len) { if (dws->n_bytes == 1) diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h index 6c91391c1a4f8..b75ed327d5a29 100644 ---
a/drivers/spi/spi-dw.h +++ b/drivers/spi/spi-dw.h @@ -109,6 +109,7 @@ struct dw_spi { u32 fifo_len; /* depth of the FIFO buffer */ u32 max_freq; /* max bus freq supported */ + u32 reg_io_width; /* DR I/O width in bytes */ u16 bus_num; u16 num_cs; /* supported slave numbers */ @@ -145,11 +146,45 @@ static inline u32 dw_readl(struct dw_spi *dws, u32 offset) return __raw_readl(dws->regs + offset); } +static inline u16 dw_readw(struct dw_spi *dws, u32 offset) +{ + return __raw_readw(dws->regs + offset); +} + static inline void dw_writel(struct dw_spi *dws, u32 offset, u32 val) { __raw_writel(val, dws->regs + offset); } +static inline void dw_writew(struct dw_spi *dws, u32 offset, u16 val) +{ + __raw_writew(val, dws->regs + offset); +} + +static inline u32 dw_read_io_reg(struct dw_spi *dws, u32 offset) +{ + switch (dws->reg_io_width) { + case 2: + return dw_readw(dws, offset); + case 4: + default: + return dw_readl(dws, offset); + } +} + +static inline void dw_write_io_reg(struct dw_spi *dws, u32 offset, u32 val) +{ + switch (dws->reg_io_width) { + case 2: + dw_writew(dws, offset, val); + break; + case 4: + default: + dw_writel(dws, offset, val); + break; + } +} + static inline void spi_enable_chip(struct dw_spi *dws, int enable) { dw_writel(dws, DW_SPI_SSIENR, (enable ? 1 : 0)); diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c index 788e2b176a4f7..bb916c8d40db8 100644 --- a/drivers/spi/spi-img-spfi.c +++ b/drivers/spi/spi-img-spfi.c @@ -40,6 +40,7 @@ #define SPFI_CONTROL_SOFT_RESET BIT(11) #define SPFI_CONTROL_SEND_DMA BIT(10) #define SPFI_CONTROL_GET_DMA BIT(9) +#define SPFI_CONTROL_SE BIT(8) #define SPFI_CONTROL_TMODE_SHIFT 5 #define SPFI_CONTROL_TMODE_MASK 0x7 #define SPFI_CONTROL_TMODE_SINGLE 0 @@ -104,6 +105,10 @@ struct img_spfi { bool rx_dma_busy; }; +struct img_spfi_device_data { + bool gpio_requested; +}; + static inline u32 spfi_readl(struct img_spfi *spfi, u32 reg) { return readl(spfi->regs + reg); @@ -266,15 +271,15 @@ static int img_spfi_start_pio(struct spi_master *master, cpu_relax(); } - ret = spfi_wait_all_done(spfi); - if (ret < 0) - return ret; - if (rx_bytes > 0 || tx_bytes > 0) { dev_err(spfi->dev, "PIO transfer timed out\n"); return -ETIMEDOUT; } + ret = spfi_wait_all_done(spfi); + if (ret < 0) + return ret; + return 0; } @@ -439,21 +444,50 @@ static int img_spfi_unprepare(struct spi_master *master, static int img_spfi_setup(struct spi_device *spi) { - int ret; - - ret = gpio_request_one(spi->cs_gpio, (spi->mode & SPI_CS_HIGH) ? - GPIOF_OUT_INIT_LOW : GPIOF_OUT_INIT_HIGH, - dev_name(&spi->dev)); - if (ret) - dev_err(&spi->dev, "can't request chipselect gpio %d\n", + int ret = -EINVAL; + struct img_spfi_device_data *spfi_data = spi_get_ctldata(spi); + + if (!spfi_data) { + spfi_data = kzalloc(sizeof(*spfi_data), GFP_KERNEL); + if (!spfi_data) + return -ENOMEM; + spfi_data->gpio_requested = false; + spi_set_ctldata(spi, spfi_data); + } + if (!spfi_data->gpio_requested) { + ret = gpio_request_one(spi->cs_gpio, + (spi->mode & SPI_CS_HIGH) ? + GPIOF_OUT_INIT_LOW : GPIOF_OUT_INIT_HIGH, + dev_name(&spi->dev)); + if (ret) + dev_err(&spi->dev, "can't request chipselect gpio %d\n", spi->cs_gpio); - + else + spfi_data->gpio_requested = true; + } else { + if (gpio_is_valid(spi->cs_gpio)) { + int mode = ((spi->mode & SPI_CS_HIGH) ? 
+ GPIOF_OUT_INIT_LOW : GPIOF_OUT_INIT_HIGH); + + ret = gpio_direction_output(spi->cs_gpio, mode); + if (ret) + dev_err(&spi->dev, "chipselect gpio %d setup failed (%d)\n", + spi->cs_gpio, ret); + } + } return ret; } static void img_spfi_cleanup(struct spi_device *spi) { - gpio_free(spi->cs_gpio); + struct img_spfi_device_data *spfi_data = spi_get_ctldata(spi); + + if (spfi_data) { + if (spfi_data->gpio_requested) + gpio_free(spi->cs_gpio); + kfree(spfi_data); + spi_set_ctldata(spi, NULL); + } } static void img_spfi_config(struct spi_master *master, struct spi_device *spi, @@ -491,6 +525,7 @@ static void img_spfi_config(struct spi_master *master, struct spi_device *spi, else if (xfer->tx_nbits == SPI_NBITS_QUAD && xfer->rx_nbits == SPI_NBITS_QUAD) val |= SPFI_CONTROL_TMODE_QUAD << SPFI_CONTROL_TMODE_SHIFT; + val |= SPFI_CONTROL_SE; spfi_writel(spfi, val, SPFI_CONTROL); } diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index f08e812b29847..412b9c86b9972 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -201,8 +201,9 @@ static bool spi_imx_can_dma(struct spi_master *master, struct spi_device *spi, { struct spi_imx_data *spi_imx = spi_master_get_devdata(master); - if (spi_imx->dma_is_inited && (transfer->len > spi_imx->rx_wml) - && (transfer->len > spi_imx->tx_wml)) + if (spi_imx->dma_is_inited + && transfer->len > spi_imx->rx_wml * sizeof(u32) + && transfer->len > spi_imx->tx_wml * sizeof(u32)) return true; return false; } diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c index 861664776672c..ff97cabdaa81f 100644 --- a/drivers/spi/spi-orion.c +++ b/drivers/spi/spi-orion.c @@ -61,6 +61,12 @@ enum orion_spi_type { struct orion_spi_dev { enum orion_spi_type typ; + /* + * min_divisor and max_hz should be mutually exclusive; the only + * case where both are set is to manage the armada-370-spi + * compatible with an old device tree + */ + unsigned long max_hz; unsigned int min_divisor; unsigned int max_divisor; u32 prescale_mask; @@ -387,8 +393,9 @@ static const struct orion_spi_dev orion_spi_dev_data = { static const struct orion_spi_dev armada_spi_dev_data = { .typ = ARMADA_SPI, - .min_divisor = 1, + .min_divisor = 4, .max_divisor = 1920, + .max_hz = 50000000, .prescale_mask = ARMADA_SPI_CLK_PRESCALE_MASK, }; @@ -454,7 +461,21 @@ static int orion_spi_probe(struct platform_device *pdev) goto out; tclk_hz = clk_get_rate(spi->clk); - master->max_speed_hz = DIV_ROUND_UP(tclk_hz, devdata->min_divisor); + + /* + * With an old device tree, armada-370-spi could be used with + * Armada XP; however, for this SoC the maximum frequency is + * 50MHz instead of tclk/4. On Armada 370, tclk cannot be + * higher than 200MHz. So, in order to be able to handle both + * SoCs, we can take the minimum of 50MHz and tclk/4.
+ */ + if (of_device_is_compatible(pdev->dev.of_node, + "marvell,armada-370-spi")) + master->max_speed_hz = min(devdata->max_hz, + DIV_ROUND_UP(tclk_hz, devdata->min_divisor)); + else + master->max_speed_hz = + DIV_ROUND_UP(tclk_hz, devdata->min_divisor); master->min_speed_hz = DIV_ROUND_UP(tclk_hz, devdata->max_divisor); r = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index e3223ac75a7c5..f089082c00e1d 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -624,6 +624,10 @@ static irqreturn_t ssp_int(int irq, void *dev_id) if (!(sccr1_reg & SSCR1_TIE)) mask &= ~SSSR_TFS; + /* Ignore RX timeout interrupt if it is disabled */ + if (!(sccr1_reg & SSCR1_TINTE)) + mask &= ~SSSR_TINT; + if (!(status & mask)) return IRQ_NONE; diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index bcc7c635d8e7d..7872f3c78b515 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -48,8 +48,8 @@ struct sh_msiof_spi_priv { const struct sh_msiof_chipdata *chipdata; struct sh_msiof_spi_info *info; struct completion done; - int tx_fifo_size; - int rx_fifo_size; + unsigned int tx_fifo_size; + unsigned int rx_fifo_size; void *tx_dma_page; void *rx_dma_page; dma_addr_t tx_dma_addr; @@ -95,8 +95,6 @@ struct sh_msiof_spi_priv { #define MDR2_WDLEN1(i) (((i) - 1) << 16) /* Word Count (1-64/256 (SH, A1))) */ #define MDR2_GRPMASK1 0x00000001 /* Group Output Mask 1 (SH, A1) */ -#define MAX_WDLEN 256U - /* TSCR and RSCR */ #define SCR_BRPS_MASK 0x1f00 /* Prescaler Setting (1-32) */ #define SCR_BRPS(i) (((i) - 1) << 8) @@ -850,7 +848,12 @@ static int sh_msiof_transfer_one(struct spi_master *master, * DMA supports 32-bit words only, hence pack 8-bit and 16-bit * words, with byte resp. word swapping. 
*/ - unsigned int l = min(len, MAX_WDLEN * 4); + unsigned int l = 0; + + if (tx_buf) + l = min(len, p->tx_fifo_size * 4); + if (rx_buf) + l = min(len, p->rx_fifo_size * 4); if (bits <= 8) { if (l & 3) @@ -963,7 +966,7 @@ static const struct sh_msiof_chipdata sh_data = { static const struct sh_msiof_chipdata r8a779x_data = { .tx_fifo_size = 64, - .rx_fifo_size = 256, + .rx_fifo_size = 64, .master_flags = SPI_MASTER_MUST_TX, }; diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c index 133f53a9c1d4e..a339c1e9997a9 100644 --- a/drivers/spi/spi-xilinx.c +++ b/drivers/spi/spi-xilinx.c @@ -249,19 +249,23 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) xspi->tx_ptr = t->tx_buf; xspi->rx_ptr = t->rx_buf; remaining_words = t->len / xspi->bytes_per_word; - reinit_completion(&xspi->done); if (xspi->irq >= 0 && remaining_words > xspi->buffer_size) { + u32 isr; use_irq = true; - xspi->write_fn(XSPI_INTR_TX_EMPTY, - xspi->regs + XIPIF_V123B_IISR_OFFSET); - /* Enable the global IPIF interrupt */ - xspi->write_fn(XIPIF_V123B_GINTR_ENABLE, - xspi->regs + XIPIF_V123B_DGIER_OFFSET); /* Inhibit irq to avoid spurious irqs on tx_empty*/ cr = xspi->read_fn(xspi->regs + XSPI_CR_OFFSET); xspi->write_fn(cr | XSPI_CR_TRANS_INHIBIT, xspi->regs + XSPI_CR_OFFSET); + /* ACK old irqs (if any) */ + isr = xspi->read_fn(xspi->regs + XIPIF_V123B_IISR_OFFSET); + if (isr) + xspi->write_fn(isr, + xspi->regs + XIPIF_V123B_IISR_OFFSET); + /* Enable the global IPIF interrupt */ + xspi->write_fn(XIPIF_V123B_GINTR_ENABLE, + xspi->regs + XIPIF_V123B_DGIER_OFFSET); + reinit_completion(&xspi->done); } while (remaining_words) { @@ -302,8 +306,10 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) remaining_words -= n_words; } - if (use_irq) + if (use_irq) { xspi->write_fn(0, xspi->regs + XIPIF_V123B_DGIER_OFFSET); + xspi->write_fn(cr, xspi->regs + XSPI_CR_OFFSET); + } return t->len; } diff --git a/drivers/spi/spi-xtensa-xtfpga.c b/drivers/spi/spi-xtensa-xtfpga.c index 2e32ea2f194f3..be6155cba9de7 100644 --- a/drivers/spi/spi-xtensa-xtfpga.c +++ b/drivers/spi/spi-xtensa-xtfpga.c @@ -34,13 +34,13 @@ struct xtfpga_spi { static inline void xtfpga_spi_write32(const struct xtfpga_spi *spi, unsigned addr, u32 val) { - iowrite32(val, spi->regs + addr); + __raw_writel(val, spi->regs + addr); } static inline unsigned int xtfpga_spi_read32(const struct xtfpga_spi *spi, unsigned addr) { - return ioread32(spi->regs + addr); + return __raw_readl(spi->regs + addr); } static inline void xtfpga_spi_wait_busy(struct xtfpga_spi *xspi) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 50910d85df5af..029dbd33b4b28 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -988,9 +988,6 @@ void spi_finalize_current_message(struct spi_master *master) spin_lock_irqsave(&master->queue_lock, flags); mesg = master->cur_msg; - master->cur_msg = NULL; - - queue_kthread_work(&master->kworker, &master->pump_messages); spin_unlock_irqrestore(&master->queue_lock, flags); spi_unmap_msg(master, mesg); @@ -1003,9 +1000,13 @@ void spi_finalize_current_message(struct spi_master *master) } } - trace_spi_message_done(mesg); - + spin_lock_irqsave(&master->queue_lock, flags); + master->cur_msg = NULL; master->cur_msg_prepared = false; + queue_kthread_work(&master->kworker, &master->pump_messages); + spin_unlock_irqrestore(&master->queue_lock, flags); + + trace_spi_message_done(mesg); mesg->state = NULL; if (mesg->complete) @@ -1426,8 +1427,7 @@ static struct class spi_master_class = { * * The 
caller is responsible for assigning the bus number and initializing * the master's methods before calling spi_register_master(); and (after errors - * adding the device) calling spi_master_put() and kfree() to prevent a memory - * leak. + * adding the device) calling spi_master_put() to prevent a memory leak. */ struct spi_master *spi_alloc_master(struct device *dev, unsigned size) { diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 92c909eed6b50..8fab566e0f0bc 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -664,7 +664,8 @@ static int spidev_release(struct inode *inode, struct file *filp) kfree(spidev->rx_buffer); spidev->rx_buffer = NULL; - spidev->speed_hz = spidev->spi->max_speed_hz; + if (spidev->spi) + spidev->speed_hz = spidev->spi->max_speed_hz; /* ... after we unbound from the underlying device? */ spin_lock_irq(&spidev->spi_lock); diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index b0b96ab31954a..abbc42a56e7c5 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -1179,13 +1179,13 @@ struct ion_handle *ion_import_dma_buf(struct ion_client *client, int fd) mutex_unlock(&client->lock); goto end; } - mutex_unlock(&client->lock); handle = ion_handle_create(client, buffer); - if (IS_ERR(handle)) + if (IS_ERR(handle)) { + mutex_unlock(&client->lock); goto end; + } - mutex_lock(&client->lock); ret = ion_handle_add(client, handle); mutex_unlock(&client->lock); if (ret) { diff --git a/drivers/staging/comedi/drivers/adl_pci7x3x.c b/drivers/staging/comedi/drivers/adl_pci7x3x.c index 934af3ff78973..b0fc027cf485d 100644 --- a/drivers/staging/comedi/drivers/adl_pci7x3x.c +++ b/drivers/staging/comedi/drivers/adl_pci7x3x.c @@ -120,8 +120,20 @@ static int adl_pci7x3x_do_insn_bits(struct comedi_device *dev, { unsigned long reg = (unsigned long)s->private; - if (comedi_dio_update_state(s, data)) - outl(s->state, dev->iobase + reg); + if (comedi_dio_update_state(s, data)) { + unsigned int val = s->state; + + if (s->n_chan == 16) { + /* + * It seems the PCI-7230 needs the 16-bit DO state + * to be shifted left by 16 bits before being written + * to the 32-bit register. Set the value in both + * halves of the register to be sure. 
+ */ + val |= val << 16; + } + outl(val, dev->iobase + reg); + } data[1] = s->state; diff --git a/drivers/staging/comedi/drivers/cb_pcimdas.c b/drivers/staging/comedi/drivers/cb_pcimdas.c index c458e5010a745..4ebf5aae50199 100644 --- a/drivers/staging/comedi/drivers/cb_pcimdas.c +++ b/drivers/staging/comedi/drivers/cb_pcimdas.c @@ -243,7 +243,7 @@ static int cb_pcimdas_ao_insn_write(struct comedi_device *dev, return insn->n; } -static int cb_pcimdas_di_insn_read(struct comedi_device *dev, +static int cb_pcimdas_di_insn_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) @@ -258,7 +258,7 @@ static int cb_pcimdas_di_insn_read(struct comedi_device *dev, return insn->n; } -static int cb_pcimdas_do_insn_write(struct comedi_device *dev, +static int cb_pcimdas_do_insn_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) @@ -424,7 +424,7 @@ static int cb_pcimdas_auto_attach(struct comedi_device *dev, s->n_chan = 4; s->maxdata = 1; s->range_table = &range_digital; - s->insn_read = cb_pcimdas_di_insn_read; + s->insn_bits = cb_pcimdas_di_insn_bits; /* Digital Output subdevice (main connector) */ s = &dev->subdevices[4]; @@ -433,7 +433,7 @@ static int cb_pcimdas_auto_attach(struct comedi_device *dev, s->n_chan = 4; s->maxdata = 1; s->range_table = &range_digital; - s->insn_write = cb_pcimdas_do_insn_write; + s->insn_bits = cb_pcimdas_do_insn_bits; /* Counter subdevice (8254) */ s = &dev->subdevices[5]; diff --git a/drivers/staging/comedi/drivers/usbduxsigma.c b/drivers/staging/comedi/drivers/usbduxsigma.c index eaa9add491df7..dc0b25a540886 100644 --- a/drivers/staging/comedi/drivers/usbduxsigma.c +++ b/drivers/staging/comedi/drivers/usbduxsigma.c @@ -550,27 +550,6 @@ static int usbduxsigma_ai_cmdtest(struct comedi_device *dev, if (err) return 3; - /* Step 4: fix up any arguments */ - - if (high_speed) { - /* - * every 2 channels get a time window of 125us. Thus, if we - * sample all 16 channels we need 1ms. If we sample only one - * channel we need only 125us - */ - devpriv->ai_interval = interval; - devpriv->ai_timer = cmd->scan_begin_arg / (125000 * interval); - } else { - /* interval always 1ms */ - devpriv->ai_interval = 1; - devpriv->ai_timer = cmd->scan_begin_arg / 1000000; - } - if (devpriv->ai_timer < 1) - err |= -EINVAL; - - if (err) - return 4; - return 0; } @@ -668,6 +647,22 @@ static int usbduxsigma_ai_cmd(struct comedi_device *dev, down(&devpriv->sem); + if (devpriv->high_speed) { + /* + * every 2 channels get a time window of 125us. Thus, if we + * sample all 16 channels we need 1ms. 
If we sample only one + * channel we need only 125us + */ + unsigned int interval = usbduxsigma_chans_to_interval(len); + + devpriv->ai_interval = interval; + devpriv->ai_timer = cmd->scan_begin_arg / (125000 * interval); + } else { + /* interval always 1ms */ + devpriv->ai_interval = 1; + devpriv->ai_timer = cmd->scan_begin_arg / 1000000; + } + for (i = 0; i < len; i++) { unsigned int chan = CR_CHAN(cmd->chanlist[i]); @@ -917,25 +912,6 @@ static int usbduxsigma_ao_cmdtest(struct comedi_device *dev, if (err) return 3; - /* Step 4: fix up any arguments */ - - /* we count in timer steps */ - if (high_speed) { - /* timing of the conversion itself: every 125 us */ - devpriv->ao_timer = cmd->convert_arg / 125000; - } else { - /* - * timing of the scan: every 1ms - * we get all channels at once - */ - devpriv->ao_timer = cmd->scan_begin_arg / 1000000; - } - if (devpriv->ao_timer < 1) - err |= -EINVAL; - - if (err) - return 4; - return 0; } @@ -948,6 +924,20 @@ static int usbduxsigma_ao_cmd(struct comedi_device *dev, down(&devpriv->sem); + if (cmd->convert_src == TRIG_TIMER) { + /* + * timing of the conversion itself: every 125 us + * at high speed (not used yet) + */ + devpriv->ao_timer = cmd->convert_arg / 125000; + } else { + /* + * timing of the scan: every 1ms + * we get all channels at once + */ + devpriv->ao_timer = cmd->scan_begin_arg / 1000000; + } + devpriv->ao_counter = devpriv->ao_timer; if (cmd->start_src == TRIG_NOW) { diff --git a/drivers/staging/iio/accel/sca3000_ring.c b/drivers/staging/iio/accel/sca3000_ring.c index 8589eade1057e..de65a8730d888 100644 --- a/drivers/staging/iio/accel/sca3000_ring.c +++ b/drivers/staging/iio/accel/sca3000_ring.c @@ -116,7 +116,7 @@ static int sca3000_read_first_n_hw_rb(struct iio_buffer *r, if (ret) goto error_ret; - for (i = 0; i < num_read; i++) + for (i = 0; i < num_read / sizeof(u16); i++) *(((u16 *)rx) + i) = be16_to_cpup((__be16 *)rx + i); if (copy_to_user(buf, rx, num_read)) diff --git a/drivers/staging/iio/adc/mxs-lradc.c b/drivers/staging/iio/adc/mxs-lradc.c index d7c5223f1c3e7..2931ea9b75d13 100644 --- a/drivers/staging/iio/adc/mxs-lradc.c +++ b/drivers/staging/iio/adc/mxs-lradc.c @@ -919,11 +919,12 @@ static int mxs_lradc_read_raw(struct iio_dev *iio_dev, case IIO_CHAN_INFO_OFFSET: if (chan->type == IIO_TEMP) { /* The calculated value from the ADC is in Kelvin, we - * want Celsius for hwmon so the offset is - * -272.15 * scale + * want Celsius for hwmon so the offset is -273.15. + * The offset is applied before scaling so it is + * actually -273.15 * 4 / 1.012 = -1079.644268 */ - *val = -1075; - *val2 = 691699; + *val = -1079; + *val2 = 644268; return IIO_VAL_INT_PLUS_MICRO; } diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h index 0038d29a37fe1..a470e32c49c1a 100644 --- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h +++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h @@ -874,7 +874,7 @@ void lnet_debug_peer(lnet_nid_t nid); static inline void lnet_peer_set_alive(lnet_peer_t *lp) { - lp->lp_last_alive = lp->lp_last_query = get_seconds(); + lp->lp_last_alive = lp->lp_last_query = jiffies; if (!lp->lp_alive) lnet_notify_locked(lp, 0, 1, lp->lp_last_alive); } diff --git a/drivers/staging/lustre/lustre/obdclass/debug.c b/drivers/staging/lustre/lustre/obdclass/debug.c index 9c934e6d2ea11..c61add46b4268 100644 --- a/drivers/staging/lustre/lustre/obdclass/debug.c +++ b/drivers/staging/lustre/lustre/obdclass/debug.c @@ -40,7 +40,7 @@ #define DEBUG_SUBSYSTEM
D_OTHER -#include +#include #include "../include/obd_support.h" #include "../include/lustre_debug.h" diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c index d542e06d6cd38..10e520d6bb753 100644 --- a/drivers/staging/lustre/lustre/obdecho/echo_client.c +++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c @@ -1268,6 +1268,7 @@ static int echo_copyout_lsm(struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob) { struct lov_stripe_md *ulsm = _ulsm; + struct lov_oinfo **p; int nob, i; nob = offsetof(struct lov_stripe_md, lsm_oinfo[lsm->lsm_stripe_count]); @@ -1277,9 +1278,10 @@ echo_copyout_lsm(struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob) if (copy_to_user(ulsm, lsm, sizeof(*ulsm))) return -EFAULT; - for (i = 0; i < lsm->lsm_stripe_count; i++) { - if (copy_to_user(ulsm->lsm_oinfo[i], lsm->lsm_oinfo[i], - sizeof(lsm->lsm_oinfo[0]))) + for (i = 0, p = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, p++) { + struct lov_oinfo __user *up; + if (get_user(up, ulsm->lsm_oinfo + i) || + copy_to_user(up, *p, sizeof(struct lov_oinfo))) return -EFAULT; } return 0; @@ -1287,9 +1289,10 @@ echo_copyout_lsm(struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob) static int echo_copyin_lsm(struct echo_device *ed, struct lov_stripe_md *lsm, - void *ulsm, int ulsm_nob) + struct lov_stripe_md __user *ulsm, int ulsm_nob) { struct echo_client_obd *ec = ed->ed_ec; + struct lov_oinfo **p; int i; if (ulsm_nob < sizeof(*lsm)) @@ -1305,11 +1308,10 @@ echo_copyin_lsm(struct echo_device *ed, struct lov_stripe_md *lsm, return -EINVAL; - for (i = 0; i < lsm->lsm_stripe_count; i++) { - if (copy_from_user(lsm->lsm_oinfo[i], - ((struct lov_stripe_md *)ulsm)-> \ - lsm_oinfo[i], - sizeof(lsm->lsm_oinfo[0]))) + for (i = 0, p = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, p++) { + struct lov_oinfo __user *up; + if (get_user(up, ulsm->lsm_oinfo + i) || + copy_from_user(*p, up, sizeof(struct lov_oinfo))) return -EFAULT; } return 0; diff --git a/drivers/staging/rtl8712/rtl8712_recv.c b/drivers/staging/rtl8712/rtl8712_recv.c index 50227b598e0c3..fcb8c61b28844 100644 --- a/drivers/staging/rtl8712/rtl8712_recv.c +++ b/drivers/staging/rtl8712/rtl8712_recv.c @@ -1056,7 +1056,8 @@ static int recvbuf2recvframe(struct _adapter *padapter, struct sk_buff *pskb) /* for the first fragment packet, the driver needs to allocate 1536 + * drvinfo_sz + RXDESC_SIZE to defrag the packet. */ if ((mf == 1) && (frag == 0)) - alloc_sz = 1658;/*1658+6=1664, 1664 is 128 alignment.*/ + /* 1658 + 6 = 1664, and 1664 is a multiple of 128 */ + alloc_sz = max_t(u16, tmp_len, 1658); else alloc_sz = tmp_len; /* 2 is for IP header 4 bytes alignment in QoS packet case.
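
Aside on the echo_client hunks above: lsm_oinfo is an array of pointers, so copying it to or from user space involves two levels of indirection. Each array element is itself a __user pointer and must first be fetched with get_user() before it can be used as a copy_to_user()/copy_from_user() target; dereferencing it directly, as the old code did, is what the fix removes. A minimal sketch of the pattern, using a hypothetical struct foo and helper name rather than the lustre types:

#include <linux/uaccess.h>
#include <linux/errno.h>

struct foo {
	int a;
};

/*
 * Copy n kernel-side objects out through a user-space array of
 * user-space pointers: fetch each pointer with get_user(), then
 * copy through it.
 */
static int copy_out_indirect(struct foo __user *__user *uarr,
			     struct foo **karr, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		struct foo __user *up;

		if (get_user(up, uarr + i))
			return -EFAULT;
		if (copy_to_user(up, karr[i], sizeof(*up)))
			return -EFAULT;
	}
	return 0;
}

The copy-in direction is symmetric: get_user() the element, then copy_from_user() through it into the kernel-side buffer.
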
diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c index f8b5b332e7c3d..943a0e2045329 100644 --- a/drivers/staging/rtl8712/usb_intf.c +++ b/drivers/staging/rtl8712/usb_intf.c @@ -144,6 +144,7 @@ static struct usb_device_id rtl871x_usb_id_tbl[] = { {USB_DEVICE(0x0DF6, 0x0058)}, {USB_DEVICE(0x0DF6, 0x0049)}, {USB_DEVICE(0x0DF6, 0x004C)}, + {USB_DEVICE(0x0DF6, 0x006C)}, {USB_DEVICE(0x0DF6, 0x0064)}, /* Skyworth */ {USB_DEVICE(0x14b2, 0x3300)}, diff --git a/drivers/staging/speakup/fakekey.c b/drivers/staging/speakup/fakekey.c index 4299cf45f947d..5e1f16c36b49a 100644 --- a/drivers/staging/speakup/fakekey.c +++ b/drivers/staging/speakup/fakekey.c @@ -81,6 +81,7 @@ void speakup_fake_down_arrow(void) __this_cpu_write(reporting_keystroke, true); input_report_key(virt_keyboard, KEY_DOWN, PRESSED); input_report_key(virt_keyboard, KEY_DOWN, RELEASED); + input_sync(virt_keyboard); __this_cpu_write(reporting_keystroke, false); /* reenable preemption */ diff --git a/drivers/staging/speakup/selection.c b/drivers/staging/speakup/selection.c index a0315701c7d96..ed68b2cfe0311 100644 --- a/drivers/staging/speakup/selection.c +++ b/drivers/staging/speakup/selection.c @@ -141,7 +141,9 @@ static void __speakup_paste_selection(struct work_struct *work) struct tty_ldisc *ld; DECLARE_WAITQUEUE(wait, current); - ld = tty_ldisc_ref_wait(tty); + ld = tty_ldisc_ref(tty); + if (!ld) + goto tty_unref; tty_buffer_lock_exclusive(&vc->port); add_wait_queue(&vc->paste_wait, &wait); @@ -161,6 +163,7 @@ static void __speakup_paste_selection(struct work_struct *work) tty_buffer_unlock_exclusive(&vc->port); tty_ldisc_deref(ld); +tty_unref: tty_kref_put(tty); } diff --git a/drivers/staging/speakup/serialio.c b/drivers/staging/speakup/serialio.c index 1d9d51bdf5171..f41a7da1949dc 100644 --- a/drivers/staging/speakup/serialio.c +++ b/drivers/staging/speakup/serialio.c @@ -6,6 +6,11 @@ #include "spk_priv.h" #include "serialio.h" +#include <linux/serial_core.h> +/* WARNING: Do not change this to <linux/serial.h> without testing that * SERIAL_PORT_DFNS does get defined to the appropriate value.
*/ +#include <asm/serial.h> + #ifndef SERIAL_PORT_DFNS #define SERIAL_PORT_DFNS #endif @@ -23,9 +28,15 @@ const struct old_serial_port *spk_serial_init(int index) int baud = 9600, quot = 0; unsigned int cval = 0; int cflag = CREAD | HUPCL | CLOCAL | B9600 | CS8; - const struct old_serial_port *ser = rs_table + index; + const struct old_serial_port *ser; int err; + if (index >= ARRAY_SIZE(rs_table)) { + pr_info("no port info for ttyS%d\n", index); + return NULL; + } + ser = rs_table + index; + /* Divisor, bytesize and parity */ quot = ser->baud_base / baud; cval = cflag & (CSIZE | CSTOPB); diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 0343ae386f035..376e4a0c15c66 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -807,6 +807,10 @@ static int device_rx_srv(struct vnt_private *pDevice, unsigned int uIdx) pRD = pRD->next) { if (works++ > 15) break; + + if (!pRD->pRDInfo->skb) + break; + if (vnt_receive_frame(pDevice, pRD)) { if (!device_alloc_rx_buf(pDevice, pRD)) { dev_err(&pDevice->pcid->dev, @@ -1417,7 +1421,7 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw, priv->current_aid = conf->aid; - if (changed & BSS_CHANGED_BSSID) { + if (changed & BSS_CHANGED_BSSID && conf->bssid) { unsigned long flags; spin_lock_irqsave(&priv->lock, flags); @@ -1482,8 +1486,9 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw, } } - if (changed & BSS_CHANGED_ASSOC && priv->op_mode != NL80211_IFTYPE_AP) { - if (conf->assoc) { + if (changed & (BSS_CHANGED_ASSOC | BSS_CHANGED_BEACON_INFO) && + priv->op_mode != NL80211_IFTYPE_AP) { + if (conf->assoc && conf->beacon_rate) { CARDbUpdateTSF(priv, conf->beacon_rate->hw_value, conf->sync_tsf); diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index ab3ab84cb0a71..766fdcece074b 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -701,7 +701,7 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw, priv->current_aid = conf->aid; - if (changed & BSS_CHANGED_BSSID) + if (changed & BSS_CHANGED_BSSID && conf->bssid) vnt_mac_set_bssid_addr(priv, (u8 *)conf->bssid); diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 74e6114ff18f9..6f50e9d958de7 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -341,7 +341,6 @@ static struct iscsi_np *iscsit_get_np( struct iscsi_np *iscsit_add_np( struct __kernel_sockaddr_storage *sockaddr, - char *ip_str, int network_transport) { struct sockaddr_in *sock_in; @@ -370,11 +369,9 @@ struct iscsi_np *iscsit_add_np( np->np_flags |= NPF_IP_NETWORK; if (sockaddr->ss_family == AF_INET6) { sock_in6 = (struct sockaddr_in6 *)sockaddr; - snprintf(np->np_ip, IPV6_ADDRESS_SPACE, "%s", ip_str); np->np_port = ntohs(sock_in6->sin6_port); } else { sock_in = (struct sockaddr_in *)sockaddr; - sprintf(np->np_ip, "%s", ip_str); np->np_port = ntohs(sock_in->sin_port); } @@ -411,8 +408,8 @@ struct iscsi_np *iscsit_add_np( list_add_tail(&np->np_list, &g_np_list); mutex_unlock(&np_lock); - pr_debug("CORE[0] - Added Network Portal: %s:%hu on %s\n", - np->np_ip, np->np_port, np->np_transport->name); + pr_debug("CORE[0] - Added Network Portal: %pISc:%hu on %s\n", + &np->np_sockaddr, np->np_port, np->np_transport->name); return np; } @@ -481,8 +478,8 @@ int iscsit_del_np(struct iscsi_np *np) list_del(&np->np_list); mutex_unlock(&np_lock); - pr_debug("CORE[0] - Removed Network Portal: %s:%hu on %s\n", - np->np_ip,
np->np_port, np->np_transport->name); + pr_debug("CORE[0] - Removed Network Portal: %pISc:%hu on %s\n", + &np->np_sockaddr, np->np_port, np->np_transport->name); iscsit_put_transport(np->np_transport); kfree(np); @@ -715,7 +712,7 @@ static int iscsit_add_reject_from_cmd( */ if (cmd->se_cmd.se_tfo != NULL) { pr_debug("iscsi reject: calling target_put_sess_cmd >>>>>>\n"); - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); } return -1; } @@ -968,9 +965,9 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, cmd->cmd_flags |= ICF_NON_IMMEDIATE_UNSOLICITED_DATA; conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; - if (hdr->flags & ISCSI_FLAG_CMD_READ) { + if (hdr->flags & ISCSI_FLAG_CMD_READ) cmd->targ_xfer_tag = session_get_next_ttt(conn->sess); - } else if (hdr->flags & ISCSI_FLAG_CMD_WRITE) + else cmd->targ_xfer_tag = 0xFFFFFFFF; cmd->cmd_sn = be32_to_cpu(hdr->cmdsn); cmd->exp_stat_sn = be32_to_cpu(hdr->exp_statsn); @@ -1001,7 +998,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, hdr->cmdsn, be32_to_cpu(hdr->data_length), payload_length, conn->cid); - target_get_sess_cmd(conn->sess->se_sess, &cmd->se_cmd, true); + target_get_sess_cmd(&cmd->se_cmd, true); cmd->sense_reason = transport_lookup_cmd_lun(&cmd->se_cmd, scsilun_to_int(&hdr->lun)); @@ -1067,7 +1064,7 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) return -1; else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return 0; } } @@ -1083,7 +1080,7 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (!cmd->sense_reason) return 0; - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return 0; } @@ -1114,7 +1111,6 @@ static int iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, bool dump_payload) { - struct iscsi_conn *conn = cmd->conn; int cmdsn_ret = 0, immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION; /* * Special case for Unsupported SAM WRITE Opcodes and ImmediateData=Yes. 
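
The targ_xfer_tag hunk above is easy to misread in diff form. Condensed, the rule it implements is: only READ commands consume a real Target Transfer Tag from the session counter, and every other command carries the reserved tag 0xFFFFFFFF. A hedged restatement of just that logic (the helper name assign_ttt is illustrative, not an in-tree function):

/*
 * Condensed form of the tag assignment above; hdr_flags is the
 * flags byte of the SCSI command PDU.
 */
static u32 assign_ttt(struct iscsi_session *sess, u8 hdr_flags)
{
	if (hdr_flags & ISCSI_FLAG_CMD_READ)
		return session_get_next_ttt(sess);

	return 0xFFFFFFFF;	/* reserved: no target transfer tagging */
}

The behavioural change sits in the else arm: previously only WRITEs were given the reserved tag, leaving commands that are neither READ nor WRITE with an uninitialized targ_xfer_tag.
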
@@ -1141,7 +1137,7 @@ iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, rc = iscsit_dump_data_payload(cmd->conn, cmd->first_burst_len, 1); - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return rc; } else if (cmd->unsolicited_data) iscsit_set_unsoliticed_dataout(cmd); @@ -1810,7 +1806,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, conn->sess->se_sess, 0, DMA_NONE, TCM_SIMPLE_TAG, cmd->sense_buffer + 2); - target_get_sess_cmd(conn->sess->se_sess, &cmd->se_cmd, true); + target_get_sess_cmd(&cmd->se_cmd, true); sess_ref = true; switch (function) { @@ -1952,7 +1948,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, */ if (sess_ref) { pr_debug("Handle TMR, using sess_ref=true check\n"); - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); } iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); @@ -3467,7 +3463,6 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, tpg_np_list) { struct iscsi_np *np = tpg_np->tpg_np; bool inaddr_any = iscsit_check_inaddr_any(np); - char *fmt_str; if (np->np_network_transport != network_transport) continue; @@ -3495,15 +3490,18 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, } } - if (np->np_sockaddr.ss_family == AF_INET6) - fmt_str = "TargetAddress=[%s]:%hu,%hu"; - else - fmt_str = "TargetAddress=%s:%hu,%hu"; - - len = sprintf(buf, fmt_str, - inaddr_any ? conn->local_ip : np->np_ip, - np->np_port, - tpg->tpgt); + if (inaddr_any) { + len = sprintf(buf, "TargetAddress=" + "%s:%hu,%hu", + conn->local_ip, + np->np_port, + tpg->tpgt); + } else { + len = sprintf(buf, "TargetAddress=" + "%pISpc,%hu", + &np->np_sockaddr, + tpg->tpgt); + } len += 1; if ((len + payload_len) > buffer_len) { @@ -4001,7 +3999,13 @@ int iscsi_target_tx_thread(void *arg) } transport_err: - iscsit_take_action_for_connection_exit(conn); + /* + * Avoid the normal connection failure code-path if this connection + * is still within LOGIN mode, and iscsi_np process context is + * responsible for cleaning up the early connection failure. + */ + if (conn->conn_state != TARG_CONN_STATE_IN_LOGIN) + iscsit_take_action_for_connection_exit(conn); out: return 0; } @@ -4091,9 +4095,20 @@ static int iscsi_target_rx_opcode(struct iscsi_conn *conn, unsigned char *buf) return iscsit_add_reject(conn, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } +static bool iscsi_target_check_conn_state(struct iscsi_conn *conn) +{ + bool ret; + + spin_lock_bh(&conn->state_lock); + ret = (conn->conn_state != TARG_CONN_STATE_LOGGED_IN); + spin_unlock_bh(&conn->state_lock); + + return ret; +} + int iscsi_target_rx_thread(void *arg) { - int ret; + int ret, rc; u8 buffer[ISCSI_HDR_LEN], opcode; u32 checksum = 0, digest = 0; struct iscsi_conn *conn = arg; @@ -4103,10 +4118,16 @@ int iscsi_target_rx_thread(void *arg) * connection recovery / failure event can be triggered externally. */ allow_signal(SIGINT); + /* + * Wait for iscsi_post_login_handler() to complete before allowing + * incoming iscsi/tcp socket I/O, and/or failing the connection. 
+ */ + rc = wait_for_completion_interruptible(&conn->rx_login_comp); + if (rc < 0 || iscsi_target_check_conn_state(conn)) + return 0; if (conn->conn_transport->transport_type == ISCSI_INFINIBAND) { struct completion comp; - int rc; init_completion(&comp); rc = wait_for_completion_interruptible(&comp); @@ -4543,7 +4564,18 @@ static void iscsit_logout_post_handler_closesession( struct iscsi_conn *conn) { struct iscsi_session *sess = conn->sess; - int sleep = cmpxchg(&conn->tx_thread_active, true, false); + int sleep = 1; + /* + * Traditional iscsi/tcp will invoke this logic from TX thread + * context during session logout, so clear tx_thread_active and + * sleep if iscsit_close_connection() has not already occurred. + * + * Since iser-target invokes this logic from its own workqueue, + * always sleep waiting for RX/TX thread shutdown to complete + * within iscsit_close_connection(). + */ + if (conn->conn_transport->transport_type == ISCSI_TCP) + sleep = cmpxchg(&conn->tx_thread_active, true, false); atomic_set(&conn->conn_logout_remove, 0); complete(&conn->conn_logout_comp); @@ -4557,7 +4589,10 @@ static void iscsit_logout_post_handler_samecid( struct iscsi_conn *conn) { - int sleep = cmpxchg(&conn->tx_thread_active, true, false); + int sleep = 1; + + if (conn->conn_transport->transport_type == ISCSI_TCP) + sleep = cmpxchg(&conn->tx_thread_active, true, false); atomic_set(&conn->conn_logout_remove, 0); complete(&conn->conn_logout_comp); @@ -4776,6 +4811,7 @@ int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force) struct iscsi_session *sess; struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; struct se_session *se_sess, *se_sess_tmp; + LIST_HEAD(free_list); int session_count = 0; spin_lock_bh(&se_tpg->session_lock); @@ -4797,14 +4833,17 @@ int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force) } atomic_set(&sess->session_reinstatement, 1); spin_unlock(&sess->conn_lock); - spin_unlock_bh(&se_tpg->session_lock); - iscsit_free_session(sess); - spin_lock_bh(&se_tpg->session_lock); + list_move_tail(&se_sess->sess_list, &free_list); + } + spin_unlock_bh(&se_tpg->session_lock); + list_for_each_entry_safe(se_sess, se_sess_tmp, &free_list, sess_list) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + + iscsit_free_session(sess); session_count++; } - spin_unlock_bh(&se_tpg->session_lock); pr_debug("Released %d iSCSI Session(s) from Target Portal" " Group: %hu\n", session_count, tpg->tpgt); diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h index 7d0f9c00d9c25..d294f030a0978 100644 --- a/drivers/target/iscsi/iscsi_target.h +++ b/drivers/target/iscsi/iscsi_target.h @@ -13,7 +13,7 @@ extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *, extern bool iscsit_check_np_match(struct __kernel_sockaddr_storage *, struct iscsi_np *, int); extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *, - char *, int); + int); extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *, struct iscsi_portal_group *, bool); extern int iscsit_del_np(struct iscsi_np *); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 469fce44ebad5..83bb55b944348 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -100,7 +100,7 @@ static ssize_t lio_target_np_store_sctp( * Use existing np->np_sockaddr for SCTP network portal
reference */ tpg_np_sctp = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr, - np->np_ip, tpg_np, ISCSI_SCTP_TCP); + tpg_np, ISCSI_SCTP_TCP); if (!tpg_np_sctp || IS_ERR(tpg_np_sctp)) goto out; } else { @@ -178,7 +178,7 @@ static ssize_t lio_target_np_store_iser( } tpg_np_iser = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr, - np->np_ip, tpg_np, ISCSI_INFINIBAND); + tpg_np, ISCSI_INFINIBAND); if (IS_ERR(tpg_np_iser)) { rc = PTR_ERR(tpg_np_iser); goto out; @@ -249,8 +249,8 @@ static struct se_tpg_np *lio_target_call_addnptotpg( return ERR_PTR(-EINVAL); } str++; /* Skip over leading "[" */ - *str2 = '\0'; /* Terminate the IPv6 address */ - str2++; /* Skip over the "]" */ + *str2 = '\0'; /* Terminate the unbracketed IPv6 address */ + str2++; /* Skip over the \0 */ port_str = strstr(str2, ":"); if (!port_str) { pr_err("Unable to locate \":port\"" @@ -317,7 +317,7 @@ static struct se_tpg_np *lio_target_call_addnptotpg( * sys/kernel/config/iscsi/$IQN/$TPG/np/$IP:$PORT/ * */ - tpg_np = iscsit_tpg_add_network_portal(tpg, &sockaddr, str, NULL, + tpg_np = iscsit_tpg_add_network_portal(tpg, &sockaddr, NULL, ISCSI_TCP); if (IS_ERR(tpg_np)) { iscsit_put_tpg(tpg); @@ -345,8 +345,8 @@ static void lio_target_call_delnpfromtpg( se_tpg = &tpg->tpg_se_tpg; pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s TPGT: %hu" - " PORTAL: %s:%hu\n", config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item), - tpg->tpgt, tpg_np->tpg_np->np_ip, tpg_np->tpg_np->np_port); + " PORTAL: %pISc:%hu\n", config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item), + tpg->tpgt, &tpg_np->tpg_np->np_sockaddr, tpg_np->tpg_np->np_port); ret = iscsit_tpg_del_network_portal(tpg, tpg_np); if (ret < 0) @@ -1907,7 +1907,8 @@ static void lio_tpg_release_fabric_acl( } /* - * Called with spin_lock_bh(struct se_portal_group->session_lock) held.. + * Called with spin_lock_irq(struct se_portal_group->session_lock) held + * or not held. * * Also, this function calls iscsit_inc_session_usage_count() on the * struct iscsi_session in question. 
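
All of the %pISc conversions in this file and in iscsi_target.c follow from dropping the pre-formatted np->np_ip string: the kernel's %pIS printk extension formats a struct sockaddr directly, choosing IPv4 or IPv6 notation from the address family, so no per-portal character buffer has to be maintained. A minimal sketch of the idiom (the function name is illustrative):

#include <linux/printk.h>
#include <linux/socket.h>

/*
 * %pISc prints the address in compressed form (dotted quad for IPv4,
 * "::"-shortened for IPv6); %pISpc would also append the port, which
 * is how the TargetAddress= response seen earlier is built.
 */
static void print_portal(struct sockaddr_storage *ss, u16 port, u16 tpgt)
{
	pr_debug("Network Portal: %pISc:%hu,%hu\n", ss, port, tpgt);
}
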
@@ -1915,19 +1916,32 @@ static void lio_tpg_release_fabric_acl( static int lio_tpg_shutdown_session(struct se_session *se_sess) { struct iscsi_session *sess = se_sess->fabric_sess_ptr; + struct se_portal_group *se_tpg = se_sess->se_tpg; + bool local_lock = false; + + if (!spin_is_locked(&se_tpg->session_lock)) { + spin_lock_irq(&se_tpg->session_lock); + local_lock = true; + } spin_lock(&sess->conn_lock); if (atomic_read(&sess->session_fall_back_to_erl0) || atomic_read(&sess->session_logout) || (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) { spin_unlock(&sess->conn_lock); + if (local_lock) + spin_unlock_irq(&sess->conn_lock); return 0; } atomic_set(&sess->session_reinstatement, 1); spin_unlock(&sess->conn_lock); iscsit_stop_time2retain_timer(sess); + spin_unlock_irq(&se_tpg->session_lock); + iscsit_stop_session(sess, 1, 1); + if (!local_lock) + spin_lock_irq(&se_tpg->session_lock); return 1; } @@ -1967,7 +1981,7 @@ static void lio_set_default_node_attributes(struct se_node_acl *se_acl) static int lio_check_stop_free(struct se_cmd *se_cmd) { - return target_put_sess_cmd(se_cmd->se_sess, se_cmd); + return target_put_sess_cmd(se_cmd); } static void lio_release_cmd(struct se_cmd *se_cmd) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 70d799dfab03c..39654e917cd80 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -82,6 +82,7 @@ static struct iscsi_login *iscsi_login_init_conn(struct iscsi_conn *conn) init_completion(&conn->conn_logout_comp); init_completion(&conn->rx_half_close_comp); init_completion(&conn->tx_half_close_comp); + init_completion(&conn->rx_login_comp); spin_lock_init(&conn->cmd_lock); spin_lock_init(&conn->conn_usage_lock); spin_lock_init(&conn->immed_queue_lock); @@ -699,7 +700,7 @@ static void iscsi_post_login_start_timers(struct iscsi_conn *conn) iscsit_start_nopin_timer(conn); } -static int iscsit_start_kthreads(struct iscsi_conn *conn) +int iscsit_start_kthreads(struct iscsi_conn *conn) { int ret = 0; @@ -734,6 +735,7 @@ static int iscsit_start_kthreads(struct iscsi_conn *conn) return 0; out_tx: + send_sig(SIGINT, conn->tx_thread, 1); kthread_stop(conn->tx_thread); conn->tx_thread_active = false; out_bitmap: @@ -744,7 +746,7 @@ static int iscsit_start_kthreads(struct iscsi_conn *conn) return ret; } -int iscsi_post_login_handler( +void iscsi_post_login_handler( struct iscsi_np *np, struct iscsi_conn *conn, u8 zero_tsih) @@ -754,7 +756,6 @@ int iscsi_post_login_handler( struct se_session *se_sess = sess->se_sess; struct iscsi_portal_group *tpg = sess->tpg; struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; - int rc; iscsit_inc_conn_usage_count(conn); @@ -795,10 +796,6 @@ int iscsi_post_login_handler( sess->sess_ops->InitiatorName); spin_unlock_bh(&sess->conn_lock); - rc = iscsit_start_kthreads(conn); - if (rc) - return rc; - iscsi_post_login_start_timers(conn); /* * Determine CPU mask to ensure connection's RX and TX kthreads @@ -807,15 +804,20 @@ int iscsi_post_login_handler( iscsit_thread_get_cpumask(conn); conn->conn_rx_reset_cpumask = 1; conn->conn_tx_reset_cpumask = 1; - + /* + * Wakeup the sleeping iscsi_target_rx_thread() now that + * iscsi_conn is in TARG_CONN_STATE_LOGGED_IN state. 
+ */ + complete(&conn->rx_login_comp); iscsit_dec_conn_usage_count(conn); + if (stop_timer) { spin_lock_bh(&se_tpg->session_lock); iscsit_stop_time2retain_timer(sess); spin_unlock_bh(&se_tpg->session_lock); } iscsit_dec_session_usage_count(sess); - return 0; + return; } iscsi_set_session_parameters(sess->sess_ops, conn->param_list, 1); @@ -856,10 +858,6 @@ int iscsi_post_login_handler( " iSCSI Target Portal Group: %hu\n", tpg->nsessions, tpg->tpgt); spin_unlock_bh(&se_tpg->session_lock); - rc = iscsit_start_kthreads(conn); - if (rc) - return rc; - iscsi_post_login_start_timers(conn); /* * Determine CPU mask to ensure connection's RX and TX kthreads @@ -868,10 +866,12 @@ int iscsi_post_login_handler( iscsit_thread_get_cpumask(conn); conn->conn_rx_reset_cpumask = 1; conn->conn_tx_reset_cpumask = 1; - + /* + * Wakeup the sleeping iscsi_target_rx_thread() now that + * iscsi_conn is in TARG_CONN_STATE_LOGGED_IN state. + */ + complete(&conn->rx_login_comp); iscsit_dec_conn_usage_count(conn); - - return 0; } static void iscsi_handle_login_thread_timeout(unsigned long data) @@ -879,8 +879,8 @@ static void iscsi_handle_login_thread_timeout(unsigned long data) struct iscsi_np *np = (struct iscsi_np *) data; spin_lock_bh(&np->np_thread_lock); - pr_err("iSCSI Login timeout on Network Portal %s:%hu\n", - np->np_ip, np->np_port); + pr_err("iSCSI Login timeout on Network Portal %pISc:%hu\n", + &np->np_sockaddr, np->np_port); if (np->np_login_timer_flags & ISCSI_TF_STOP) { spin_unlock_bh(&np->np_thread_lock); @@ -1358,8 +1358,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) spin_lock_bh(&np->np_thread_lock); if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) { spin_unlock_bh(&np->np_thread_lock); - pr_err("iSCSI Network Portal on %s:%hu currently not" - " active.\n", np->np_ip, np->np_port); + pr_err("iSCSI Network Portal on %pISc:%hu currently not" + " active.\n", &np->np_sockaddr, np->np_port); iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE); goto new_sess_out; @@ -1436,23 +1436,12 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) if (ret < 0) goto new_sess_out; - if (!conn->sess) { - pr_err("struct iscsi_conn session pointer is NULL!\n"); - goto new_sess_out; - } - iscsi_stop_login_thread_timer(np); - if (signal_pending(current)) - goto new_sess_out; - if (ret == 1) { tpg_np = conn->tpg_np; - ret = iscsi_post_login_handler(np, conn, zero_tsih); - if (ret < 0) - goto new_sess_out; - + iscsi_post_login_handler(np, conn, zero_tsih); iscsit_deaccess_np(np, tpg, tpg_np); } diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h index 29d098324b7f9..55cbf4533544a 100644 --- a/drivers/target/iscsi/iscsi_target_login.h +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -12,7 +12,8 @@ extern int iscsit_accept_np(struct iscsi_np *, struct iscsi_conn *); extern int iscsit_get_login_rx(struct iscsi_conn *, struct iscsi_login *); extern int iscsit_put_login_tx(struct iscsi_conn *, struct iscsi_login *, u32); extern void iscsit_free_conn(struct iscsi_np *, struct iscsi_conn *); -extern int iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8); +extern int iscsit_start_kthreads(struct iscsi_conn *); +extern void iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8); extern void iscsi_target_login_sess_out(struct iscsi_conn *, struct iscsi_np *, bool, bool); extern int iscsi_target_login_thread(void *); diff --git a/drivers/target/iscsi/iscsi_target_nego.c 
b/drivers/target/iscsi/iscsi_target_nego.c index 8c02fa34716fa..9a96f1712b7a3 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -17,6 +17,7 @@ ******************************************************************************/ #include +#include #include #include #include @@ -361,10 +362,24 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log ntohl(login_rsp->statsn), login->rsp_length); padding = ((-login->rsp_length) & 3); + /* + * Before sending the last login response containing the transition + * bit for full-feature-phase, go ahead and start up TX/RX threads + * now to avoid potential resource allocation failures after the + * final login response has been sent. + */ + if (login->login_complete) { + int rc = iscsit_start_kthreads(conn); + if (rc) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + } if (conn->conn_transport->iscsit_put_login_tx(conn, login, login->rsp_length + padding) < 0) - return -1; + goto err; login->rsp_length = 0; mutex_lock(&sess->cmdsn_mutex); @@ -373,6 +388,24 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log mutex_unlock(&sess->cmdsn_mutex); return 0; + +err: + if (login->login_complete) { + if (conn->rx_thread && conn->rx_thread_active) { + send_sig(SIGINT, conn->rx_thread, 1); + complete(&conn->rx_login_comp); + kthread_stop(conn->rx_thread); + } + if (conn->tx_thread && conn->tx_thread_active) { + send_sig(SIGINT, conn->tx_thread, 1); + kthread_stop(conn->tx_thread); + } + spin_lock(&iscsit_global->ts_bitmap_lock); + bitmap_release_region(iscsit_global->ts_bitmap, conn->bitmap_id, + get_order(1)); + spin_unlock(&iscsit_global->ts_bitmap_lock); + } + return -1; } static void iscsi_target_sk_data_ready(struct sock *sk) diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 5e3295fe404d7..3bc7d62c0a652 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -460,7 +460,6 @@ static bool iscsit_tpg_check_network_portal( struct iscsi_tpg_np *iscsit_tpg_add_network_portal( struct iscsi_portal_group *tpg, struct __kernel_sockaddr_storage *sockaddr, - char *ip_str, struct iscsi_tpg_np *tpg_np_parent, int network_transport) { @@ -470,8 +469,8 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( if (!tpg_np_parent) { if (iscsit_tpg_check_network_portal(tpg->tpg_tiqn, sockaddr, network_transport)) { - pr_err("Network Portal: %s already exists on a" - " different TPG on %s\n", ip_str, + pr_err("Network Portal: %pISc already exists on a" + " different TPG on %s\n", sockaddr, tpg->tpg_tiqn->tiqn); return ERR_PTR(-EEXIST); } @@ -484,7 +483,7 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( return ERR_PTR(-ENOMEM); } - np = iscsit_add_np(sockaddr, ip_str, network_transport); + np = iscsit_add_np(sockaddr, network_transport); if (IS_ERR(np)) { kfree(tpg_np); return ERR_CAST(np); @@ -514,8 +513,8 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( spin_unlock(&tpg_np_parent->tpg_np_parent_lock); } - pr_debug("CORE[%s] - Added Network Portal: %s:%hu,%hu on %s\n", - tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt, + pr_debug("CORE[%s] - Added Network Portal: %pISc:%hu,%hu on %s\n", + tpg->tpg_tiqn->tiqn, &np->np_sockaddr, np->np_port, tpg->tpgt, np->np_transport->name); return tpg_np; @@ -528,8 +527,8 @@ static int iscsit_tpg_release_np( { iscsit_clear_tpg_np_login_thread(tpg_np, tpg, true); 
- pr_debug("CORE[%s] - Removed Network Portal: %s:%hu,%hu on %s\n", - tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt, + pr_debug("CORE[%s] - Removed Network Portal: %pISc:%hu,%hu on %s\n", + tpg->tpg_tiqn->tiqn, &np->np_sockaddr, np->np_port, tpg->tpgt, np->np_transport->name); tpg_np->tpg_np = NULL; diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index 95ff5bdecd719..28abda89ea98d 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -22,7 +22,7 @@ extern struct iscsi_node_attrib *iscsit_tpg_get_node_attrib(struct iscsi_session extern void iscsit_tpg_del_external_nps(struct iscsi_tpg_np *); extern struct iscsi_tpg_np *iscsit_tpg_locate_child_np(struct iscsi_tpg_np *, int); extern struct iscsi_tpg_np *iscsit_tpg_add_network_portal(struct iscsi_portal_group *, - struct __kernel_sockaddr_storage *, char *, struct iscsi_tpg_np *, + struct __kernel_sockaddr_storage *, struct iscsi_tpg_np *, int); extern int iscsit_tpg_del_network_portal(struct iscsi_portal_group *, struct iscsi_tpg_np *); diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index b18edda3e8af8..231e2e0e58945 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -746,7 +746,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown); if (!rc && shutdown && se_cmd && se_cmd->se_sess) { __iscsit_free_cmd(cmd, true, shutdown); - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); } break; case ISCSI_OP_REJECT: @@ -762,7 +762,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown); if (!rc && shutdown && se_cmd->se_sess) { __iscsit_free_cmd(cmd, true, shutdown); - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); } break; } diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index a15411c79ae99..57fd4e14d4eb2 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -328,6 +328,9 @@ static int core_scsi3_pr_seq_non_holder( int legacy = 0; /* Act like a legacy device and return * RESERVATION CONFLICT on some CDBs */ + if (!se_sess->se_node_acl->device_list) + return 0; + se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun]; /* * Determine if the registration should be ignored due to diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 733824e3825f4..46b966d09af2f 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -321,7 +321,8 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o return 0; } -static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd, bool success) +static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd, bool success, + int *post_ret) { unsigned char *buf, *addr; struct scatterlist *sg; @@ -385,7 +386,8 @@ sbc_execute_rw(struct se_cmd *cmd) cmd->data_direction); } -static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success) +static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success, + int *post_ret) { struct se_device *dev = cmd->se_dev; @@ -395,8 +397,10 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success) * sent to the backend driver. 
*/ spin_lock_irq(&cmd->t_state_lock); - if ((cmd->transport_state & CMD_T_SENT) && !cmd->scsi_status) + if ((cmd->transport_state & CMD_T_SENT) && !cmd->scsi_status) { cmd->se_cmd_flags |= SCF_COMPARE_AND_WRITE_POST; + *post_ret = 1; + } spin_unlock_irq(&cmd->t_state_lock); /* @@ -408,7 +412,8 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success) return TCM_NO_SENSE; } -static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool success) +static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool success, + int *post_ret) { struct se_device *dev = cmd->se_dev; struct scatterlist *write_sg = NULL, *sg; @@ -504,11 +509,11 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes if (block_size < PAGE_SIZE) { sg_set_page(&write_sg[i], m.page, block_size, - block_size); + m.piter.sg->offset + block_size); } else { sg_miter_next(&m); sg_set_page(&write_sg[i], m.page, block_size, - 0); + m.piter.sg->offset); } len -= block_size; i++; diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 315ec3458eebc..ad48837ead424 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -71,23 +71,25 @@ void core_tmr_release_req(struct se_tmr_req *tmr) if (dev) { spin_lock_irqsave(&dev->se_tmr_lock, flags); - list_del(&tmr->tmr_list); + list_del_init(&tmr->tmr_list); spin_unlock_irqrestore(&dev->se_tmr_lock, flags); } kfree(tmr); } -static void core_tmr_handle_tas_abort( - struct se_node_acl *tmr_nacl, - struct se_cmd *cmd, - int tas) +static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas) { - bool remove = true; + unsigned long flags; + bool remove = true, send_tas; /* * TASK ABORTED status (TAS) bit support */ - if ((tmr_nacl && (tmr_nacl != cmd->se_sess->se_node_acl)) && tas) { + spin_lock_irqsave(&cmd->t_state_lock, flags); + send_tas = (cmd->transport_state & CMD_T_TAS); + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + + if (send_tas) { remove = false; transport_send_task_abort(cmd); } @@ -110,6 +112,47 @@ static int target_check_cdb_and_preempt(struct list_head *list, return 1; } +static bool __target_check_io_state(struct se_cmd *se_cmd, + struct se_session *tmr_sess, int tas) +{ + struct se_session *sess = se_cmd->se_sess; + + assert_spin_locked(&sess->sess_cmd_lock); + WARN_ON_ONCE(!irqs_disabled()); + /* + * If command already reached CMD_T_COMPLETE state within + * target_complete_cmd() or CMD_T_FABRIC_STOP due to shutdown, + * this se_cmd has been passed to fabric driver and will + * not be aborted. + * + * Otherwise, obtain a local se_cmd->cmd_kref now for TMR + * ABORT_TASK + LUN_RESET for CMD_T_ABORTED processing as + * long as se_cmd->cmd_kref is still active unless zero. 
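__target_check_io_state() above combines a state test with kref_get_unless_zero() so the abort path only ever takes a reference on a command that is still live. A self-contained sketch of the idiom (demo_* names are illustrative):

    #include <linux/kref.h>
    #include <linux/spinlock.h>

    struct demo_io {
            struct kref     kref;
            unsigned int    state;
    #define DEMO_COMPLETE   0x1
    };

    /* Caller holds the per-session list lock, so the object cannot be
     * unlinked while we look at it. */
    static bool demo_claim_for_abort(struct demo_io *io, spinlock_t *lock)
    {
            lockdep_assert_held(lock);

            if (io->state & DEMO_COMPLETE)
                    return false;           /* already past the point of no return */

            /* kref_get_unless_zero() fails if the final put already ran;
             * that is what makes the abort safe against concurrent release. */
            return kref_get_unless_zero(&io->kref);
    }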
+ */ + spin_lock(&se_cmd->t_state_lock); + if (se_cmd->transport_state & (CMD_T_COMPLETE | CMD_T_FABRIC_STOP)) { + pr_debug("Attempted to abort io tag: %u already complete or" + " fabric stop, skipping\n", + se_cmd->se_tfo->get_task_tag(se_cmd)); + spin_unlock(&se_cmd->t_state_lock); + return false; + } + if (sess->sess_tearing_down || se_cmd->cmd_wait_set) { + pr_debug("Attempted to abort io tag: %u already shutdown," + " skipping\n", se_cmd->se_tfo->get_task_tag(se_cmd)); + spin_unlock(&se_cmd->t_state_lock); + return false; + } + se_cmd->transport_state |= CMD_T_ABORTED; + + if ((tmr_sess != se_cmd->se_sess) && tas) + se_cmd->transport_state |= CMD_T_TAS; + + spin_unlock(&se_cmd->t_state_lock); + + return kref_get_unless_zero(&se_cmd->cmd_kref); +} + void core_tmr_abort_task( struct se_device *dev, struct se_tmr_req *tmr, @@ -136,25 +179,20 @@ void core_tmr_abort_task( printk("ABORT_TASK: Found referenced %s task_tag: %u\n", se_cmd->se_tfo->get_fabric_name(), ref_tag); - spin_lock(&se_cmd->t_state_lock); - if (se_cmd->transport_state & CMD_T_COMPLETE) { - printk("ABORT_TASK: ref_tag: %u already complete, skipping\n", ref_tag); - spin_unlock(&se_cmd->t_state_lock); + if (!__target_check_io_state(se_cmd, se_sess, 0)) { spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + target_put_sess_cmd(se_cmd); goto out; } - se_cmd->transport_state |= CMD_T_ABORTED; - spin_unlock(&se_cmd->t_state_lock); list_del_init(&se_cmd->se_cmd_list); - kref_get(&se_cmd->cmd_kref); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); cancel_work_sync(&se_cmd->work); transport_wait_for_tasks(se_cmd); - target_put_sess_cmd(se_sess, se_cmd); transport_cmd_finish_abort(se_cmd, true); + target_put_sess_cmd(se_cmd); printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for" " ref_tag: %d\n", ref_tag); @@ -175,9 +213,11 @@ static void core_tmr_drain_tmr_list( struct list_head *preempt_and_abort_list) { LIST_HEAD(drain_tmr_list); + struct se_session *sess; struct se_tmr_req *tmr_p, *tmr_pp; struct se_cmd *cmd; unsigned long flags; + bool rc; /* * Release all pending and outgoing TMRs aside from the received * LUN_RESET tmr.. @@ -203,17 +243,39 @@ static void core_tmr_drain_tmr_list( if (target_check_cdb_and_preempt(preempt_and_abort_list, cmd)) continue; + sess = cmd->se_sess; + if (WARN_ON_ONCE(!sess)) + continue; + + spin_lock(&sess->sess_cmd_lock); spin_lock(&cmd->t_state_lock); - if (!(cmd->transport_state & CMD_T_ACTIVE)) { + if (!(cmd->transport_state & CMD_T_ACTIVE) || + (cmd->transport_state & CMD_T_FABRIC_STOP)) { spin_unlock(&cmd->t_state_lock); + spin_unlock(&sess->sess_cmd_lock); continue; } if (cmd->t_state == TRANSPORT_ISTATE_PROCESSING) { spin_unlock(&cmd->t_state_lock); + spin_unlock(&sess->sess_cmd_lock); + continue; + } + if (sess->sess_tearing_down || cmd->cmd_wait_set) { + spin_unlock(&cmd->t_state_lock); + spin_unlock(&sess->sess_cmd_lock); continue; } + cmd->transport_state |= CMD_T_ABORTED; spin_unlock(&cmd->t_state_lock); + rc = kref_get_unless_zero(&cmd->cmd_kref); + if (!rc) { + printk("LUN_RESET TMR: non-zero kref_get_unless_zero\n"); + spin_unlock(&sess->sess_cmd_lock); + continue; + } + spin_unlock(&sess->sess_cmd_lock); + list_move_tail(&tmr_p->tmr_list, &drain_tmr_list); } spin_unlock_irqrestore(&dev->se_tmr_lock, flags); @@ -227,20 +289,26 @@ static void core_tmr_drain_tmr_list( (preempt_and_abort_list) ? 
"Preempt" : "", tmr_p, tmr_p->function, tmr_p->response, cmd->t_state); + cancel_work_sync(&cmd->work); + transport_wait_for_tasks(cmd); + transport_cmd_finish_abort(cmd, 1); + target_put_sess_cmd(cmd); } } static void core_tmr_drain_state_list( struct se_device *dev, struct se_cmd *prout_cmd, - struct se_node_acl *tmr_nacl, + struct se_session *tmr_sess, int tas, struct list_head *preempt_and_abort_list) { LIST_HEAD(drain_task_list); + struct se_session *sess; struct se_cmd *cmd, *next; unsigned long flags; + int rc; /* * Complete outstanding commands with TASK_ABORTED SAM status. @@ -279,6 +347,16 @@ static void core_tmr_drain_state_list( if (prout_cmd == cmd) continue; + sess = cmd->se_sess; + if (WARN_ON_ONCE(!sess)) + continue; + + spin_lock(&sess->sess_cmd_lock); + rc = __target_check_io_state(cmd, tmr_sess, tas); + spin_unlock(&sess->sess_cmd_lock); + if (!rc) + continue; + list_move_tail(&cmd->state_list, &drain_task_list); cmd->state_active = false; } @@ -286,7 +364,7 @@ static void core_tmr_drain_state_list( while (!list_empty(&drain_task_list)) { cmd = list_entry(drain_task_list.next, struct se_cmd, state_list); - list_del(&cmd->state_list); + list_del_init(&cmd->state_list); pr_debug("LUN_RESET: %s cmd: %p" " ITT/CmdSN: 0x%08x/0x%08x, i_state: %d, t_state: %d" @@ -310,16 +388,11 @@ static void core_tmr_drain_state_list( * loop above, but we do it down here given that * cancel_work_sync may block. */ - if (cmd->t_state == TRANSPORT_COMPLETE) - cancel_work_sync(&cmd->work); - - spin_lock_irqsave(&cmd->t_state_lock, flags); - target_stop_cmd(cmd, &flags); - - cmd->transport_state |= CMD_T_ABORTED; - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + cancel_work_sync(&cmd->work); + transport_wait_for_tasks(cmd); - core_tmr_handle_tas_abort(tmr_nacl, cmd, tas); + core_tmr_handle_tas_abort(cmd, tas); + target_put_sess_cmd(cmd); } } @@ -331,6 +404,7 @@ int core_tmr_lun_reset( { struct se_node_acl *tmr_nacl = NULL; struct se_portal_group *tmr_tpg = NULL; + struct se_session *tmr_sess = NULL; int tas; /* * TASK_ABORTED status bit, this is configurable via ConfigFS @@ -349,8 +423,9 @@ int core_tmr_lun_reset( * or struct se_device passthrough.. */ if (tmr && tmr->task_cmd && tmr->task_cmd->se_sess) { - tmr_nacl = tmr->task_cmd->se_sess->se_node_acl; - tmr_tpg = tmr->task_cmd->se_sess->se_tpg; + tmr_sess = tmr->task_cmd->se_sess; + tmr_nacl = tmr_sess->se_node_acl; + tmr_tpg = tmr_sess->se_tpg; if (tmr_nacl && tmr_tpg) { pr_debug("LUN_RESET: TMR caller fabric: %s" " initiator port %s\n", @@ -363,7 +438,7 @@ int core_tmr_lun_reset( dev->transport->name, tas); core_tmr_drain_tmr_list(dev, tmr, preempt_and_abort_list); - core_tmr_drain_state_list(dev, prout_cmd, tmr_nacl, tas, + core_tmr_drain_state_list(dev, prout_cmd, tmr_sess, tas, preempt_and_abort_list); /* diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 675f2d9d1f14c..be12b9d840522 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -555,9 +555,6 @@ void transport_deregister_session(struct se_session *se_sess) } EXPORT_SYMBOL(transport_deregister_session); -/* - * Called with cmd->t_state_lock held. 
- */ static void target_remove_from_state_list(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; @@ -582,10 +579,6 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, { unsigned long flags; - spin_lock_irqsave(&cmd->t_state_lock, flags); - if (write_pending) - cmd->t_state = TRANSPORT_WRITE_PENDING; - if (remove_from_lists) { target_remove_from_state_list(cmd); @@ -595,6 +588,10 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, cmd->se_lun = NULL; } + spin_lock_irqsave(&cmd->t_state_lock, flags); + if (write_pending) + cmd->t_state = TRANSPORT_WRITE_PENDING; + /* * Determine if frontend context caller is requesting the stopping of * this command for frontend exceptions. @@ -649,6 +646,8 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd) void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) { + bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF); + if (cmd->se_cmd_flags & SCF_SE_LUN_CMD) transport_lun_remove_cmd(cmd); /* @@ -660,7 +659,7 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) if (transport_cmd_check_stop_to_fabric(cmd)) return; - if (remove) + if (remove && ack_kref) transport_put_cmd(cmd); } @@ -728,7 +727,7 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) * Check for case where an explicit ABORT_TASK has been received * and transport_wait_for_tasks() will be waiting for completion.. */ - if (cmd->transport_state & CMD_T_ABORTED && + if (cmd->transport_state & CMD_T_ABORTED || cmd->transport_state & CMD_T_STOP) { spin_unlock_irqrestore(&cmd->t_state_lock, flags); complete_all(&cmd->t_transport_stop_comp); @@ -1419,7 +1418,7 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess * for fabrics using TARGET_SCF_ACK_KREF that expect a second * kref_put() to happen during fabric packet acknowledgement. 
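The comment above refers to the TARGET_SCF_ACK_KREF convention: a second reference taken at submission time and dropped only when the initiator acknowledges the response, so the descriptor stays valid across response queueing. A sketch of the shape, with assumed demo_* names and the kref assumed already initialised:

    #include <linux/kref.h>
    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct demo_sess_cmd {
            struct kref             kref;
            struct list_head        node;
    };

    static void demo_get_sess_cmd(struct demo_sess_cmd *cmd,
                                  struct list_head *sess_list,
                                  spinlock_t *sess_lock, bool ack_kref)
    {
            if (ack_kref)
                    kref_get(&cmd->kref);   /* dropped by the fabric's ack path */

            spin_lock_irq(sess_lock);
            list_add_tail(&cmd->node, sess_list);
            spin_unlock_irq(sess_lock);
    }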
*/ - ret = target_get_sess_cmd(se_sess, se_cmd, (flags & TARGET_SCF_ACK_KREF)); + ret = target_get_sess_cmd(se_cmd, flags & TARGET_SCF_ACK_KREF); if (ret) return ret; /* @@ -1433,7 +1432,7 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess rc = transport_lookup_cmd_lun(se_cmd, unpacked_lun); if (rc) { transport_send_check_condition_and_sense(se_cmd, rc, 0); - target_put_sess_cmd(se_sess, se_cmd); + target_put_sess_cmd(se_cmd); return 0; } @@ -1584,7 +1583,7 @@ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess, se_cmd->se_tmr_req->ref_task_tag = tag; /* See target_submit_cmd for commentary */ - ret = target_get_sess_cmd(se_sess, se_cmd, (flags & TARGET_SCF_ACK_KREF)); + ret = target_get_sess_cmd(se_cmd, flags & TARGET_SCF_ACK_KREF); if (ret) { core_tmr_release_req(se_cmd->se_tmr_req); return ret; @@ -1638,7 +1637,7 @@ bool target_stop_cmd(struct se_cmd *cmd, unsigned long *flags) void transport_generic_request_failure(struct se_cmd *cmd, sense_reason_t sense_reason) { - int ret = 0; + int ret = 0, post_ret = 0; pr_debug("-----[ Storage Engine Exception for cmd: %p ITT: 0x%08x" " CDB: 0x%02x\n", cmd, cmd->se_tfo->get_task_tag(cmd), @@ -1661,7 +1660,7 @@ void transport_generic_request_failure(struct se_cmd *cmd, */ if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) && cmd->transport_complete_callback) - cmd->transport_complete_callback(cmd, false); + cmd->transport_complete_callback(cmd, false, &post_ret); switch (sense_reason) { case TCM_NON_EXISTENT_LUN: @@ -1836,19 +1835,21 @@ static bool target_handle_task_attr(struct se_cmd *cmd) return true; } +static int __transport_check_aborted_status(struct se_cmd *, int); + void target_execute_cmd(struct se_cmd *cmd) { - /* - * If the received CDB has aleady been aborted stop processing it here. - */ - if (transport_check_aborted_status(cmd, 1)) - return; - /* * Determine if frontend context caller is requesting the stopping of * this command for frontend exceptions. + * + * If the received CDB has aleady been aborted stop processing it here. */ spin_lock_irq(&cmd->t_state_lock); + if (__transport_check_aborted_status(cmd, 1)) { + spin_unlock_irq(&cmd->t_state_lock); + return; + } if (cmd->transport_state & CMD_T_STOP) { pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08x\n", __func__, __LINE__, @@ -2056,11 +2057,13 @@ static void target_complete_ok_work(struct work_struct *work) */ if (cmd->transport_complete_callback) { sense_reason_t rc; + bool caw = (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE); + bool zero_dl = !(cmd->data_length); + int post_ret = 0; - rc = cmd->transport_complete_callback(cmd, true); - if (!rc && !(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE_POST)) { - if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) && - !cmd->data_length) + rc = cmd->transport_complete_callback(cmd, true, &post_ret); + if (!rc && !post_ret) { + if (caw && zero_dl) goto queue_rsp; return; @@ -2209,37 +2212,19 @@ static inline void transport_free_pages(struct se_cmd *cmd) } /** - * transport_release_cmd - free a command - * @cmd: command to free + * transport_put_cmd - release a reference to a command + * @cmd: command to release * - * This routine unconditionally frees a command, and reference counting - * or list removal must be done in the caller. + * This routine releases our reference to the command and frees it if possible. 
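target_execute_cmd() above moves the aborted check under t_state_lock, next to the CMD_T_STOP check, so an abort can no longer land between the two tests. A reduced sketch of the pattern (names are illustrative):

    #include <linux/spinlock.h>

    #define DEMO_ABORTED    0x1
    #define DEMO_STOP       0x2
    #define DEMO_ACTIVE     0x4

    struct demo_req {
            spinlock_t      lock;
            unsigned int    state;
    };

    static void demo_execute(struct demo_req *req)
    {
            spin_lock_irq(&req->lock);
            if (req->state & (DEMO_ABORTED | DEMO_STOP)) {
                    spin_unlock_irq(&req->lock);
                    return;                 /* frontend already gave up on it */
            }
            req->state |= DEMO_ACTIVE;
            spin_unlock_irq(&req->lock);

            /* ...hand the request to the backend outside the lock... */
    }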
*/ -static int transport_release_cmd(struct se_cmd *cmd) +static int transport_put_cmd(struct se_cmd *cmd) { BUG_ON(!cmd->se_tfo); - - if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) - core_tmr_release_req(cmd->se_tmr_req); - if (cmd->t_task_cdb != cmd->__t_task_cdb) - kfree(cmd->t_task_cdb); /* * If this cmd has been setup with target_get_sess_cmd(), drop * the kref and call ->release_cmd() in kref callback. */ - return target_put_sess_cmd(cmd->se_sess, cmd); -} - -/** - * transport_put_cmd - release a reference to a command - * @cmd: command to release - * - * This routine releases our reference to the command and frees it if possible. - */ -static int transport_put_cmd(struct se_cmd *cmd) -{ - transport_free_pages(cmd); - return transport_release_cmd(cmd); + return target_put_sess_cmd(cmd); } void *transport_kmap_data_sg(struct se_cmd *cmd) @@ -2437,47 +2422,71 @@ static void transport_write_pending_qf(struct se_cmd *cmd) } } -int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) +static bool +__transport_wait_for_tasks(struct se_cmd *, bool, bool *, bool *, + unsigned long *flags); + +static void target_wait_free_cmd(struct se_cmd *cmd, bool *aborted, bool *tas) { unsigned long flags; + + spin_lock_irqsave(&cmd->t_state_lock, flags); + __transport_wait_for_tasks(cmd, true, aborted, tas, &flags); + spin_unlock_irqrestore(&cmd->t_state_lock, flags); +} + +int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) +{ int ret = 0; + bool aborted = false, tas = false; if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD)) { if (wait_for_tasks && (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) - transport_wait_for_tasks(cmd); + target_wait_free_cmd(cmd, &aborted, &tas); - ret = transport_release_cmd(cmd); + if (!aborted || tas) + ret = transport_put_cmd(cmd); } else { if (wait_for_tasks) - transport_wait_for_tasks(cmd); + target_wait_free_cmd(cmd, &aborted, &tas); /* * Handle WRITE failure case where transport_generic_new_cmd() * has already added se_cmd to state_list, but fabric has * failed command before I/O submission. */ - if (cmd->state_active) { - spin_lock_irqsave(&cmd->t_state_lock, flags); + if (cmd->state_active) target_remove_from_state_list(cmd); - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - } if (cmd->se_lun) transport_lun_remove_cmd(cmd); - ret = transport_put_cmd(cmd); + if (!aborted || tas) + ret = transport_put_cmd(cmd); + } + /* + * If the task has been internally aborted due to TMR ABORT_TASK + * or LUN_RESET, target_core_tmr.c is responsible for performing + * the remaining calls to target_put_sess_cmd(), and not the + * callers of this function. 
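The comment above describes a reference hand-off: when a TMR has aborted the command, target_core_tmr.c performs the final target_put_sess_cmd(), and transport_generic_free_cmd() only waits on cmd_wait_comp before releasing the fabric descriptor. A rough sketch, with demo_* stand-ins for the real helpers:

    #include <linux/completion.h>

    struct demo_fcmd {
            struct completion wait_comp;
    };

    static int demo_put(struct demo_fcmd *cmd) { return 0; }   /* kref drop stand-in */
    static void demo_release(struct demo_fcmd *cmd) { }        /* ->release_cmd stand-in */

    static int demo_free_cmd(struct demo_fcmd *cmd, bool aborted, bool tas)
    {
            int ret = 0;

            if (!aborted || tas)
                    ret = demo_put(cmd);    /* normal reference drop */

            if (aborted) {
                    /* the abort path owns the final put; just wait for it */
                    wait_for_completion(&cmd->wait_comp);
                    demo_release(cmd);      /* free the fabric descriptor */
                    ret = 1;
            }
            return ret;
    }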
+ */ + if (aborted) { + pr_debug("Detected CMD_T_ABORTED for ITT: %u\n", + cmd->se_tfo->get_task_tag(cmd)); + wait_for_completion(&cmd->cmd_wait_comp); + cmd->se_tfo->release_cmd(cmd); + ret = 1; } return ret; } EXPORT_SYMBOL(transport_generic_free_cmd); /* target_get_sess_cmd - Add command to active ->sess_cmd_list - * @se_sess: session to reference * @se_cmd: command descriptor to add * @ack_kref: Signal that fabric will perform an ack target_put_sess_cmd() */ -int target_get_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd, - bool ack_kref) +int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref) { + struct se_session *se_sess = se_cmd->se_sess; unsigned long flags; int ret = 0; @@ -2499,41 +2508,63 @@ int target_get_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd, spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); if (ret && ack_kref) - target_put_sess_cmd(se_sess, se_cmd); + target_put_sess_cmd(se_cmd); return ret; } EXPORT_SYMBOL(target_get_sess_cmd); +static void target_free_cmd_mem(struct se_cmd *cmd) +{ + transport_free_pages(cmd); + + if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) + core_tmr_release_req(cmd->se_tmr_req); + if (cmd->t_task_cdb != cmd->__t_task_cdb) + kfree(cmd->t_task_cdb); +} + static void target_release_cmd_kref(struct kref *kref) __releases(&se_cmd->se_sess->sess_cmd_lock) { struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref); struct se_session *se_sess = se_cmd->se_sess; + bool fabric_stop; if (list_empty(&se_cmd->se_cmd_list)) { spin_unlock(&se_sess->sess_cmd_lock); + target_free_cmd_mem(se_cmd); se_cmd->se_tfo->release_cmd(se_cmd); return; } - if (se_sess->sess_tearing_down && se_cmd->cmd_wait_set) { + + spin_lock(&se_cmd->t_state_lock); + fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP); + spin_unlock(&se_cmd->t_state_lock); + + if (se_cmd->cmd_wait_set || fabric_stop) { + list_del_init(&se_cmd->se_cmd_list); spin_unlock(&se_sess->sess_cmd_lock); + target_free_cmd_mem(se_cmd); complete(&se_cmd->cmd_wait_comp); return; } - list_del(&se_cmd->se_cmd_list); + list_del_init(&se_cmd->se_cmd_list); spin_unlock(&se_sess->sess_cmd_lock); + target_free_cmd_mem(se_cmd); se_cmd->se_tfo->release_cmd(se_cmd); } /* target_put_sess_cmd - Check for active I/O shutdown via kref_put - * @se_sess: session to reference * @se_cmd: command descriptor to drop */ -int target_put_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd) +int target_put_sess_cmd(struct se_cmd *se_cmd) { + struct se_session *se_sess = se_cmd->se_sess; + if (!se_sess) { + target_free_cmd_mem(se_cmd); se_cmd->se_tfo->release_cmd(se_cmd); return 1; } @@ -2551,6 +2582,7 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess) { struct se_cmd *se_cmd; unsigned long flags; + int rc; spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); if (se_sess->sess_tearing_down) { @@ -2560,8 +2592,15 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess) se_sess->sess_tearing_down = 1; list_splice_init(&se_sess->sess_cmd_list, &se_sess->sess_wait_list); - list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list) - se_cmd->cmd_wait_set = 1; + list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list) { + rc = kref_get_unless_zero(&se_cmd->cmd_kref); + if (rc) { + se_cmd->cmd_wait_set = 1; + spin_lock(&se_cmd->t_state_lock); + se_cmd->transport_state |= CMD_T_FABRIC_STOP; + spin_unlock(&se_cmd->t_state_lock); + } + } spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); } @@ -2574,15 +2613,25 @@ void 
target_wait_for_sess_cmds(struct se_session *se_sess) { struct se_cmd *se_cmd, *tmp_cmd; unsigned long flags; + bool tas; list_for_each_entry_safe(se_cmd, tmp_cmd, &se_sess->sess_wait_list, se_cmd_list) { - list_del(&se_cmd->se_cmd_list); + list_del_init(&se_cmd->se_cmd_list); pr_debug("Waiting for se_cmd: %p t_state: %d, fabric state:" " %d\n", se_cmd, se_cmd->t_state, se_cmd->se_tfo->get_cmd_state(se_cmd)); + spin_lock_irqsave(&se_cmd->t_state_lock, flags); + tas = (se_cmd->transport_state & CMD_T_TAS); + spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); + + if (!target_put_sess_cmd(se_cmd)) { + if (tas) + target_put_sess_cmd(se_cmd); + } + wait_for_completion(&se_cmd->cmd_wait_comp); pr_debug("After cmd_wait_comp: se_cmd: %p t_state: %d" " fabric state: %d\n", se_cmd, se_cmd->t_state, @@ -2625,34 +2674,38 @@ int transport_clear_lun_ref(struct se_lun *lun) return 0; } -/** - * transport_wait_for_tasks - wait for completion to occur - * @cmd: command to wait - * - * Called from frontend fabric context to wait for storage engine - * to pause and/or release frontend generated struct se_cmd. - */ -bool transport_wait_for_tasks(struct se_cmd *cmd) +static bool +__transport_wait_for_tasks(struct se_cmd *cmd, bool fabric_stop, + bool *aborted, bool *tas, unsigned long *flags) + __releases(&cmd->t_state_lock) + __acquires(&cmd->t_state_lock) { - unsigned long flags; - spin_lock_irqsave(&cmd->t_state_lock, flags); + assert_spin_locked(&cmd->t_state_lock); + WARN_ON_ONCE(!irqs_disabled()); + + if (fabric_stop) + cmd->transport_state |= CMD_T_FABRIC_STOP; + + if (cmd->transport_state & CMD_T_ABORTED) + *aborted = true; + + if (cmd->transport_state & CMD_T_TAS) + *tas = true; + if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD) && - !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) return false; - } if (!(cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) && - !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) return false; - } - if (!(cmd->transport_state & CMD_T_ACTIVE)) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + if (!(cmd->transport_state & CMD_T_ACTIVE)) + return false; + + if (fabric_stop && *aborted) return false; - } cmd->transport_state |= CMD_T_STOP; @@ -2661,20 +2714,37 @@ bool transport_wait_for_tasks(struct se_cmd *cmd) cmd, cmd->se_tfo->get_task_tag(cmd), cmd->se_tfo->get_cmd_state(cmd), cmd->t_state); - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + spin_unlock_irqrestore(&cmd->t_state_lock, *flags); wait_for_completion(&cmd->t_transport_stop_comp); - spin_lock_irqsave(&cmd->t_state_lock, flags); + spin_lock_irqsave(&cmd->t_state_lock, *flags); cmd->transport_state &= ~(CMD_T_ACTIVE | CMD_T_STOP); pr_debug("wait_for_tasks: Stopped wait_for_completion(" "&cmd->t_transport_stop_comp) for ITT: 0x%08x\n", cmd->se_tfo->get_task_tag(cmd)); + return true; +} + +/** + * transport_wait_for_tasks - wait for completion to occur + * @cmd: command to wait + * + * Called from frontend fabric context to wait for storage engine + * to pause and/or release frontend generated struct se_cmd. 
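__transport_wait_for_tasks() above is entered with t_state_lock held, drops it around the sleep, and re-takes it before returning, which the __releases/__acquires sparse annotations document. A sketch of that helper shape under illustrative names:

    #include <linux/completion.h>
    #include <linux/spinlock.h>

    #define DEMO_T_ACTIVE   0x1
    #define DEMO_T_STOP     0x2

    struct demo_task {
            spinlock_t              lock;
            unsigned int            state;
            struct completion       stop_comp;
    };

    static bool demo_wait_stopped(struct demo_task *t, unsigned long *flags)
            __releases(&t->lock)
            __acquires(&t->lock)
    {
            if (!(t->state & DEMO_T_ACTIVE))
                    return false;

            t->state |= DEMO_T_STOP;

            spin_unlock_irqrestore(&t->lock, *flags);
            wait_for_completion(&t->stop_comp);     /* sleep outside the lock */
            spin_lock_irqsave(&t->lock, *flags);

            t->state &= ~(DEMO_T_ACTIVE | DEMO_T_STOP);
            return true;
    }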
+ */ +bool transport_wait_for_tasks(struct se_cmd *cmd) +{ + unsigned long flags; + bool ret, aborted = false, tas = false; + + spin_lock_irqsave(&cmd->t_state_lock, flags); + ret = __transport_wait_for_tasks(cmd, false, &aborted, &tas, &flags); spin_unlock_irqrestore(&cmd->t_state_lock, flags); - return true; + return ret; } EXPORT_SYMBOL(transport_wait_for_tasks); @@ -2960,8 +3030,13 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, } EXPORT_SYMBOL(transport_send_check_condition_and_sense); -int transport_check_aborted_status(struct se_cmd *cmd, int send_status) +static int __transport_check_aborted_status(struct se_cmd *cmd, int send_status) + __releases(&cmd->t_state_lock) + __acquires(&cmd->t_state_lock) { + assert_spin_locked(&cmd->t_state_lock); + WARN_ON_ONCE(!irqs_disabled()); + if (!(cmd->transport_state & CMD_T_ABORTED)) return 0; @@ -2969,19 +3044,37 @@ int transport_check_aborted_status(struct se_cmd *cmd, int send_status) * If cmd has been aborted but either no status is to be sent or it has * already been sent, just return */ - if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS)) + if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS)) { + if (send_status) + cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS; return 1; + } - pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB: 0x%02x ITT: 0x%08x\n", - cmd->t_task_cdb[0], cmd->se_tfo->get_task_tag(cmd)); + pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB:" + " 0x%02x ITT: 0x%08x\n", cmd->t_task_cdb[0], + cmd->se_tfo->get_task_tag(cmd)); cmd->se_cmd_flags &= ~SCF_SEND_DELAYED_TAS; cmd->scsi_status = SAM_STAT_TASK_ABORTED; trace_target_cmd_complete(cmd); + + spin_unlock_irq(&cmd->t_state_lock); cmd->se_tfo->queue_status(cmd); + spin_lock_irq(&cmd->t_state_lock); return 1; } + +int transport_check_aborted_status(struct se_cmd *cmd, int send_status) +{ + int ret; + + spin_lock_irq(&cmd->t_state_lock); + ret = __transport_check_aborted_status(cmd, send_status); + spin_unlock_irq(&cmd->t_state_lock); + + return ret; +} EXPORT_SYMBOL(transport_check_aborted_status); void transport_send_task_abort(struct se_cmd *cmd) @@ -3003,11 +3096,17 @@ void transport_send_task_abort(struct se_cmd *cmd) */ if (cmd->data_direction == DMA_TO_DEVICE) { if (cmd->se_tfo->write_pending_status(cmd) != 0) { - cmd->transport_state |= CMD_T_ABORTED; + spin_lock_irqsave(&cmd->t_state_lock, flags); + if (cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS) { + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + goto send_abort; + } cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS; + spin_unlock_irqrestore(&cmd->t_state_lock, flags); return; } } +send_abort: cmd->scsi_status = SAM_STAT_TASK_ABORTED; transport_lun_remove_cmd(cmd); @@ -3025,8 +3124,17 @@ static void target_tmr_work(struct work_struct *work) struct se_cmd *cmd = container_of(work, struct se_cmd, work); struct se_device *dev = cmd->se_dev; struct se_tmr_req *tmr = cmd->se_tmr_req; + unsigned long flags; int ret; + spin_lock_irqsave(&cmd->t_state_lock, flags); + if (cmd->transport_state & CMD_T_ABORTED) { + tmr->response = TMR_FUNCTION_REJECTED; + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + goto check_stop; + } + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + switch (tmr->function) { case TMR_ABORT_TASK: core_tmr_abort_task(dev, tmr, cmd->se_sess); @@ -3054,9 +3162,17 @@ static void target_tmr_work(struct work_struct *work) break; } + spin_lock_irqsave(&cmd->t_state_lock, flags); + if (cmd->transport_state & CMD_T_ABORTED) { + 
spin_unlock_irqrestore(&cmd->t_state_lock, flags); + goto check_stop; + } cmd->t_state = TRANSPORT_ISTATE_PROCESSING; + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + cmd->se_tfo->queue_tm_rsp(cmd); +check_stop: transport_cmd_check_stop_to_fabric(cmd); } diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c index 1738b16469887..9fc33e84439ae 100644 --- a/drivers/target/target_core_ua.c +++ b/drivers/target/target_core_ua.c @@ -48,7 +48,7 @@ target_scsi3_ua_check(struct se_cmd *cmd) return 0; nacl = sess->se_node_acl; - if (!nacl) + if (!nacl || !nacl->device_list) return 0; deve = nacl->device_list[cmd->orig_fe_lun]; @@ -90,7 +90,7 @@ int core_scsi3_ua_allocate( /* * PASSTHROUGH OPS */ - if (!nacl) + if (!nacl || !nacl->device_list) return -EINVAL; ua = kmem_cache_zalloc(se_ua_cache, GFP_ATOMIC); @@ -208,7 +208,7 @@ void core_scsi3_ua_for_check_condition( return; nacl = sess->se_node_acl; - if (!nacl) + if (!nacl || !nacl->device_list) return; spin_lock_irq(&nacl->device_list_lock); @@ -276,7 +276,7 @@ int core_scsi3_ua_clear_for_request_sense( return -EINVAL; nacl = sess->se_node_acl; - if (!nacl) + if (!nacl || !nacl->device_list) return -EINVAL; spin_lock_irq(&nacl->device_list_lock); diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 8fd680ac941bd..4609305a15916 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -465,6 +465,8 @@ int target_xcopy_setup_pt(void) memset(&xcopy_pt_sess, 0, sizeof(struct se_session)); INIT_LIST_HEAD(&xcopy_pt_sess.sess_list); INIT_LIST_HEAD(&xcopy_pt_sess.sess_acl_list); + INIT_LIST_HEAD(&xcopy_pt_sess.sess_cmd_list); + spin_lock_init(&xcopy_pt_sess.sess_cmd_lock); xcopy_pt_nacl.se_tpg = &xcopy_pt_tpg; xcopy_pt_nacl.nacl_sess = &xcopy_pt_sess; @@ -666,7 +668,7 @@ static int target_xcopy_read_source( pr_debug("XCOPY: Built READ_16: LBA: %llu Sectors: %u Length: %u\n", (unsigned long long)src_lba, src_sectors, length); - transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length, + transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, length, DMA_FROM_DEVICE, 0, &xpt_cmd->sense_buffer[0]); xop->src_pt_cmd = xpt_cmd; @@ -726,7 +728,7 @@ static int target_xcopy_write_destination( pr_debug("XCOPY: Built WRITE_16: LBA: %llu Sectors: %u Length: %u\n", (unsigned long long)dst_lba, dst_sectors, length); - transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length, + transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, length, DMA_TO_DEVICE, 0, &xpt_cmd->sense_buffer[0]); xop->dst_pt_cmd = xpt_cmd; diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index 1d30b09756515..67098a8a7a021 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -1209,6 +1209,8 @@ static int exynos_tmu_probe(struct platform_device *pdev) if (!IS_ERR(data->clk_sec)) clk_unprepare(data->clk_sec); err_sensor: + if (!IS_ERR_OR_NULL(data->regulator)) + regulator_disable(data->regulator); thermal_zone_of_sensor_unregister(&pdev->dev, data->tzd); return ret; diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index 5a0f12d08e8b8..ec4ea5940bf76 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -63,6 +63,19 @@ static unsigned long get_target_state(struct thermal_instance *instance, next_target = instance->target; dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state); + if (!instance->initialized) { + if (throttle) { + next_target = (cur_state + 1) >= 
instance->upper ? + instance->upper : + ((cur_state + 1) < instance->lower ? + instance->lower : (cur_state + 1)); + } else { + next_target = THERMAL_NO_TARGET; + } + + return next_target; + } + switch (trend) { case THERMAL_TREND_RAISING: if (throttle) { @@ -149,7 +162,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) dev_dbg(&instance->cdev->device, "old_target=%d, target=%d\n", old_target, (int)instance->target); - if (old_target == instance->target) + if (instance->initialized && old_target == instance->target) continue; /* Activate a passive thermal instance */ @@ -161,7 +174,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) instance->target == THERMAL_NO_TARGET) update_passive_instance(tz, trip_type, -1); - + instance->initialized = true; instance->cdev->updated = false; /* cdev needs update */ } diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 4108db7e10c10..a3282bfb343dc 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -37,6 +37,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -59,6 +60,8 @@ static LIST_HEAD(thermal_governor_list); static DEFINE_MUTEX(thermal_list_lock); static DEFINE_MUTEX(thermal_governor_lock); +static atomic_t in_suspend; + static struct thermal_governor *def_governor; static struct thermal_governor *__find_governor(const char *name) @@ -471,14 +474,31 @@ static void update_temperature(struct thermal_zone_device *tz) mutex_unlock(&tz->lock); trace_thermal_temperature(tz); - dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n", - tz->last_temperature, tz->temperature); + if (tz->last_temperature == THERMAL_TEMP_INVALID) + dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n", + tz->temperature); + else + dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n", + tz->last_temperature, tz->temperature); +} + +static void thermal_zone_device_reset(struct thermal_zone_device *tz) +{ + struct thermal_instance *pos; + + tz->temperature = THERMAL_TEMP_INVALID; + tz->passive = 0; + list_for_each_entry(pos, &tz->thermal_instances, tz_node) + pos->initialized = false; } void thermal_zone_device_update(struct thermal_zone_device *tz) { int count; + if (atomic_read(&in_suspend)) + return; + if (!tz->ops->get_temp) return; @@ -1016,6 +1036,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, if (!result) { list_add_tail(&dev->tz_node, &tz->thermal_instances); list_add_tail(&dev->cdev_node, &cdev->thermal_instances); + atomic_set(&tz->need_update, 1); } mutex_unlock(&cdev->lock); mutex_unlock(&tz->lock); @@ -1122,6 +1143,7 @@ __thermal_cooling_device_register(struct device_node *np, const struct thermal_cooling_device_ops *ops) { struct thermal_cooling_device *cdev; + struct thermal_zone_device *pos = NULL; int result; if (type && strlen(type) >= THERMAL_NAME_LENGTH) @@ -1166,6 +1188,12 @@ __thermal_cooling_device_register(struct device_node *np, /* Update binding information for 'this' new cdev */ bind_cdev(cdev); + mutex_lock(&thermal_list_lock); + list_for_each_entry(pos, &thermal_tz_list, node) + if (atomic_cmpxchg(&pos->need_update, 1, 0)) + thermal_zone_device_update(pos); + mutex_unlock(&thermal_list_lock); + return cdev; } @@ -1496,6 +1524,8 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, tz->trips = trips; tz->passive_delay = passive_delay; tz->polling_delay = polling_delay; + /* A new thermal zone needs 
to be updated anyway. */ + atomic_set(&tz->need_update, 1); dev_set_name(&tz->device, "thermal_zone%d", tz->id); result = device_register(&tz->device); @@ -1576,7 +1606,10 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, if (!tz->ops->get_temp) thermal_zone_device_set_polling(tz, 0); - thermal_zone_device_update(tz); + thermal_zone_device_reset(tz); + /* Update the new thermal zone and mark it as already updated. */ + if (atomic_cmpxchg(&tz->need_update, 1, 0)) + thermal_zone_device_update(tz); return tz; @@ -1810,6 +1843,36 @@ static void thermal_unregister_governors(void) thermal_gov_user_space_unregister(); } +static int thermal_pm_notify(struct notifier_block *nb, + unsigned long mode, void *_unused) +{ + struct thermal_zone_device *tz; + + switch (mode) { + case PM_HIBERNATION_PREPARE: + case PM_RESTORE_PREPARE: + case PM_SUSPEND_PREPARE: + atomic_set(&in_suspend, 1); + break; + case PM_POST_HIBERNATION: + case PM_POST_RESTORE: + case PM_POST_SUSPEND: + atomic_set(&in_suspend, 0); + list_for_each_entry(tz, &thermal_tz_list, node) { + thermal_zone_device_reset(tz); + thermal_zone_device_update(tz); + } + break; + default: + break; + } + return 0; +} + +static struct notifier_block thermal_pm_nb = { + .notifier_call = thermal_pm_notify, +}; + static int __init thermal_init(void) { int result; @@ -1830,6 +1893,11 @@ static int __init thermal_init(void) if (result) goto exit_netlink; + result = register_pm_notifier(&thermal_pm_nb); + if (result) + pr_warn("Thermal: Can not register suspend notifier, return %d\n", + result); + return 0; exit_netlink: @@ -1849,6 +1917,7 @@ static int __init thermal_init(void) static void __exit thermal_exit(void) { + unregister_pm_notifier(&thermal_pm_nb); of_thermal_destroy_zones(); genetlink_exit(); class_unregister(&thermal_class); diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 8e391812e5037..dce86ee8e9d71 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -41,6 +41,7 @@ struct thermal_instance { struct thermal_zone_device *tz; struct thermal_cooling_device *cdev; int trip; + bool initialized; unsigned long upper; /* Highest cooling state for this trip point */ unsigned long lower; /* Lowest cooling state for this trip point */ unsigned long target; /* expected cooling state */ diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 396344cb011fd..33088c70ef3b1 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -169,7 +169,7 @@ static inline int tty_copy_to_user(struct tty_struct *tty, { struct n_tty_data *ldata = tty->disc_data; - tty_audit_add_data(tty, to, n, ldata->icanon); + tty_audit_add_data(tty, from, n, ldata->icanon); return copy_to_user(to, from, n); } @@ -258,16 +258,13 @@ static void n_tty_check_throttle(struct tty_struct *tty) static void n_tty_check_unthrottle(struct tty_struct *tty) { - if (tty->driver->type == TTY_DRIVER_TYPE_PTY && - tty->link->ldisc->ops->write_wakeup == n_tty_write_wakeup) { + if (tty->driver->type == TTY_DRIVER_TYPE_PTY) { if (chars_in_buffer(tty) > TTY_THRESHOLD_UNTHROTTLE) return; if (!tty->count) return; n_tty_kick_worker(tty); - n_tty_write_wakeup(tty->link); - if (waitqueue_active(&tty->link->write_wait)) - wake_up_interruptible_poll(&tty->link->write_wait, POLLOUT); + tty_wakeup(tty->link); return; } @@ -343,8 +340,7 @@ static void n_tty_packet_mode_flush(struct tty_struct *tty) spin_lock_irqsave(&tty->ctrl_lock, flags); tty->ctrl_status |= TIOCPKT_FLUSHREAD; spin_unlock_irqrestore(&tty->ctrl_lock, 
flags); - if (waitqueue_active(&tty->link->read_wait)) - wake_up_interruptible(&tty->link->read_wait); + wake_up_interruptible(&tty->link->read_wait); } } @@ -1108,19 +1104,29 @@ static void eraser(unsigned char c, struct tty_struct *tty) * Locking: ctrl_lock */ -static void isig(int sig, struct tty_struct *tty) +static void __isig(int sig, struct tty_struct *tty) { - struct n_tty_data *ldata = tty->disc_data; struct pid *tty_pgrp = tty_get_pgrp(tty); if (tty_pgrp) { kill_pgrp(tty_pgrp, sig, 1); put_pid(tty_pgrp); } +} + +static void isig(int sig, struct tty_struct *tty) +{ + struct n_tty_data *ldata = tty->disc_data; + + if (L_NOFLSH(tty)) { + /* signal only */ + __isig(sig, tty); - if (!L_NOFLSH(tty)) { + } else { /* signal and flush */ up_read(&tty->termios_rwsem); down_write(&tty->termios_rwsem); + __isig(sig, tty); + /* clear echo buffer */ mutex_lock(&ldata->output_lock); ldata->echo_head = ldata->echo_tail = 0; @@ -1373,8 +1379,7 @@ n_tty_receive_char_special(struct tty_struct *tty, unsigned char c) put_tty_queue(c, ldata); smp_store_release(&ldata->canon_head, ldata->read_head); kill_fasync(&tty->fasync, SIGIO, POLL_IN); - if (waitqueue_active(&tty->read_wait)) - wake_up_interruptible_poll(&tty->read_wait, POLLIN); + wake_up_interruptible_poll(&tty->read_wait, POLLIN); return 0; } } @@ -1660,8 +1665,7 @@ static void __receive_buf(struct tty_struct *tty, const unsigned char *cp, if ((read_cnt(ldata) >= ldata->minimum_to_wake) || L_EXTPROC(tty)) { kill_fasync(&tty->fasync, SIGIO, POLL_IN); - if (waitqueue_active(&tty->read_wait)) - wake_up_interruptible_poll(&tty->read_wait, POLLIN); + wake_up_interruptible_poll(&tty->read_wait, POLLIN); } } @@ -1880,10 +1884,8 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) } /* The termios change make the tty ready for I/O */ - if (waitqueue_active(&tty->write_wait)) - wake_up_interruptible(&tty->write_wait); - if (waitqueue_active(&tty->read_wait)) - wake_up_interruptible(&tty->read_wait); + wake_up_interruptible(&tty->write_wait); + wake_up_interruptible(&tty->read_wait); } /** diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 4d5e8409769c3..254c183a5efed 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -672,7 +672,14 @@ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty) /* this is called once with whichever end is closed last */ static void pty_unix98_shutdown(struct tty_struct *tty) { - devpts_kill_index(tty->driver_data, tty->index); + struct inode *ptmx_inode; + + if (tty->driver->subtype == PTY_TYPE_MASTER) + ptmx_inode = tty->driver_data; + else + ptmx_inode = tty->link->driver_data; + devpts_kill_index(ptmx_inode, tty->index); + devpts_del_ref(ptmx_inode); } static const struct tty_operations ptm_unix98_ops = { @@ -764,6 +771,18 @@ static int ptmx_open(struct inode *inode, struct file *filp) set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ tty->driver_data = inode; + /* + * In the case where all references to ptmx inode are dropped and we + * still have /dev/tty opened pointing to the master/slave pair (ptmx + * is closed/released before /dev/tty), we must make sure that the inode + * is still valid when we call the final pty_unix98_shutdown, thus we + * hold an additional reference to the ptmx inode. For the same /dev/tty + * last close case, we also need to make sure the super_block isn't + * destroyed (devpts instance unmounted), before /dev/tty is closed and + * on its release devpts_kill_index is called. 
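Several n_tty hunks above delete waitqueue_active() pre-checks in favour of unconditional wake-ups. The reasoning, sketched:

    #include <linux/wait.h>

    /* Without paired memory barriers, the lockless waitqueue_active()
     * test can miss a waiter that is concurrently enqueuing itself.
     * An unconditional wake-up is correct either way: it takes the
     * queue lock and is a no-op when the queue is empty. */
    static void demo_notify_readers(wait_queue_head_t *wq)
    {
            /*
             * Racy form being removed:
             *      if (waitqueue_active(wq))
             *              wake_up_interruptible(wq);
             */
            wake_up_interruptible(wq);
    }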
+ */ + devpts_add_ref(inode); + tty_add_file(tty, filp); slave_inode = devpts_pty_new(inode, diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 4506e405c8f39..b4fd8debf941b 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -339,6 +339,14 @@ configured less than Maximum supported fifo bytes */ UART_FCR7_64BYTE, .flags = UART_CAP_FIFO, }, + [PORT_RT2880] = { + .name = "Palmchip BK-3103", + .fifo_size = 16, + .tx_loadsz = 16, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .rxtrig_bytes = {1, 4, 8, 14}, + .flags = UART_CAP_FIFO, + }, }; /* Uart divisor latch read */ diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c index 21d01a491405a..e508939daea3f 100644 --- a/drivers/tty/serial/8250/8250_dma.c +++ b/drivers/tty/serial/8250/8250_dma.c @@ -80,10 +80,6 @@ int serial8250_tx_dma(struct uart_8250_port *p) return 0; dma->tx_size = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE); - if (dma->tx_size < p->port.fifosize) { - ret = -EINVAL; - goto err; - } desc = dmaengine_prep_slave_single(dma->txchan, dma->tx_addr + xmit->tail, diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 46bcebba54b2f..b82b2a0f82a35 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1380,6 +1380,9 @@ ce4100_serial_setup(struct serial_private *priv, #define PCI_DEVICE_ID_INTEL_BSW_UART1 0x228a #define PCI_DEVICE_ID_INTEL_BSW_UART2 0x228c +#define PCI_DEVICE_ID_INTEL_BDW_UART1 0x9ce3 +#define PCI_DEVICE_ID_INTEL_BDW_UART2 0x9ce4 + #define BYT_PRV_CLK 0x800 #define BYT_PRV_CLK_EN (1 << 0) #define BYT_PRV_CLK_M_VAL_SHIFT 1 @@ -1458,11 +1461,13 @@ byt_serial_setup(struct serial_private *priv, switch (pdev->device) { case PCI_DEVICE_ID_INTEL_BYT_UART1: case PCI_DEVICE_ID_INTEL_BSW_UART1: + case PCI_DEVICE_ID_INTEL_BDW_UART1: rx_param->src_id = 3; tx_param->dst_id = 2; break; case PCI_DEVICE_ID_INTEL_BYT_UART2: case PCI_DEVICE_ID_INTEL_BSW_UART2: + case PCI_DEVICE_ID_INTEL_BDW_UART2: rx_param->src_id = 5; tx_param->dst_id = 4; break; @@ -1997,9 +2002,17 @@ pci_wch_ch38x_setup(struct serial_private *priv, #define PCIE_VENDOR_ID_WCH 0x1c00 #define PCIE_DEVICE_ID_WCH_CH382_2S1P 0x3250 #define PCIE_DEVICE_ID_WCH_CH384_4S 0x3470 +#define PCIE_DEVICE_ID_WCH_CH382_2S 0x3253 +#define PCI_DEVICE_ID_EXAR_XR17V4358 0x4358 #define PCI_DEVICE_ID_EXAR_XR17V8358 0x8358 +#define PCI_VENDOR_ID_PERICOM 0x12D8 +#define PCI_DEVICE_ID_PERICOM_PI7C9X7951 0x7951 +#define PCI_DEVICE_ID_PERICOM_PI7C9X7952 0x7952 +#define PCI_DEVICE_ID_PERICOM_PI7C9X7954 0x7954 +#define PCI_DEVICE_ID_PERICOM_PI7C9X7958 0x7958 + /* Unknown vendors/cards - this should not be in linux/pci_ids.h */ #define PCI_SUBDEVICE_ID_UNKNOWN_0x1584 0x1584 #define PCI_SUBDEVICE_ID_UNKNOWN_0x1588 0x1588 @@ -2147,6 +2160,20 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .subdevice = PCI_ANY_ID, .setup = byt_serial_setup, }, + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_BDW_UART1, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = byt_serial_setup, + }, + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_BDW_UART2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = byt_serial_setup, + }, /* * ITE */ @@ -2314,27 +2341,12 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { * Pericom */ { - .vendor = 0x12d8, - .device = 0x7952, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, 
- .setup = pci_pericom_setup, - }, - { - .vendor = 0x12d8, - .device = 0x7954, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .setup = pci_pericom_setup, - }, - { - .vendor = 0x12d8, - .device = 0x7958, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .setup = pci_pericom_setup, + .vendor = PCI_VENDOR_ID_PERICOM, + .device = PCI_ANY_ID, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, }, - /* * PLX */ @@ -2522,6 +2534,13 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .subdevice = PCI_ANY_ID, .setup = pci_xr17v35x_setup, }, + { + .vendor = PCI_VENDOR_ID_EXAR, + .device = PCI_DEVICE_ID_EXAR_XR17V4358, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_xr17v35x_setup, + }, { .vendor = PCI_VENDOR_ID_EXAR, .device = PCI_DEVICE_ID_EXAR_XR17V8358, @@ -2711,6 +2730,14 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .subdevice = PCI_ANY_ID, .setup = pci_wch_ch353_setup, }, + /* WCH CH382 2S card (16850 clone) */ + { + .vendor = PCIE_VENDOR_ID_WCH, + .device = PCIE_DEVICE_ID_WCH_CH382_2S, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_wch_ch38x_setup, + }, /* WCH CH382 2S1P card (16850 clone) */ { .vendor = PCIE_VENDOR_ID_WCH, @@ -3008,6 +3035,7 @@ enum pci_board_num_t { pbn_exar_XR17V352, pbn_exar_XR17V354, pbn_exar_XR17V358, + pbn_exar_XR17V4358, pbn_exar_XR17V8358, pbn_exar_ibm_saturn, pbn_pasemi_1682M, @@ -3030,7 +3058,12 @@ enum pci_board_num_t { pbn_fintek_4, pbn_fintek_8, pbn_fintek_12, + pbn_wch382_2, pbn_wch384_4, + pbn_pericom_PI7C9X7951, + pbn_pericom_PI7C9X7952, + pbn_pericom_PI7C9X7954, + pbn_pericom_PI7C9X7958, }; /* @@ -3695,6 +3728,14 @@ static struct pciserial_board pci_boards[] = { .reg_shift = 0, .first_offset = 0, }, + [pbn_exar_XR17V4358] = { + .flags = FL_BASE0, + .num_ports = 12, + .base_baud = 7812500, + .uart_offset = 0x400, + .reg_shift = 0, + .first_offset = 0, + }, [pbn_exar_XR17V8358] = { .flags = FL_BASE0, .num_ports = 16, @@ -3848,7 +3889,13 @@ static struct pciserial_board pci_boards[] = { .base_baud = 115200, .first_offset = 0x40, }, - + [pbn_wch382_2] = { + .flags = FL_BASE0, + .num_ports = 2, + .base_baud = 115200, + .uart_offset = 8, + .first_offset = 0xC0, + }, [pbn_wch384_4] = { .flags = FL_BASE0, .num_ports = 4, @@ -3856,6 +3903,33 @@ static struct pciserial_board pci_boards[] = { .uart_offset = 8, .first_offset = 0xC0, }, + /* + * Pericom PI7C9X795[1248] Uno/Dual/Quad/Octal UART + */ + [pbn_pericom_PI7C9X7951] = { + .flags = FL_BASE0, + .num_ports = 1, + .base_baud = 921600, + .uart_offset = 0x8, + }, + [pbn_pericom_PI7C9X7952] = { + .flags = FL_BASE0, + .num_ports = 2, + .base_baud = 921600, + .uart_offset = 0x8, + }, + [pbn_pericom_PI7C9X7954] = { + .flags = FL_BASE0, + .num_ports = 4, + .base_baud = 921600, + .uart_offset = 0x8, + }, + [pbn_pericom_PI7C9X7958] = { + .flags = FL_BASE0, + .num_ports = 8, + .base_baud = 921600, + .uart_offset = 0x8, + }, }; static const struct pci_device_id blacklist[] = { @@ -5112,10 +5186,33 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_exar_XR17V358 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V4358, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_exar_XR17V4358 }, { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V8358, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_exar_XR17V8358 }, + /* + * Pericom PI7C9X795[1248] Uno/Dual/Quad/Octal UART + */ + { PCI_VENDOR_ID_PERICOM, PCI_DEVICE_ID_PERICOM_PI7C9X7951, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_pericom_PI7C9X7951 }, + { 
PCI_VENDOR_ID_PERICOM, PCI_DEVICE_ID_PERICOM_PI7C9X7952, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_pericom_PI7C9X7952 }, + { PCI_VENDOR_ID_PERICOM, PCI_DEVICE_ID_PERICOM_PI7C9X7954, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_PERICOM, PCI_DEVICE_ID_PERICOM_PI7C9X7958, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_pericom_PI7C9X7958 }, /* * Topic TP560 Data/Fax/Voice 56k modem (reported by Evan Clarke) */ @@ -5542,6 +5639,16 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000, pbn_byt }, + /* Intel Broadwell */ + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART1, + PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000, + pbn_byt }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART2, + PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000, + pbn_byt }, + /* * Intel Penwell */ @@ -5601,6 +5708,10 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_bt_2_115200 }, + { PCIE_VENDOR_ID_WCH, PCIE_DEVICE_ID_WCH_CH382_2S, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, pbn_wch382_2 }, + { PCIE_VENDOR_ID_WCH, PCIE_DEVICE_ID_WCH_CH384_4S, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_wch384_4 }, diff --git a/drivers/tty/serial/8250/8250_pnp.c b/drivers/tty/serial/8250/8250_pnp.c index 50a09cd76d50a..658b392d1170d 100644 --- a/drivers/tty/serial/8250/8250_pnp.c +++ b/drivers/tty/serial/8250/8250_pnp.c @@ -41,6 +41,12 @@ static const struct pnp_device_id pnp_dev_table[] = { { "AEI1240", 0 }, /* Rockwell 56K ACF II Fax+Data+Voice Modem */ { "AKY1021", 0 /*SPCI_FL_NO_SHIRQ*/ }, + /* + * ALi Fast Infrared Controller + * Native driver (ali-ircc) is broken so at least + * it can be used with irtty-sir. + */ + { "ALI5123", 0 }, /* AZT3005 PnP SOUND DEVICE */ { "AZT4001", 0 }, /* Best Data Products Inc. Smart One 336F PnP Modem */ @@ -364,6 +370,11 @@ static const struct pnp_device_id pnp_dev_table[] = { /* Winbond CIR port, should not be probed. We should keep track of it to prevent the legacy serial driver from probing it */ { "WEC1022", CIR_PORT }, + /* + * SMSC IrCC SIR/FIR port, should not be probed by serial driver + * as well so its own driver can bind to it. + */ + { "SMCF010", CIR_PORT }, { "", 0 } }; diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index f8120c1bde147..8cd35348fc193 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -241,7 +241,6 @@ config SERIAL_SAMSUNG tristate "Samsung SoC serial support" depends on PLAT_SAMSUNG || ARCH_EXYNOS select SERIAL_CORE - select SERIAL_EARLYCON help Support for the on-chip UARTs on the Samsung S3C24XX series CPUs, providing /dev/ttySAC0, 1 and 2 (note, some machines may not @@ -277,6 +276,7 @@ config SERIAL_SAMSUNG_CONSOLE bool "Support for console on Samsung SoC serial port" depends on SERIAL_SAMSUNG=y select SERIAL_CORE_CONSOLE + select SERIAL_EARLYCON help Allow selection of the S3C24XX on-board serial ports for use as an virtual console. 
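The serial_pci_tbl additions earlier in this hunk match on class code as well as vendor/device, so only PCI functions reporting the serial-controller class bind. A sketch of one such class-masked entry (driver_data value is illustrative):

    #include <linux/module.h>
    #include <linux/pci.h>

    static const struct pci_device_id demo_serial_ids[] = {
            /* vendor, device, subvendor, subdevice, class, class_mask, data */
            { PCI_VENDOR_ID_INTEL, 0x9ce3, PCI_ANY_ID, PCI_ANY_ID,
              PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000, 0 },
            { }     /* terminator */
    };
    MODULE_DEVICE_TABLE(pci, demo_serial_ids);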
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 763eb20fe3213..0cc622afb67d4 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1360,9 +1360,9 @@ static void pl011_tx_softirq(struct work_struct *work) struct uart_amba_port *uap = container_of(dwork, struct uart_amba_port, tx_softirq_work); - spin_lock(&uap->port.lock); + spin_lock_irq(&uap->port.lock); while (pl011_tx_chars(uap)) ; - spin_unlock(&uap->port.lock); + spin_unlock_irq(&uap->port.lock); } static void pl011_tx_irq_seen(struct uart_amba_port *uap) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 27dade29646b7..85323ff75edf6 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -315,8 +315,7 @@ static int atmel_config_rs485(struct uart_port *port, if (rs485conf->flags & SER_RS485_ENABLED) { dev_dbg(port->dev, "Setting UART to RS485\n"); atmel_port->tx_done_mask = ATMEL_US_TXEMPTY; - if ((rs485conf->delay_rts_after_send) > 0) - UART_PUT_TTGR(port, rs485conf->delay_rts_after_send); + UART_PUT_TTGR(port, rs485conf->delay_rts_after_send); mode |= ATMEL_US_USMODE_RS485; } else { dev_dbg(port->dev, "Setting UART to RS232\n"); @@ -354,8 +353,7 @@ static void atmel_set_mctrl(struct uart_port *port, u_int mctrl) /* override mode to RS485 if needed, otherwise keep the current mode */ if (port->rs485.flags & SER_RS485_ENABLED) { - if ((port->rs485.delay_rts_after_send) > 0) - UART_PUT_TTGR(port, port->rs485.delay_rts_after_send); + UART_PUT_TTGR(port, port->rs485.delay_rts_after_send); mode &= ~ATMEL_US_USMODE; mode |= ATMEL_US_USMODE_RS485; } @@ -2061,8 +2059,7 @@ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios, /* mode */ if (port->rs485.flags & SER_RS485_ENABLED) { - if ((port->rs485.delay_rts_after_send) > 0) - UART_PUT_TTGR(port, port->rs485.delay_rts_after_send); + UART_PUT_TTGR(port, port->rs485.delay_rts_after_send); mode |= ATMEL_US_USMODE_RS485; } else if (termios->c_cflag & CRTSCTS) { /* RS232 with hardware handshake (RTS/CTS) */ @@ -2643,7 +2640,7 @@ static int atmel_serial_probe(struct platform_device *pdev) ret = atmel_init_gpios(port, &pdev->dev); if (ret < 0) { dev_err(&pdev->dev, "Failed to initialize GPIOs."); - goto err; + goto err_clear_bit; } ret = atmel_init_port(port, pdev); diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 88250395b0ce9..01aa52f574e52 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1132,11 +1132,6 @@ static int imx_startup(struct uart_port *port) while (!(readl(sport->port.membase + UCR2) & UCR2_SRST) && (--i > 0)) udelay(1); - /* Can we enable the DMA support? */ - if (is_imx6q_uart(sport) && !uart_console(port) && - !sport->dma_is_inited) - imx_uart_dma_init(sport); - spin_lock_irqsave(&sport->port.lock, flags); /* @@ -1145,9 +1140,6 @@ static int imx_startup(struct uart_port *port) writel(USR1_RTSD, sport->port.membase + USR1); writel(USR2_ORE, sport->port.membase + USR2); - if (sport->dma_is_inited && !sport->dma_is_enabled) - imx_enable_dma(sport); - temp = readl(sport->port.membase + UCR1); temp |= UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN; @@ -1318,6 +1310,11 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios, } else { ucr2 |= UCR2_CTSC; } + + /* Can we enable the DMA support? 
*/ + if (is_imx6q_uart(sport) && !uart_console(port) + && !sport->dma_is_inited) + imx_uart_dma_init(sport); } else { termios->c_cflag &= ~CRTSCTS; } @@ -1434,6 +1431,8 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios, if (UART_ENABLE_MS(&sport->port, termios->c_cflag)) imx_enable_ms(&sport->port); + if (sport->dma_is_inited && !sport->dma_is_enabled) + imx_enable_dma(sport); spin_unlock_irqrestore(&sport->port.lock, flags); } diff --git a/drivers/tty/serial/men_z135_uart.c b/drivers/tty/serial/men_z135_uart.c index 35c55505b3eb3..5a41b8fbb10a8 100644 --- a/drivers/tty/serial/men_z135_uart.c +++ b/drivers/tty/serial/men_z135_uart.c @@ -392,7 +392,6 @@ static irqreturn_t men_z135_intr(int irq, void *data) struct men_z135_port *uart = (struct men_z135_port *)data; struct uart_port *port = &uart->port; bool handled = false; - unsigned long flags; int irq_id; uart->stat_reg = ioread32(port->membase + MEN_Z135_STAT_REG); @@ -401,7 +400,7 @@ static irqreturn_t men_z135_intr(int irq, void *data) if (!irq_id) goto out; - spin_lock_irqsave(&port->lock, flags); + spin_lock(&port->lock); /* It's save to write to IIR[7:6] RXC[9:8] */ iowrite8(irq_id, port->membase + MEN_Z135_STAT_REG); @@ -427,7 +426,7 @@ static irqreturn_t men_z135_intr(int irq, void *data) handled = true; } - spin_unlock_irqrestore(&port->lock, flags); + spin_unlock(&port->lock); out: return IRQ_RETVAL(handled); } @@ -717,7 +716,7 @@ static void men_z135_set_termios(struct uart_port *port, baud = uart_get_baud_rate(port, termios, old, 0, uart_freq / 16); - spin_lock(&port->lock); + spin_lock_irq(&port->lock); if (tty_termios_baud_rate(termios)) tty_termios_encode_baud_rate(termios, baud, baud); @@ -725,7 +724,7 @@ static void men_z135_set_termios(struct uart_port *port, iowrite32(bd_reg, port->membase + MEN_Z135_BAUD_REG); uart_update_timeout(port, termios->c_cflag, baud); - spin_unlock(&port->lock); + spin_unlock_irq(&port->lock); } static const char *men_z135_type(struct uart_port *port) diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 7f49172ccd867..0a88693cd8ca0 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -1368,7 +1368,7 @@ static inline void serial_omap_add_console_port(struct uart_omap_port *up) /* Enable or disable the rs485 support */ static int -serial_omap_config_rs485(struct uart_port *port, struct serial_rs485 *rs485conf) +serial_omap_config_rs485(struct uart_port *port, struct serial_rs485 *rs485) { struct uart_omap_port *up = to_uart_omap_port(port); unsigned int mode; @@ -1381,8 +1381,12 @@ serial_omap_config_rs485(struct uart_port *port, struct serial_rs485 *rs485conf) up->ier = 0; serial_out(up, UART_IER, 0); + /* Clamp the delays to [0, 100ms] */ + rs485->delay_rts_before_send = min(rs485->delay_rts_before_send, 100U); + rs485->delay_rts_after_send = min(rs485->delay_rts_after_send, 100U); + /* store new config */ - port->rs485 = *rs485conf; + port->rs485 = *rs485; /* * Just as a precaution, only allow rs485 diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index a0ae942d9562d..1e0d9b8c48c93 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -295,15 +295,6 @@ static int s3c24xx_serial_start_tx_dma(struct s3c24xx_uart_port *ourport, if (ourport->tx_mode != S3C24XX_TX_DMA) enable_tx_dma(ourport); - while (xmit->tail & (dma_get_cache_alignment() - 1)) { - if (rd_regl(port, S3C2410_UFSTAT) & ourport->info->tx_fifofull) - return 0; - wr_regb(port, S3C2410_UTXH, 
xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - count--; - } - dma->tx_size = count & ~(dma_get_cache_alignment() - 1); dma->tx_transfer_addr = dma->tx_addr + xmit->tail; @@ -342,7 +333,9 @@ static void s3c24xx_serial_start_next_tx(struct s3c24xx_uart_port *ourport) return; } - if (!ourport->dma || !ourport->dma->tx_chan || count < port->fifosize) + if (!ourport->dma || !ourport->dma->tx_chan || + count < ourport->min_dma_size || + xmit->tail & (dma_get_cache_alignment() - 1)) s3c24xx_serial_start_tx_pio(ourport); else s3c24xx_serial_start_tx_dma(ourport, count); @@ -736,15 +729,20 @@ static irqreturn_t s3c24xx_serial_tx_chars(int irq, void *id) struct uart_port *port = &ourport->port; struct circ_buf *xmit = &port->state->xmit; unsigned long flags; - int count; + int count, dma_count = 0; spin_lock_irqsave(&port->lock, flags); count = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE); - if (ourport->dma && ourport->dma->tx_chan && count >= port->fifosize) { - s3c24xx_serial_start_tx_dma(ourport, count); - goto out; + if (ourport->dma && ourport->dma->tx_chan && + count >= ourport->min_dma_size) { + int align = dma_get_cache_alignment() - + (xmit->tail & (dma_get_cache_alignment() - 1)); + if (count-align >= ourport->min_dma_size) { + dma_count = count-align; + count = align; + } } if (port->x_char) { @@ -765,14 +763,24 @@ static irqreturn_t s3c24xx_serial_tx_chars(int irq, void *id) /* try and drain the buffer... */ - count = port->fifosize; - while (!uart_circ_empty(xmit) && count-- > 0) { + if (count > port->fifosize) { + count = port->fifosize; + dma_count = 0; + } + + while (!uart_circ_empty(xmit) && count > 0) { if (rd_regl(port, S3C2410_UFSTAT) & ourport->info->tx_fifofull) break; wr_regb(port, S3C2410_UTXH, xmit->buf[xmit->tail]); xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); port->icount.tx++; + count--; + } + + if (!count && dma_count) { + s3c24xx_serial_start_tx_dma(ourport, dma_count); + goto out; } if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) { @@ -1838,6 +1846,13 @@ static int s3c24xx_serial_probe(struct platform_device *pdev) else if (ourport->info->fifosize) ourport->port.fifosize = ourport->info->fifosize; + /* + * DMA transfers must be aligned at least to cache line size, + * so find minimal transfer size suitable for DMA mode + */ + ourport->min_dma_size = max_t(int, ourport->port.fifosize, + dma_get_cache_alignment()); + probe_index++; dbg("%s: initialising port %p...\n", __func__, ourport); diff --git a/drivers/tty/serial/samsung.h b/drivers/tty/serial/samsung.h index d275032aa68d4..fc5deaa4f382d 100644 --- a/drivers/tty/serial/samsung.h +++ b/drivers/tty/serial/samsung.h @@ -82,6 +82,7 @@ struct s3c24xx_uart_port { unsigned char tx_claimed; unsigned int pm_level; unsigned long baudclk_rate; + unsigned int min_dma_size; unsigned int rx_irq; unsigned int tx_irq; diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 0b7bb12dfc68b..ec540445bb71f 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1409,7 +1409,7 @@ static void uart_close(struct tty_struct *tty, struct file *filp) mutex_lock(&port->mutex); uart_shutdown(tty, state); tty_port_tty_set(port, NULL); - tty->closing = 0; + spin_lock_irqsave(&port->lock, flags); if (port->blocked_open) { @@ -1435,6 +1435,7 @@ static void uart_close(struct tty_struct *tty, struct file *filp) mutex_unlock(&port->mutex); tty_ldisc_flush(tty); + tty->closing = 0; } static void 
uart_wait_until_sent(struct tty_struct *tty, int timeout) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 843f2cdc280b9..9ffdfcf2ec6ed 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -55,9 +55,6 @@ static int __read_mostly sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE; static bool __read_mostly sysrq_always_enabled; -unsigned short platform_sysrq_reset_seq[] __weak = { KEY_RESERVED }; -int sysrq_reset_downtime_ms __weak; - static bool sysrq_on(void) { return sysrq_enabled || sysrq_always_enabled; @@ -569,6 +566,7 @@ void handle_sysrq(int key) EXPORT_SYMBOL(handle_sysrq); #ifdef CONFIG_INPUT +static int sysrq_reset_downtime_ms; /* Simple translation table for the SysRq keys */ static const unsigned char sysrq_xlate[KEY_CNT] = @@ -949,23 +947,8 @@ static bool sysrq_handler_registered; static inline void sysrq_register_handler(void) { - unsigned short key; int error; - int i; - - /* First check if a __weak interface was instantiated. */ - for (i = 0; i < ARRAY_SIZE(sysrq_reset_seq); i++) { - key = platform_sysrq_reset_seq[i]; - if (key == KEY_RESERVED || key > KEY_MAX) - break; - - sysrq_reset_seq[sysrq_reset_seq_len++] = key; - } - /* - * DT configuration takes precedence over anything that would - * have been defined via the __weak interface. - */ sysrq_of_get_keyreset_config(); error = input_register_handler(&sysrq_handler); diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index 90ca082935f63..3d245cd3d8e62 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -265,7 +265,7 @@ static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty, * * Audit @data of @size from @tty, if necessary. */ -void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, +void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size, unsigned icanon) { struct tty_audit_buf *buf; diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index e5695467598f9..be96970646a96 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1287,18 +1287,22 @@ int tty_send_xchar(struct tty_struct *tty, char ch) int was_stopped = tty->stopped; if (tty->ops->send_xchar) { + down_read(&tty->termios_rwsem); tty->ops->send_xchar(tty, ch); + up_read(&tty->termios_rwsem); return 0; } if (tty_write_lock(tty, 0) < 0) return -ERESTARTSYS; + down_read(&tty->termios_rwsem); if (was_stopped) start_tty(tty); tty->ops->write(tty, &ch, 1); if (was_stopped) stop_tty(tty); + up_read(&tty->termios_rwsem); tty_write_unlock(tty); return 0; } @@ -2144,8 +2148,24 @@ static int tty_open(struct inode *inode, struct file *filp) if (!noctty && current->signal->leader && !current->signal->tty && - tty->session == NULL) - __proc_set_tty(tty); + tty->session == NULL) { + /* + * Don't let a process that only has write access to the tty + * obtain the privileges associated with having a tty as + * controlling terminal (being able to reopen it with full + * access through /dev/tty, being able to perform pushback). + * Many distributions set the group of all ttys to "tty" and + * grant write-only access to all terminals for setgid tty + * binaries, which should not imply full privileges on all ttys. + * + * This could theoretically break old code that performs open() + * on a write-only file descriptor. In that case, it might be + * necessary to also permit this if + * inode_permission(inode, MAY_READ) == 0. 
+ */ + if (filp->f_mode & FMODE_READ) + __proc_set_tty(tty); + } spin_unlock_irq(&current->sighand->siglock); read_unlock(&tasklist_lock); tty_unlock(tty); @@ -2434,7 +2454,7 @@ static int fionbio(struct file *file, int __user *p) * Takes ->siglock() when updating signal->tty */ -static int tiocsctty(struct tty_struct *tty, int arg) +static int tiocsctty(struct tty_struct *tty, struct file *file, int arg) { int ret = 0; @@ -2468,6 +2488,13 @@ static int tiocsctty(struct tty_struct *tty, int arg) goto unlock; } } + + /* See the comment in tty_open(). */ + if ((file->f_mode & FMODE_READ) == 0 && !capable(CAP_SYS_ADMIN)) { + ret = -EPERM; + goto unlock; + } + proc_set_tty(tty); unlock: read_unlock(&tasklist_lock); @@ -2642,6 +2669,28 @@ static int tiocsetd(struct tty_struct *tty, int __user *p) return ret; } +/** + * tiocgetd - get line discipline + * @tty: tty device + * @p: pointer to user data + * + * Retrieves the line discipline id directly from the ldisc. + * + * Locking: waits for ldisc reference (in case the line discipline + * is changing or the tty is being hungup) + */ + +static int tiocgetd(struct tty_struct *tty, int __user *p) +{ + struct tty_ldisc *ld; + int ret; + + ld = tty_ldisc_ref_wait(tty); + ret = put_user(ld->ops->num, p); + tty_ldisc_deref(ld); + return ret; +} + /** * send_break - performed time break * @tty: device to break on @@ -2860,7 +2909,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) no_tty(); return 0; case TIOCSCTTY: - return tiocsctty(tty, arg); + return tiocsctty(tty, file, arg); case TIOCGPGRP: return tiocgpgrp(tty, real_tty, p); case TIOCSPGRP: @@ -2868,7 +2917,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case TIOCGSID: return tiocgsid(tty, real_tty, p); case TIOCGETD: - return put_user(tty->ldisc->ops->num, (int __user *)p); + return tiocgetd(tty, p); case TIOCSETD: return tiocsetd(tty, p); case TIOCVHANGUP: diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 8e53fe4696647..7bbf86b94716a 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -1144,16 +1144,12 @@ int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file, spin_unlock_irq(&tty->flow_lock); break; case TCIOFF: - down_read(&tty->termios_rwsem); if (STOP_CHAR(tty) != __DISABLED_CHAR) retval = tty_send_xchar(tty, STOP_CHAR(tty)); - up_read(&tty->termios_rwsem); break; case TCION: - down_read(&tty->termios_rwsem); if (START_CHAR(tty) != __DISABLED_CHAR) retval = tty_send_xchar(tty, START_CHAR(tty)); - up_read(&tty->termios_rwsem); break; default: return -EINVAL; diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 389f0e0342596..846ceb91ec14e 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -56,7 +56,7 @@ static const struct of_device_id ci_hdrc_imx_dt_ids[] = { { .compatible = "fsl,imx27-usb", .data = &imx27_usb_data}, { .compatible = "fsl,imx6q-usb", .data = &imx6q_usb_data}, { .compatible = "fsl,imx6sl-usb", .data = &imx6sl_usb_data}, - { .compatible = "fsl,imx6sx-usb", .data = &imx6sl_usb_data}, + { .compatible = "fsl,imx6sx-usb", .data = &imx6sx_usb_data}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, ci_hdrc_imx_dt_ids); @@ -68,6 +68,12 @@ struct ci_hdrc_imx_data { struct imx_usbmisc_data *usbmisc_data; bool supports_runtime_pm; bool in_lpm; + /* SoC before i.mx6 (except imx23/imx28) needs three clks */ + bool need_three_clks; + struct clk *clk_ipg; + struct clk *clk_ahb; + struct clk *clk_per; + /* 
--------------------------------- */ }; /* Common functions shared by usbmisc drivers */ @@ -119,6 +125,102 @@ static struct imx_usbmisc_data *usbmisc_get_init_data(struct device *dev) } /* End of common functions shared by usbmisc drivers*/ +static int imx_get_clks(struct device *dev) +{ + struct ci_hdrc_imx_data *data = dev_get_drvdata(dev); + int ret = 0; + + data->clk_ipg = devm_clk_get(dev, "ipg"); + if (IS_ERR(data->clk_ipg)) { + /* If the platform only needs one clock */ + data->clk = devm_clk_get(dev, NULL); + if (IS_ERR(data->clk)) { + ret = PTR_ERR(data->clk); + dev_err(dev, + "Failed to get clks, err=%ld,%ld\n", + PTR_ERR(data->clk), PTR_ERR(data->clk_ipg)); + return ret; + } + return ret; + } + + data->clk_ahb = devm_clk_get(dev, "ahb"); + if (IS_ERR(data->clk_ahb)) { + ret = PTR_ERR(data->clk_ahb); + dev_err(dev, + "Failed to get ahb clock, err=%d\n", ret); + return ret; + } + + data->clk_per = devm_clk_get(dev, "per"); + if (IS_ERR(data->clk_per)) { + ret = PTR_ERR(data->clk_per); + dev_err(dev, + "Failed to get per clock, err=%d\n", ret); + return ret; + } + + data->need_three_clks = true; + return ret; +} + +static int imx_prepare_enable_clks(struct device *dev) +{ + struct ci_hdrc_imx_data *data = dev_get_drvdata(dev); + int ret = 0; + + if (data->need_three_clks) { + ret = clk_prepare_enable(data->clk_ipg); + if (ret) { + dev_err(dev, + "Failed to prepare/enable ipg clk, err=%d\n", + ret); + return ret; + } + + ret = clk_prepare_enable(data->clk_ahb); + if (ret) { + dev_err(dev, + "Failed to prepare/enable ahb clk, err=%d\n", + ret); + clk_disable_unprepare(data->clk_ipg); + return ret; + } + + ret = clk_prepare_enable(data->clk_per); + if (ret) { + dev_err(dev, + "Failed to prepare/enable per clk, err=%d\n", + ret); + clk_disable_unprepare(data->clk_ahb); + clk_disable_unprepare(data->clk_ipg); + return ret; + } + } else { + ret = clk_prepare_enable(data->clk); + if (ret) { + dev_err(dev, + "Failed to prepare/enable clk, err=%d\n", + ret); + return ret; + } + } + + return ret; +} + +static void imx_disable_unprepare_clks(struct device *dev) +{ + struct ci_hdrc_imx_data *data = dev_get_drvdata(dev); + + if (data->need_three_clks) { + clk_disable_unprepare(data->clk_per); + clk_disable_unprepare(data->clk_ahb); + clk_disable_unprepare(data->clk_ipg); + } else { + clk_disable_unprepare(data->clk); + } +} static int ci_hdrc_imx_probe(struct platform_device *pdev) { @@ -137,23 +239,18 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) if (!data) return -ENOMEM; + platform_set_drvdata(pdev, data); data->usbmisc_data = usbmisc_get_init_data(&pdev->dev); if (IS_ERR(data->usbmisc_data)) return PTR_ERR(data->usbmisc_data); - data->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(data->clk)) { - dev_err(&pdev->dev, - "Failed to get clock, err=%ld\n", PTR_ERR(data->clk)); - return PTR_ERR(data->clk); - } + ret = imx_get_clks(&pdev->dev); + if (ret) + return ret; - ret = clk_prepare_enable(data->clk); - if (ret) { - dev_err(&pdev->dev, - "Failed to prepare or enable clock, err=%d\n", ret); + ret = imx_prepare_enable_clks(&pdev->dev); + if (ret) return ret; - } data->phy = devm_usb_get_phy_by_phandle(&pdev->dev, "fsl,usbphy", 0); if (IS_ERR(data->phy)) { @@ -196,8 +293,6 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) goto disable_device; } - platform_set_drvdata(pdev, data); - if (data->supports_runtime_pm) { pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); @@ -210,7 +305,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) 
disable_device: ci_hdrc_remove_device(data->ci_pdev); err_clk: - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(&pdev->dev); return ret; } @@ -224,7 +319,7 @@ static int ci_hdrc_imx_remove(struct platform_device *pdev) pm_runtime_put_noidle(&pdev->dev); } ci_hdrc_remove_device(data->ci_pdev); - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(&pdev->dev); return 0; } @@ -236,7 +331,7 @@ static int imx_controller_suspend(struct device *dev) dev_dbg(dev, "at %s\n", __func__); - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(dev); data->in_lpm = true; return 0; @@ -254,7 +349,7 @@ static int imx_controller_resume(struct device *dev) return 0; } - ret = clk_prepare_enable(data->clk); + ret = imx_prepare_enable_clks(dev); if (ret) return ret; @@ -269,7 +364,7 @@ static int imx_controller_resume(struct device *dev) return 0; clk_disable: - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(dev); return ret; } diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 74fea4fa41b15..3ad48e1c0c57e 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -1024,7 +1024,18 @@ static struct platform_driver ci_hdrc_driver = { }, }; -module_platform_driver(ci_hdrc_driver); +static int __init ci_hdrc_platform_register(void) +{ + ci_hdrc_host_driver_init(); + return platform_driver_register(&ci_hdrc_driver); +} +module_init(ci_hdrc_platform_register); + +static void __exit ci_hdrc_platform_unregister(void) +{ + platform_driver_unregister(&ci_hdrc_driver); +} +module_exit(ci_hdrc_platform_unregister); MODULE_ALIAS("platform:ci_hdrc"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c index 21fe1a3143135..2f8af40e87ca7 100644 --- a/drivers/usb/chipidea/host.c +++ b/drivers/usb/chipidea/host.c @@ -237,9 +237,12 @@ int ci_hdrc_host_init(struct ci_hdrc *ci) rdrv->name = "host"; ci->roles[CI_ROLE_HOST] = rdrv; + return 0; +} + +void ci_hdrc_host_driver_init(void) +{ ehci_init_driver(&ci_ehci_hc_driver, &ehci_ci_overrides); orig_bus_suspend = ci_ehci_hc_driver.bus_suspend; ci_ehci_hc_driver.bus_suspend = ci_ehci_bus_suspend; - - return 0; } diff --git a/drivers/usb/chipidea/host.h b/drivers/usb/chipidea/host.h index 5707bf379bfb4..0f12f131bdd3f 100644 --- a/drivers/usb/chipidea/host.h +++ b/drivers/usb/chipidea/host.h @@ -5,6 +5,7 @@ int ci_hdrc_host_init(struct ci_hdrc *ci); void ci_hdrc_host_destroy(struct ci_hdrc *ci); +void ci_hdrc_host_driver_init(void); #else @@ -18,6 +19,11 @@ static inline void ci_hdrc_host_destroy(struct ci_hdrc *ci) } +static void ci_hdrc_host_driver_init(void) +{ + +} + #endif #endif /* __DRIVERS_USB_CHIPIDEA_HOST_H */ diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c index ad6c87a4653c2..fbc6285905a6d 100644 --- a/drivers/usb/chipidea/otg.c +++ b/drivers/usb/chipidea/otg.c @@ -118,7 +118,7 @@ static void ci_otg_work(struct work_struct *work) int ci_hdrc_otg_init(struct ci_hdrc *ci) { INIT_WORK(&ci->work, ci_otg_work); - ci->wq = create_singlethread_workqueue("ci_otg"); + ci->wq = create_freezable_workqueue("ci_otg"); if (!ci->wq) { dev_err(ci->dev, "can't create workqueue\n"); return -ENODEV; diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 764f668d45a9b..92937c14f8189 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -656,6 +656,44 @@ __acquires(hwep->lock) return 0; } +static int _ep_set_halt(struct usb_ep *ep, int value, bool check_transfer) +{ + struct ci_hw_ep *hwep = 
container_of(ep, struct ci_hw_ep, ep); + int direction, retval = 0; + unsigned long flags; + + if (ep == NULL || hwep->ep.desc == NULL) + return -EINVAL; + + if (usb_endpoint_xfer_isoc(hwep->ep.desc)) + return -EOPNOTSUPP; + + spin_lock_irqsave(hwep->lock, flags); + + if (value && hwep->dir == TX && check_transfer && + !list_empty(&hwep->qh.queue) && + !usb_endpoint_xfer_control(hwep->ep.desc)) { + spin_unlock_irqrestore(hwep->lock, flags); + return -EAGAIN; + } + + direction = hwep->dir; + do { + retval |= hw_ep_set_halt(hwep->ci, hwep->num, hwep->dir, value); + + if (!value) + hwep->wedge = 0; + + if (hwep->type == USB_ENDPOINT_XFER_CONTROL) + hwep->dir = (hwep->dir == TX) ? RX : TX; + + } while (hwep->dir != direction); + + spin_unlock_irqrestore(hwep->lock, flags); + return retval; +} + + /** * _gadget_stop_activity: stops all USB activity, flushes & disables all endpts * @gadget: gadget @@ -1051,7 +1089,7 @@ __acquires(ci->lock) num += ci->hw_ep_max / 2; spin_unlock(&ci->lock); - err = usb_ep_set_halt(&ci->ci_hw_ep[num].ep); + err = _ep_set_halt(&ci->ci_hw_ep[num].ep, 1, false); spin_lock(&ci->lock); if (!err) isr_setup_status_phase(ci); @@ -1110,8 +1148,8 @@ __acquires(ci->lock) if (err < 0) { spin_unlock(&ci->lock); - if (usb_ep_set_halt(&hwep->ep)) - dev_err(ci->dev, "error: ep_set_halt\n"); + if (_ep_set_halt(&hwep->ep, 1, false)) + dev_err(ci->dev, "error: _ep_set_halt\n"); spin_lock(&ci->lock); } } @@ -1142,9 +1180,9 @@ __acquires(ci->lock) err = isr_setup_status_phase(ci); if (err < 0) { spin_unlock(&ci->lock); - if (usb_ep_set_halt(&hwep->ep)) + if (_ep_set_halt(&hwep->ep, 1, false)) dev_err(ci->dev, - "error: ep_set_halt\n"); + "error: _ep_set_halt\n"); spin_lock(&ci->lock); } } @@ -1390,41 +1428,7 @@ static int ep_dequeue(struct usb_ep *ep, struct usb_request *req) */ static int ep_set_halt(struct usb_ep *ep, int value) { - struct ci_hw_ep *hwep = container_of(ep, struct ci_hw_ep, ep); - int direction, retval = 0; - unsigned long flags; - - if (ep == NULL || hwep->ep.desc == NULL) - return -EINVAL; - - if (usb_endpoint_xfer_isoc(hwep->ep.desc)) - return -EOPNOTSUPP; - - spin_lock_irqsave(hwep->lock, flags); - -#ifndef STALL_IN - /* g_file_storage MS compliant but g_zero fails chapter 9 compliance */ - if (value && hwep->type == USB_ENDPOINT_XFER_BULK && hwep->dir == TX && - !list_empty(&hwep->qh.queue)) { - spin_unlock_irqrestore(hwep->lock, flags); - return -EAGAIN; - } -#endif - - direction = hwep->dir; - do { - retval |= hw_ep_set_halt(hwep->ci, hwep->num, hwep->dir, value); - - if (!value) - hwep->wedge = 0; - - if (hwep->type == USB_ENDPOINT_XFER_CONTROL) - hwep->dir = (hwep->dir == TX) ? 
RX : TX; - - } while (hwep->dir != direction); - - spin_unlock_irqrestore(hwep->lock, flags); - return retval; + return _ep_set_halt(ep, value, true); } /** @@ -1726,6 +1730,22 @@ static int ci_udc_start(struct usb_gadget *gadget, return retval; } +static void ci_udc_stop_for_otg_fsm(struct ci_hdrc *ci) +{ + if (!ci_otg_is_fsm_mode(ci)) + return; + + mutex_lock(&ci->fsm.lock); + if (ci->fsm.otg->state == OTG_STATE_A_PERIPHERAL) { + ci->fsm.a_bidl_adis_tmout = 1; + ci_hdrc_otg_fsm_start(ci); + } else if (ci->fsm.otg->state == OTG_STATE_B_PERIPHERAL) { + ci->fsm.protocol = PROTO_UNDEF; + ci->fsm.otg->state = OTG_STATE_UNDEFINED; + } + mutex_unlock(&ci->fsm.lock); +} + /** * ci_udc_stop: unregister a gadget driver */ @@ -1750,6 +1770,7 @@ static int ci_udc_stop(struct usb_gadget *gadget) ci->driver = NULL; spin_unlock_irqrestore(&ci->lock, flags); + ci_udc_stop_for_otg_fsm(ci); return 0; } diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 5c8f58114677d..df3deb000a808 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -432,7 +432,8 @@ static void acm_read_bulk_callback(struct urb *urb) set_bit(rb->index, &acm->read_urbs_free); dev_dbg(&acm->data->dev, "%s - non-zero urb status: %d\n", __func__, status); - return; + if ((status != -ENOENT) || (urb->actual_length == 0)) + return; } usb_mark_last_busy(acm->dev); @@ -1414,6 +1415,8 @@ static int acm_probe(struct usb_interface *intf, usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress), NULL, acm->writesize, acm_write_bulk, snd); snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; + if (quirks & SEND_ZERO_PACKET) + snd->urb->transfer_flags |= URB_ZERO_PACKET; snd->instance = acm; } @@ -1477,6 +1480,11 @@ static int acm_probe(struct usb_interface *intf, goto alloc_fail8; } + if (quirks & CLEAR_HALT_CONDITIONS) { + usb_clear_halt(usb_dev, usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress)); + usb_clear_halt(usb_dev, usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress)); + } + return 0; alloc_fail8: if (acm->country_codes) { @@ -1756,6 +1764,10 @@ static const struct usb_device_id acm_ids[] = { .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, + { USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */ + .driver_info = CLEAR_HALT_CONDITIONS, + }, + /* Nokia S60 phones expose two ACM channels. The first is * a modem and is picked up by the standard AT-command * information below. 
The second is 'vendor-specific' but @@ -1839,6 +1851,16 @@ static const struct usb_device_id acm_ids[] = { }, #endif + /*Samsung phone in firmware update mode */ + { USB_DEVICE(0x04e8, 0x685d), + .driver_info = IGNORE_DEVICE, + }, + + /* Exclude Infineon Flash Loader utility */ + { USB_DEVICE(0x058b, 0x0041), + .driver_info = IGNORE_DEVICE, + }, + /* control interfaces without any protocol set */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_PROTO_NONE) }, @@ -1857,6 +1879,10 @@ static const struct usb_device_id acm_ids[] = { { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_CDMA) }, + { USB_DEVICE(0x1519, 0x0452), /* Intel 7260 modem */ + .driver_info = SEND_ZERO_PACKET, + }, + { } }; diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index ffeb3c83941f5..ac830e0ae38ba 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -133,3 +133,5 @@ struct acm { #define NO_DATA_INTERFACE BIT(4) #define IGNORE_DEVICE BIT(5) #define QUIRK_CONTROL_LINE_STATE BIT(6) +#define CLEAR_HALT_CONDITIONS BIT(7) +#define SEND_ZERO_PACKET BIT(8) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 0924ee40a9661..b9adc2ec49ddb 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -869,11 +869,11 @@ static int usblp_wwait(struct usblp *usblp, int nonblock) add_wait_queue(&usblp->wwait, &waita); for (;;) { - set_current_state(TASK_INTERRUPTIBLE); if (mutex_lock_interruptible(&usblp->mut)) { rc = -EINTR; break; } + set_current_state(TASK_INTERRUPTIBLE); rc = usblp_wtest(usblp, nonblock); mutex_unlock(&usblp->mut); if (rc <= 0) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index b2a540b43f97c..894894f2ff935 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -112,16 +112,18 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bmAttributes = 16; } else if (usb_endpoint_xfer_isoc(&ep->desc) && - desc->bmAttributes > 2) { + USB_SS_MULT(desc->bmAttributes) > 3) { dev_warn(ddev, "Isoc endpoint has Mult of %d in " "config %d interface %d altsetting %d ep %d: " - "setting to 3\n", desc->bmAttributes + 1, + "setting to 3\n", + USB_SS_MULT(desc->bmAttributes), cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bmAttributes = 2; } if (usb_endpoint_xfer_isoc(&ep->desc)) - max_tx = (desc->bMaxBurst + 1) * (desc->bmAttributes + 1) * + max_tx = (desc->bMaxBurst + 1) * + (USB_SS_MULT(desc->bmAttributes)) * usb_endpoint_maxp(&ep->desc); else if (usb_endpoint_xfer_int(&ep->desc)) max_tx = usb_endpoint_maxp(&ep->desc) * diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 4b0448c26810c..986abde076831 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -513,7 +513,7 @@ static void async_completed(struct urb *urb) snoop(&urb->dev->dev, "urb complete\n"); snoop_urb(urb->dev, as->userurb, urb->pipe, urb->actual_length, as->status, COMPLETE, NULL, 0); - if ((urb->transfer_flags & URB_DIR_MASK) == USB_DIR_IN) + if ((urb->transfer_flags & URB_DIR_MASK) == URB_DIR_IN) snoop_urb_data(urb, urb->actual_length); if (as->status < 0 && as->bulk_addr && as->status != -ECONNRESET && diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 45a915ccd71c0..1c1385e3a8248 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1022,9 +1022,12 @@ static int register_root_hub(struct usb_hcd *hcd) dev_name(&usb_dev->dev), 
retval); return (retval < 0) ? retval : -EMSGSIZE; } - if (usb_dev->speed == USB_SPEED_SUPER) { + + if (le16_to_cpu(usb_dev->descriptor.bcdUSB) >= 0x0201) { retval = usb_get_bos_descriptor(usb_dev); - if (retval < 0) { + if (!retval) { + usb_dev->lpm_capable = usb_device_supports_lpm(usb_dev); + } else if (usb_dev->speed == USB_SPEED_SUPER) { mutex_unlock(&usb_bus_list_lock); dev_dbg(parent_dev, "can't read %s bos descriptor %d\n", dev_name(&usb_dev->dev), retval); diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 3b71516877768..e56ad83b35a46 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -122,8 +122,12 @@ struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev) return usb_get_intfdata(hdev->actconfig->interface[0]); } -static int usb_device_supports_lpm(struct usb_device *udev) +int usb_device_supports_lpm(struct usb_device *udev) { + /* Some devices have trouble with LPM */ + if (udev->quirks & USB_QUIRK_NO_LPM) + return 0; + /* USB 2.1 (and greater) devices indicate LPM support through * their USB 2.0 Extended Capabilities BOS descriptor. */ @@ -1030,10 +1034,20 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) unsigned delay; /* Continue a partial initialization */ - if (type == HUB_INIT2) - goto init2; - if (type == HUB_INIT3) + if (type == HUB_INIT2 || type == HUB_INIT3) { + device_lock(hub->intfdev); + + /* Was the hub disconnected while we were waiting? */ + if (hub->disconnected) { + device_unlock(hub->intfdev); + kref_put(&hub->kref, hub_release); + return; + } + if (type == HUB_INIT2) + goto init2; goto init3; + } + kref_get(&hub->kref); /* The superspeed hub except for root hub has to use Hub Depth * value as an offset into the route string to locate the bits @@ -1231,6 +1245,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) queue_delayed_work(system_power_efficient_wq, &hub->init_work, msecs_to_jiffies(delay)); + device_unlock(hub->intfdev); return; /* Continues at init3: below */ } else { msleep(delay); } @@ -1252,6 +1267,11 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) /* Allow autosuspend if it was suppressed */ if (type <= HUB_INIT3) usb_autopm_put_interface_async(to_usb_interface(hub->intfdev)); + + if (type == HUB_INIT2 || type == HUB_INIT3) + device_unlock(hub->intfdev); + + kref_put(&hub->kref, hub_release); } /* Implement the continuations for the delays above */ @@ -2616,9 +2636,6 @@ static bool use_new_scheme(struct usb_device *udev, int retry) return USE_NEW_SCHEME(retry); } -static int hub_port_reset(struct usb_hub *hub, int port1, - struct usb_device *udev, unsigned int delay, bool warm); - /* Is a USB 3.0 port in the Inactive or Compliance Mode state? * Port warm reset is required to recover */ @@ -2706,44 +2723,6 @@ static int hub_port_wait_reset(struct usb_hub *hub, int port1, return 0; } -static void hub_port_finish_reset(struct usb_hub *hub, int port1, - struct usb_device *udev, int *status) -{ - switch (*status) { - case 0: - /* TRSTRCY = 10 ms; plus some extra */ - msleep(10 + 40); - if (udev) { - struct usb_hcd *hcd = bus_to_hcd(udev->bus); - - update_devnum(udev, 0); - /* The xHC may think the device is already reset, - * so ignore the status. 
- */ - if (hcd->driver->reset_device) - hcd->driver->reset_device(hcd, udev); - } - /* FALL THROUGH */ - case -ENOTCONN: - case -ENODEV: - usb_clear_port_feature(hub->hdev, - port1, USB_PORT_FEAT_C_RESET); - if (hub_is_superspeed(hub->hdev)) { - usb_clear_port_feature(hub->hdev, port1, - USB_PORT_FEAT_C_BH_PORT_RESET); - usb_clear_port_feature(hub->hdev, port1, - USB_PORT_FEAT_C_PORT_LINK_STATE); - usb_clear_port_feature(hub->hdev, port1, - USB_PORT_FEAT_C_CONNECTION); - } - if (udev) - usb_set_device_state(udev, *status - ? USB_STATE_NOTATTACHED - : USB_STATE_DEFAULT); - break; - } -} - /* Handle port reset and port warm(BH) reset (for USB3 protocol ports) */ static int hub_port_reset(struct usb_hub *hub, int port1, struct usb_device *udev, unsigned int delay, bool warm) @@ -2767,13 +2746,10 @@ static int hub_port_reset(struct usb_hub *hub, int port1, * If the caller hasn't explicitly requested a warm reset, * double check and see if one is needed. */ - status = hub_port_status(hub, port1, - &portstatus, &portchange); - if (status < 0) - goto done; - - if (hub_port_warm_reset_required(hub, port1, portstatus)) - warm = true; + if (hub_port_status(hub, port1, &portstatus, &portchange) == 0) + if (hub_port_warm_reset_required(hub, port1, + portstatus)) + warm = true; } clear_bit(port1, hub->warm_reset_bits); @@ -2799,11 +2775,19 @@ static int hub_port_reset(struct usb_hub *hub, int port1, /* Check for disconnect or reset */ if (status == 0 || status == -ENOTCONN || status == -ENODEV) { - hub_port_finish_reset(hub, port1, udev, &status); + usb_clear_port_feature(hub->hdev, port1, + USB_PORT_FEAT_C_RESET); if (!hub_is_superspeed(hub->hdev)) goto done; + usb_clear_port_feature(hub->hdev, port1, + USB_PORT_FEAT_C_BH_PORT_RESET); + usb_clear_port_feature(hub->hdev, port1, + USB_PORT_FEAT_C_PORT_LINK_STATE); + usb_clear_port_feature(hub->hdev, port1, + USB_PORT_FEAT_C_CONNECTION); + /* * If a USB 3.0 device migrates from reset to an error * state, re-issue the warm reset. @@ -2836,6 +2820,26 @@ static int hub_port_reset(struct usb_hub *hub, int port1, dev_err(&port_dev->dev, "Cannot enable. Maybe the USB cable is bad?\n"); done: + if (status == 0) { + /* TRSTRCY = 10 ms; plus some extra */ + msleep(10 + 40); + if (udev) { + struct usb_hcd *hcd = bus_to_hcd(udev->bus); + + update_devnum(udev, 0); + /* The xHC may think the device is already reset, + * so ignore the status. + */ + if (hcd->driver->reset_device) + hcd->driver->reset_device(hcd, udev); + + usb_set_device_state(udev, USB_STATE_DEFAULT); + } + } else { + if (udev) + usb_set_device_state(udev, USB_STATE_NOTATTACHED); + } + if (!hub_is_superspeed(hub->hdev)) up_read(&ehci_cf_port_reset_rwsem); @@ -4509,6 +4513,8 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1, goto fail; } + usb_detect_quirks(udev); + if (udev->wusb == 0 && le16_to_cpu(udev->descriptor.bcdUSB) >= 0x0201) { retval = usb_get_bos_descriptor(udev); if (!retval) { @@ -4707,7 +4713,6 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, if (status < 0) goto loop; - usb_detect_quirks(udev); if (udev->quirks & USB_QUIRK_DELAY_INIT) msleep(1000); @@ -5323,9 +5328,6 @@ static int usb_reset_and_verify_device(struct usb_device *udev) if (udev->usb2_hw_lpm_enabled == 1) usb_set_usb2_hardware_lpm(udev, 0); - bos = udev->bos; - udev->bos = NULL; - /* Disable LPM and LTM while we reset the device and reinstall the alt * settings. 
Device-initiated LPM settings, and system exit latency * settings are cleared when the device is reset, so we have to set @@ -5334,15 +5336,17 @@ static int usb_reset_and_verify_device(struct usb_device *udev) ret = usb_unlocked_disable_lpm(udev); if (ret) { dev_err(&udev->dev, "%s Failed to disable LPM\n.", __func__); - goto re_enumerate; + goto re_enumerate_no_bos; } ret = usb_disable_ltm(udev); if (ret) { dev_err(&udev->dev, "%s Failed to disable LTM\n.", __func__); - goto re_enumerate; + goto re_enumerate_no_bos; } + bos = udev->bos; + for (i = 0; i < SET_CONFIG_TRIES; ++i) { /* ep0 maxpacket size may change; let the HCD know about it. @@ -5434,15 +5438,19 @@ static int usb_reset_and_verify_device(struct usb_device *udev) usb_set_usb2_hardware_lpm(udev, 1); usb_unlocked_enable_lpm(udev); usb_enable_ltm(udev); - usb_release_bos_descriptor(udev); - udev->bos = bos; + /* release the new BOS descriptor allocated by hub_port_init() */ + if (udev->bos != bos) { + usb_release_bos_descriptor(udev); + udev->bos = bos; + } return 0; re_enumerate: - /* LPM state doesn't matter when we're about to destroy the device. */ - hub_port_logical_disconnect(parent_hub, port1); usb_release_bos_descriptor(udev); udev->bos = bos; +re_enumerate_no_bos: + /* LPM state doesn't matter when we're about to destroy the device. */ + hub_port_logical_disconnect(parent_hub, port1); return -ENODEV; } diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index d85abfed84cca..017c1de53aa56 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -54,6 +54,13 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Logitech ConferenceCam CC3000e */ + { USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x046d, 0x0848), .driver_info = USB_QUIRK_DELAY_INIT }, + + /* Logitech PTZ Pro Camera */ + { USB_DEVICE(0x046d, 0x0853), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Logitech Quickcam Fusion */ { USB_DEVICE(0x046d, 0x08c1), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -78,6 +85,12 @@ static const struct usb_device_id usb_quirk_list[] = { /* Philips PSC805 audio device */ { USB_DEVICE(0x0471, 0x0155), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Plantronic Audio 655 DSP */ + { USB_DEVICE(0x047f, 0xc008), .driver_info = USB_QUIRK_RESET_RESUME }, + + /* Plantronic Audio 648 USB */ + { USB_DEVICE(0x047f, 0xc013), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Artisman Watchdog Dongle */ { USB_DEVICE(0x04b4, 0x0526), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, @@ -186,6 +199,12 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1a0a, 0x0200), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Blackmagic Design Intensity Shuttle */ + { USB_DEVICE(0x1edb, 0xbd3b), .driver_info = USB_QUIRK_NO_LPM }, + + /* Blackmagic Design UltraStudio SDI */ + { USB_DEVICE(0x1edb, 0xbd4f), .driver_info = USB_QUIRK_NO_LPM }, + { } /* terminating entry must be last */ }; diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index 7eb1e26798e5f..457255a3306a3 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -65,6 +65,7 @@ extern int usb_hub_init(void); extern void usb_hub_cleanup(void); extern int usb_major_init(void); extern void usb_major_cleanup(void); +extern int usb_device_supports_lpm(struct usb_device *udev); #ifdef CONFIG_PM diff --git a/drivers/usb/dwc3/core.c 
b/drivers/usb/dwc3/core.c index 2bbab3d86fffe..8e9518fe77638 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -445,12 +445,18 @@ static int dwc3_core_init(struct dwc3 *dwc) reg = dwc3_readl(dwc->regs, DWC3_GSNPSID); /* This should read as U3 followed by revision number */ - if ((reg & DWC3_GSNPSID_MASK) != 0x55330000) { + if ((reg & DWC3_GSNPSID_MASK) == 0x55330000) { + /* Detected DWC_usb3 IP */ + dwc->revision = reg; + } else if ((reg & DWC3_GSNPSID_MASK) == 0x33310000) { + /* Detected DWC_usb31 IP */ + dwc->revision = dwc3_readl(dwc->regs, DWC3_VER_NUMBER); + dwc->revision |= DWC3_REVISION_IS_DWC31; + } else { dev_err(dwc->dev, "this is not a DesignWare USB3 DRD Core\n"); ret = -ENODEV; goto err0; } - dwc->revision = reg; /* * Write Linux Version Code to our GUID register so it's easy to figure diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index c0eafa6fd4031..be245d073f15e 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -107,6 +107,9 @@ #define DWC3_GPRTBIMAP_FS0 0xc188 #define DWC3_GPRTBIMAP_FS1 0xc18c +#define DWC3_VER_NUMBER 0xc1a0 +#define DWC3_VER_TYPE 0xc1a4 + #define DWC3_GUSB2PHYCFG(n) (0xc200 + (n * 0x04)) #define DWC3_GUSB2I2CCTL(n) (0xc240 + (n * 0x04)) @@ -752,6 +755,14 @@ struct dwc3 { u32 num_event_buffers; u32 u1u2; u32 maximum_speed; + + /* + * All 3.1 IP version constants are greater than the 3.0 IP + * version constants. This works for most version checks in + * dwc3. However, in the future, this may not apply as + * features may be developed on newer versions of the 3.0 IP + * that are not in the 3.1 IP. + */ u32 revision; #define DWC3_REVISION_173A 0x5533173a @@ -774,6 +785,13 @@ struct dwc3 { #define DWC3_REVISION_270A 0x5533270a #define DWC3_REVISION_280A 0x5533280a +/* + * NOTICE: we're using bit 31 as a "is usb 3.1" flag. This is really + * just so dwc31 revisions are always larger than dwc3. 
+ */ +#define DWC3_REVISION_IS_DWC31 0x80000000 +#define DWC3_USB31_REVISION_110A (0x3131302a | DWC3_REVISION_IS_DWC31) + enum dwc3_ep0_next ep0_next_event; enum dwc3_ep0_state ep0state; enum dwc3_link_state link_state; @@ -811,7 +829,6 @@ struct dwc3 { unsigned pullups_connected:1; unsigned resize_fifos:1; unsigned setup_packet_pending:1; - unsigned start_config_issued:1; unsigned three_stage_setup:1; unsigned usb3_lpm_capable:1; diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index b773fb53d6a7c..830f020230c46 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -25,6 +25,8 @@ #include "platform_data.h" #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3 0xabcd +#define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI 0xabce +#define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB31 0xabcf #define PCI_DEVICE_ID_INTEL_BYT 0x0f37 #define PCI_DEVICE_ID_INTEL_MRFLD 0x119e #define PCI_DEVICE_ID_INTEL_BSW 0x22B7 @@ -65,6 +67,21 @@ static int dwc3_pci_quirks(struct pci_dev *pdev) sizeof(pdata)); } + if (pdev->vendor == PCI_VENDOR_ID_SYNOPSYS && + (pdev->device == PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3 || + pdev->device == PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI || + pdev->device == PCI_DEVICE_ID_SYNOPSYS_HAPSUSB31)) { + + struct dwc3_platform_data pdata; + + memset(&pdata, 0, sizeof(pdata)); + pdata.usb3_lpm_capable = true; + pdata.has_lpm_erratum = true; + + return platform_device_add_data(pci_get_drvdata(pdev), &pdata, + sizeof(pdata)); + } + return 0; } @@ -136,6 +153,14 @@ static const struct pci_device_id dwc3_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS, PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3), }, + { + PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS, + PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI), + }, + { + PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS, + PCI_DEVICE_ID_SYNOPSYS_HAPSUSB31), + }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BSW), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BYT), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRFLD), }, diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 2ef3c8d6a9dbd..00f2c456f94bb 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -545,7 +545,6 @@ static int dwc3_ep0_set_config(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl) int ret; u32 reg; - dwc->start_config_issued = false; cfg = le16_to_cpu(ctrl->wValue); switch (state) { @@ -816,6 +815,11 @@ static void dwc3_ep0_complete_data(struct dwc3 *dwc, unsigned maxp = ep0->endpoint.maxpacket; transfer_size += (maxp - (transfer_size % maxp)); + + /* Maximum of DWC3_EP0_BOUNCE_SIZE can only be received */ + if (transfer_size > DWC3_EP0_BOUNCE_SIZE) + transfer_size = DWC3_EP0_BOUNCE_SIZE; + transferred = min_t(u32, ur->length, transfer_size - length); memcpy(ur->buf, dwc->ep0_bounce, transferred); @@ -937,11 +941,14 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, return; } - WARN_ON(req->request.length > DWC3_EP0_BOUNCE_SIZE); - maxpacket = dep->endpoint.maxpacket; transfer_size = roundup(req->request.length, maxpacket); + if (transfer_size > DWC3_EP0_BOUNCE_SIZE) { + dev_WARN(dwc->dev, "bounce buf can't handle req len\n"); + transfer_size = DWC3_EP0_BOUNCE_SIZE; + } + dwc->ep0_bounced = true; /* diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 8946c32047e99..b886226be2418 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -291,6 +291,8 @@ int dwc3_send_gadget_generic_command(struct dwc3 *dwc, unsigned cmd, u32 param) dwc3_trace(trace_dwc3_gadget, "Command Complete --> %d", DWC3_DGCMD_STATUS(reg)); + if 
(DWC3_DGCMD_STATUS(reg)) + return -EINVAL; return 0; } @@ -328,6 +330,8 @@ int dwc3_send_gadget_ep_cmd(struct dwc3 *dwc, unsigned ep, dwc3_trace(trace_dwc3_gadget, "Command Complete --> %d", DWC3_DEPCMD_STATUS(reg)); + if (DWC3_DEPCMD_STATUS(reg)) + return -EINVAL; return 0; } @@ -384,24 +388,66 @@ static void dwc3_free_trb_pool(struct dwc3_ep *dep) dep->trb_pool_dma = 0; } +static int dwc3_gadget_set_xfer_resource(struct dwc3 *dwc, struct dwc3_ep *dep); + +/** + * dwc3_gadget_start_config - Configure EP resources + * @dwc: pointer to our controller context structure + * @dep: endpoint that is being enabled + * + * The assignment of transfer resources cannot perfectly follow the + * data book due to the fact that the controller driver does not have + * all knowledge of the configuration in advance. It is given this + * information piecemeal by the composite gadget framework after every + * SET_CONFIGURATION and SET_INTERFACE. Trying to follow the databook + * programming model in this scenario can cause errors. For two + * reasons: + * + * 1) The databook says to do DEPSTARTCFG for every SET_CONFIGURATION + * and SET_INTERFACE (8.1.5). This is incorrect in the scenario of + * multiple interfaces. + * + * 2) The databook does not mention doing more DEPXFERCFG for new + * endpoint on alt setting (8.1.6). + * + * The following simplified method is used instead: + * + * All hardware endpoints can be assigned a transfer resource and this + * setting will stay persistent until either a core reset or + * hibernation. So whenever we do a DEPSTARTCFG(0) we can go ahead and + * do DEPXFERCFG for every hardware endpoint as well. We are + * guaranteed that there are as many transfer resources as endpoints. + * + * This function is called for each endpoint when it is being enabled + * but is triggered only when called for EP0-out, which always happens + * first, and which should only happen in one of the above conditions. 
+ */ static int dwc3_gadget_start_config(struct dwc3 *dwc, struct dwc3_ep *dep) { struct dwc3_gadget_ep_cmd_params params; u32 cmd; + int i; + int ret; + + if (dep->number) + return 0; memset(&params, 0x00, sizeof(params)); + cmd = DWC3_DEPCMD_DEPSTARTCFG; - if (dep->number != 1) { - cmd = DWC3_DEPCMD_DEPSTARTCFG; - /* XferRscIdx == 0 for ep0 and 2 for the remaining */ - if (dep->number > 1) { - if (dwc->start_config_issued) - return 0; - dwc->start_config_issued = true; - cmd |= DWC3_DEPCMD_PARAM(2); - } + ret = dwc3_send_gadget_ep_cmd(dwc, 0, cmd, &params); + if (ret) + return ret; + + for (i = 0; i < DWC3_ENDPOINTS_NUM; i++) { + struct dwc3_ep *dep = dwc->eps[i]; + + if (!dep) + continue; - return dwc3_send_gadget_ep_cmd(dwc, 0, cmd, &params); + ret = dwc3_gadget_set_xfer_resource(dwc, dep); + if (ret) + return ret; } return 0; @@ -515,10 +561,6 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep, struct dwc3_trb *trb_st_hw; struct dwc3_trb *trb_link; - ret = dwc3_gadget_set_xfer_resource(dwc, dep); - if (ret) - return ret; - dep->endpoint.desc = desc; dep->comp_desc = comp_desc; dep->type = usb_endpoint_type(desc); @@ -1585,8 +1627,6 @@ static int dwc3_gadget_start(struct usb_gadget *g, } dwc3_writel(dwc->regs, DWC3_DCFG, reg); - dwc->start_config_issued = false; - /* Start with SuperSpeed Default */ dwc3_gadget_ep0_desc.wMaxPacketSize = cpu_to_le16(512); @@ -1855,27 +1895,32 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep, unsigned int i; int ret; - req = next_request(&dep->req_queued); - if (!req) { - WARN_ON_ONCE(1); - return 1; - } - i = 0; do { - slot = req->start_slot + i; - if ((slot == DWC3_TRB_NUM - 1) && + req = next_request(&dep->req_queued); + if (!req) { + WARN_ON_ONCE(1); + return 1; + } + i = 0; + do { + slot = req->start_slot + i; + if ((slot == DWC3_TRB_NUM - 1) && usb_endpoint_xfer_isoc(dep->endpoint.desc)) - slot++; - slot %= DWC3_TRB_NUM; - trb = &dep->trb_pool[slot]; + slot++; + slot %= DWC3_TRB_NUM; + trb = &dep->trb_pool[slot]; + + ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb, + event, status); + if (ret) + break; + } while (++i < req->request.num_mapped_sgs); + + dwc3_gadget_giveback(dep, req, status); - ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb, - event, status); if (ret) break; - } while (++i < req->request.num_mapped_sgs); - - dwc3_gadget_giveback(dep, req, status); + } while (1); if (usb_endpoint_xfer_isoc(dep->endpoint.desc) && list_empty(&dep->req_queued)) { @@ -1902,12 +1947,16 @@ static void dwc3_endpoint_transfer_complete(struct dwc3 *dwc, { unsigned status = 0; int clean_busy; + u32 is_xfer_complete; + + is_xfer_complete = (event->endpoint_event == DWC3_DEPEVT_XFERCOMPLETE); if (event->status & DEPEVT_STATUS_BUSERR) status = -ECONNRESET; clean_busy = dwc3_cleanup_done_reqs(dwc, dep, event, status); - if (clean_busy) + if (clean_busy && (is_xfer_complete || + usb_endpoint_xfer_isoc(dep->endpoint.desc))) dep->flags &= ~DWC3_EP_BUSY; /* @@ -2154,7 +2203,6 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc) dwc3_writel(dwc->regs, DWC3_DCTL, reg); dwc3_disconnect_gadget(dwc); - dwc->start_config_issued = false; dwc->gadget.speed = USB_SPEED_UNKNOWN; dwc->setup_packet_pending = false; @@ -2205,7 +2253,6 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc) dwc3_stop_active_transfers(dwc); dwc3_clear_stall_all_ep(dwc); - dwc->start_config_issued = false; /* Reset device address to zero */ reg = dwc3_readl(dwc->regs, DWC3_DCFG); @@ -2701,11 +2748,33 @@ int dwc3_gadget_init(struct dwc3 *dwc) } 
dwc->gadget.ops = &dwc3_gadget_ops; - dwc->gadget.max_speed = USB_SPEED_SUPER; dwc->gadget.speed = USB_SPEED_UNKNOWN; dwc->gadget.sg_supported = true; dwc->gadget.name = "dwc3-gadget"; + /* + * FIXME We might be setting max_speed to <SUPER, however versions + * <2.20a of dwc3 have an issue with metastability (documented + * elsewhere in this driver) which tells us we can't set max speed to + * anything lower than SUPER. + * + * Because gadget.max_speed is only used by composite.c and function + * drivers (i.e. it won't go into dwc3's registers) we are allowing this + * to happen so we avoid sending SuperSpeed Capability descriptor + * together with our BOS descriptor as that could confuse host into + * thinking we can handle super speed. + * + * Note that, in fact, we won't even support GetBOS requests when speed + * is less than super speed because we don't have means, yet, to tell + * composite.c that we are USB 2.0 + LPM ECN. + */ + if (dwc->revision < DWC3_REVISION_220A) + dwc3_trace(trace_dwc3_gadget, + "Changing max_speed on rev %08x\n", + dwc->revision); + + dwc->gadget.max_speed = dwc->maximum_speed; + /* * Per databook, DWC3 needs buffer size to be aligned to MaxPacketSize * on ep out. diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 4e3447bbd0976..58b4657fc721d 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1758,10 +1758,13 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) * take such requests too, if that's ever needed: to work * in config 0, etc. */ - list_for_each_entry(f, &cdev->config->functions, list) - if (f->req_match && f->req_match(f, ctrl)) - goto try_fun_setup; - f = NULL; + if (cdev->config) { + list_for_each_entry(f, &cdev->config->functions, list) + if (f->req_match && f->req_match(f, ctrl)) + goto try_fun_setup; + f = NULL; + } + switch (ctrl->bRequestType & USB_RECIP_MASK) { case USB_RECIP_INTERFACE: if (!cdev->config || intf >= MAX_CONFIG_INTERFACES) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 3507f880eb742..6e7be91e6097c 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -924,7 +924,8 @@ static ssize_t ffs_epfile_write_iter(struct kiocb *kiocb, struct iov_iter *from) kiocb->private = p; - kiocb_set_cancel_fn(kiocb, ffs_aio_cancel); + if (p->aio) + kiocb_set_cancel_fn(kiocb, ffs_aio_cancel); res = ffs_epfile_io(kiocb->ki_filp, p); if (res == -EIOCBQUEUED) @@ -968,7 +969,8 @@ static ssize_t ffs_epfile_read_iter(struct kiocb *kiocb, struct iov_iter *to) kiocb->private = p; - kiocb_set_cancel_fn(kiocb, ffs_aio_cancel); + if (p->aio) + kiocb_set_cancel_fn(kiocb, ffs_aio_cancel); res = ffs_epfile_io(kiocb->ki_filp, p); if (res == -EIOCBQUEUED) @@ -3435,6 +3437,7 @@ static int ffs_ready(struct ffs_data *ffs) static void ffs_closed(struct ffs_data *ffs) { struct ffs_dev *ffs_obj; + struct f_fs_opts *opts; ENTER(); ffs_dev_lock(); @@ -3449,8 +3452,13 @@ static void ffs_closed(struct ffs_data *ffs) ffs_obj->ffs_closed_callback) ffs_obj->ffs_closed_callback(ffs); - if (!ffs_obj->opts || ffs_obj->opts->no_configfs - || !ffs_obj->opts->func_inst.group.cg_item.ci_parent) + if (ffs_obj->opts) + opts = ffs_obj->opts; + else + goto done; + + if (opts->no_configfs || !opts->func_inst.group.cg_item.ci_parent + || !atomic_read(&opts->func_inst.group.cg_item.ci_kref.refcount)) goto done; unregister_gadget_item(ffs_obj->opts-> diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 3cc109f3c9c80..15c3071550370 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -2786,7 +2786,7 @@ int fsg_common_set_nluns(struct fsg_common *common, int nluns) return -EINVAL; } - curlun = kcalloc(nluns, sizeof(*curlun), GFP_KERNEL); + curlun = kcalloc(FSG_MAX_LUNS, sizeof(*curlun), GFP_KERNEL); if (unlikely(!curlun)) return -ENOMEM; @@ -2796,8 +2796,6 @@ int fsg_common_set_nluns(struct fsg_common *common, int nluns) common->luns = curlun; common->nluns = nluns; - pr_info("Number of LUNs=%d\n", common->nluns); - return 0; } EXPORT_SYMBOL_GPL(fsg_common_set_nluns); @@ -3563,14 +3561,26 @@ static struct 
usb_function *fsg_alloc(struct usb_function_instance *fi) struct fsg_opts *opts = fsg_opts_from_func_inst(fi); struct fsg_common *common = opts->common; struct fsg_dev *fsg; + unsigned nluns, i; fsg = kzalloc(sizeof(*fsg), GFP_KERNEL); if (unlikely(!fsg)) return ERR_PTR(-ENOMEM); mutex_lock(&opts->lock); + if (!opts->refcnt) { + for (nluns = i = 0; i < FSG_MAX_LUNS; ++i) + if (common->luns[i]) + nluns = i + 1; + if (!nluns) + pr_warn("No LUNS defined, continuing anyway\n"); + else + common->nluns = nluns; + pr_info("Number of LUNs=%u\n", common->nluns); + } opts->refcnt++; mutex_unlock(&opts->lock); + fsg->function.name = FSG_DRIVER_DESC; fsg->function.bind = fsg_bind; fsg->function.unbind = fsg_unbind; diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 6d3eb8b00a488..96d935b00504c 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -975,6 +975,29 @@ free_ep(struct uac2_rtd_params *prm, struct usb_ep *ep) "%s:%d Error!\n", __func__, __LINE__); } +static void set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, + struct usb_endpoint_descriptor *ep_desc, + unsigned int factor, bool is_playback) +{ + int chmask, srate, ssize; + u16 max_packet_size; + + if (is_playback) { + chmask = uac2_opts->p_chmask; + srate = uac2_opts->p_srate; + ssize = uac2_opts->p_ssize; + } else { + chmask = uac2_opts->c_chmask; + srate = uac2_opts->c_srate; + ssize = uac2_opts->c_ssize; + } + + max_packet_size = num_channels(chmask) * ssize * + DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1))); + ep_desc->wMaxPacketSize = cpu_to_le16(min(max_packet_size, + le16_to_cpu(ep_desc->wMaxPacketSize))); +} + static int afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) { @@ -1070,10 +1093,14 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) uac2->p_prm.uac2 = uac2; uac2->c_prm.uac2 = uac2; + /* Calculate wMaxPacketSize according to audio bandwidth */ + set_ep_max_packet_size(uac2_opts, &fs_epin_desc, 1000, true); + set_ep_max_packet_size(uac2_opts, &fs_epout_desc, 1000, false); + set_ep_max_packet_size(uac2_opts, &hs_epin_desc, 8000, true); + set_ep_max_packet_size(uac2_opts, &hs_epout_desc, 8000, false); + hs_epout_desc.bEndpointAddress = fs_epout_desc.bEndpointAddress; - hs_epout_desc.wMaxPacketSize = fs_epout_desc.wMaxPacketSize; hs_epin_desc.bEndpointAddress = fs_epin_desc.bEndpointAddress; - hs_epin_desc.wMaxPacketSize = fs_epin_desc.wMaxPacketSize; ret = usb_assign_descriptors(fn, fs_audio_desc, hs_audio_desc, NULL); if (ret) @@ -1162,14 +1189,14 @@ afunc_set_alt(struct usb_function *fn, unsigned intf, unsigned alt) factor = 1000; } else { ep_desc = &hs_epin_desc; - factor = 125; + factor = 8000; } /* pre-compute some values for iso_complete() */ uac2->p_framesize = opts->p_ssize * num_channels(opts->p_chmask); rate = opts->p_srate * uac2->p_framesize; - uac2->p_interval = (1 << (ep_desc->bInterval - 1)) * factor; + uac2->p_interval = factor / (1 << (ep_desc->bInterval - 1)); uac2->p_pktsize = min_t(unsigned int, rate / uac2->p_interval, prm->max_psize); diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index 351d48550c332..d6ca3697d3c8d 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -1634,7 +1634,7 @@ static irqreturn_t usba_udc_irq(int irq, void *devid) spin_lock(&udc->lock); int_enb = usba_int_enb_get(udc); - status = usba_readl(udc, INT_STA) & int_enb; + status = 
usba_readl(udc, INT_STA) & (int_enb | USBA_HIGH_SPEED); DBG(DBG_INT, "irq, status=%#08x\n", status); if (status & USBA_DET_SUSPEND) { diff --git a/drivers/usb/gadget/udc/m66592-udc.c b/drivers/usb/gadget/udc/m66592-udc.c index 309706fe4bf0a..9704053dfe053 100644 --- a/drivers/usb/gadget/udc/m66592-udc.c +++ b/drivers/usb/gadget/udc/m66592-udc.c @@ -1052,7 +1052,7 @@ static void set_feature(struct m66592 *m66592, struct usb_ctrlrequest *ctrl) tmp = m66592_read(m66592, M66592_INTSTS0) & M66592_CTSQ; udelay(1); - } while (tmp != M66592_CS_IDST || timeout-- > 0); + } while (tmp != M66592_CS_IDST && timeout-- > 0); if (tmp == M66592_CS_IDST) m66592_bset(m66592, diff --git a/drivers/usb/gadget/udc/mv_udc_core.c b/drivers/usb/gadget/udc/mv_udc_core.c index d32160d6463f5..5da37c957b53c 100644 --- a/drivers/usb/gadget/udc/mv_udc_core.c +++ b/drivers/usb/gadget/udc/mv_udc_core.c @@ -2167,7 +2167,7 @@ static int mv_udc_probe(struct platform_device *pdev) return -ENODEV; } - udc->phy_regs = ioremap(r->start, resource_size(r)); + udc->phy_regs = devm_ioremap(&pdev->dev, r->start, resource_size(r)); if (udc->phy_regs == NULL) { dev_err(&pdev->dev, "failed to map phy I/O memory\n"); return -EBUSY; diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c index b51226abade62..7a454708e948a 100644 --- a/drivers/usb/gadget/udc/pxa27x_udc.c +++ b/drivers/usb/gadget/udc/pxa27x_udc.c @@ -2535,6 +2535,9 @@ static int pxa_udc_suspend(struct platform_device *_dev, pm_message_t state) udc->pullup_resume = udc->pullup_on; dplus_pullup(udc, 0); + if (udc->driver) + udc->driver->disconnect(&udc->gadget); + return 0; } diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c index d69c35558f685..7d69931cf45d0 100644 --- a/drivers/usb/gadget/udc/udc-core.c +++ b/drivers/usb/gadget/udc/udc-core.c @@ -321,6 +321,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, err3: put_device(&udc->dev); + device_del(&gadget->dev); err2: put_device(&gadget->dev); diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile index 65b0b6a585997..da03d8b258dda 100644 --- a/drivers/usb/host/Makefile +++ b/drivers/usb/host/Makefile @@ -26,9 +26,6 @@ obj-$(CONFIG_USB_WHCI_HCD) += whci/ obj-$(CONFIG_PCI) += pci-quirks.o -obj-$(CONFIG_USB_XHCI_PCI) += xhci-pci.o -obj-$(CONFIG_USB_XHCI_PLATFORM) += xhci-plat-hcd.o - obj-$(CONFIG_USB_EHCI_HCD) += ehci-hcd.o obj-$(CONFIG_USB_EHCI_PCI) += ehci-pci.o obj-$(CONFIG_USB_EHCI_HCD_PLATFORM) += ehci-platform.o @@ -63,6 +60,8 @@ obj-$(CONFIG_USB_OHCI_HCD_PXA27X) += ohci-pxa27x.o obj-$(CONFIG_USB_UHCI_HCD) += uhci-hcd.o obj-$(CONFIG_USB_FHCI_HCD) += fhci.o obj-$(CONFIG_USB_XHCI_HCD) += xhci-hcd.o +obj-$(CONFIG_USB_XHCI_PCI) += xhci-pci.o +obj-$(CONFIG_USB_XHCI_PLATFORM) += xhci-plat-hcd.o obj-$(CONFIG_USB_SL811_HCD) += sl811-hcd.o obj-$(CONFIG_USB_SL811_CS) += sl811_cs.o obj-$(CONFIG_USB_U132_HCD) += u132-hcd.o diff --git a/drivers/usb/host/ehci-orion.c b/drivers/usb/host/ehci-orion.c index bfcbb9aa8816b..ee8d5faa01947 100644 --- a/drivers/usb/host/ehci-orion.c +++ b/drivers/usb/host/ehci-orion.c @@ -224,7 +224,8 @@ static int ehci_orion_drv_probe(struct platform_device *pdev) priv->phy = devm_phy_optional_get(&pdev->dev, "usb"); if (IS_ERR(priv->phy)) { err = PTR_ERR(priv->phy); - goto err_phy_get; + if (err != -ENOSYS) + goto err_phy_get; } else { err = phy_init(priv->phy); if (err) diff --git a/drivers/usb/host/ehci-sysfs.c b/drivers/usb/host/ehci-sysfs.c index 5e44407aa0997..5216f2b09d633 100644 --- 
a/drivers/usb/host/ehci-sysfs.c +++ b/drivers/usb/host/ehci-sysfs.c @@ -29,7 +29,7 @@ static ssize_t show_companion(struct device *dev, int count = PAGE_SIZE; char *ptr = buf; - ehci = hcd_to_ehci(bus_to_hcd(dev_get_drvdata(dev))); + ehci = hcd_to_ehci(dev_get_drvdata(dev)); nports = HCS_N_PORTS(ehci->hcs_params); for (index = 0; index < nports; ++index) { @@ -54,7 +54,7 @@ static ssize_t store_companion(struct device *dev, struct ehci_hcd *ehci; int portnum, new_owner; - ehci = hcd_to_ehci(bus_to_hcd(dev_get_drvdata(dev))); + ehci = hcd_to_ehci(dev_get_drvdata(dev)); new_owner = PORT_OWNER; /* Owned by companion */ if (sscanf(buf, "%d", &portnum) != 1) return -EINVAL; @@ -85,7 +85,7 @@ static ssize_t show_uframe_periodic_max(struct device *dev, struct ehci_hcd *ehci; int n; - ehci = hcd_to_ehci(bus_to_hcd(dev_get_drvdata(dev))); + ehci = hcd_to_ehci(dev_get_drvdata(dev)); n = scnprintf(buf, PAGE_SIZE, "%d\n", ehci->uframe_periodic_max); return n; } @@ -101,7 +101,7 @@ static ssize_t store_uframe_periodic_max(struct device *dev, unsigned long flags; ssize_t ret; - ehci = hcd_to_ehci(bus_to_hcd(dev_get_drvdata(dev))); + ehci = hcd_to_ehci(dev_get_drvdata(dev)); if (kstrtouint(buf, 0, &uframe_periodic_max) < 0) return -EINVAL; diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c index 1463c398d3220..fe1d5fc7da2d9 100644 --- a/drivers/usb/host/ohci-q.c +++ b/drivers/usb/host/ohci-q.c @@ -980,10 +980,6 @@ static void finish_unlinks(struct ohci_hcd *ohci) int completed, modified; __hc32 *prev; - /* Is this ED already invisible to the hardware? */ - if (ed->state == ED_IDLE) - goto ed_idle; - /* only take off EDs that the HC isn't using, accounting for * frame counter wraps and EDs with partially retired TDs */ @@ -1011,12 +1007,10 @@ static void finish_unlinks(struct ohci_hcd *ohci) } /* ED's now officially unlinked, hc doesn't see */ - ed->state = ED_IDLE; ed->hwHeadP &= ~cpu_to_hc32(ohci, ED_H); ed->hwNextED = 0; wmb(); ed->hwINFO &= ~cpu_to_hc32(ohci, ED_SKIP | ED_DEQUEUE); -ed_idle: /* reentrancy: if we drop the schedule lock, someone might * have modified this list. normally it's just prepending @@ -1087,6 +1081,7 @@ static void finish_unlinks(struct ohci_hcd *ohci) if (list_empty(&ed->td_list)) { *last = ed->ed_next; ed->ed_next = NULL; + ed->state = ED_IDLE; list_del(&ed->in_use_list); } else if (ohci->rh_state == OHCI_RH_RUNNING) { *last = ed->ed_next; diff --git a/drivers/usb/host/whci/qset.c b/drivers/usb/host/whci/qset.c index dc31c425ce017..9f1c0538b2112 100644 --- a/drivers/usb/host/whci/qset.c +++ b/drivers/usb/host/whci/qset.c @@ -377,6 +377,10 @@ static int qset_fill_page_list(struct whc *whc, struct whc_std *std, gfp_t mem_f if (std->pl_virt == NULL) return -ENOMEM; std->dma_addr = dma_map_single(whc->wusbhc.dev, std->pl_virt, pl_len, DMA_TO_DEVICE); + if (dma_mapping_error(whc->wusbhc.dev, std->dma_addr)) { + kfree(std->pl_virt); + return -EFAULT; + } for (p = 0; p < std->num_pointers; p++) { std->pl_virt[p].buf_ptr = cpu_to_le64(dma_addr); diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 0827d7c965276..ee07ba41c8db3 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -484,10 +484,13 @@ static void xhci_hub_report_usb3_link_state(struct xhci_hcd *xhci, u32 pls = status_reg & PORT_PLS_MASK; /* resume state is an xHCI internal state. - * Do not report it to usb core.
+ * Do not report it to usb core; instead, pretend to be U3, + * so usb core knows it is not ready for transfer */ - if (pls == XDEV_RESUME) + if (pls == XDEV_RESUME) { + *status |= USB_SS_PORT_LS_U3; return; + } /* When the CAS bit is set then warm reset * should be performed on port @@ -588,7 +591,14 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, status |= USB_PORT_STAT_C_RESET << 16; /* USB3.0 only */ if (hcd->speed == HCD_USB3) { - if ((raw_port_status & PORT_PLC)) + /* Port link change with port in resume state should not be + * reported to usbcore, as this is an internal state to be + * handled by the xhci driver. Reporting PLC to usbcore may + * cause usbcore to clear PLC first, so the port change event + * irq won't be generated. + */ + if ((raw_port_status & PORT_PLC) && + (raw_port_status & PORT_PLS_MASK) != XDEV_RESUME) status |= USB_PORT_STAT_C_LINK_STATE << 16; if ((raw_port_status & PORT_WRC)) status |= USB_PORT_STAT_C_BH_RESET << 16; @@ -1120,10 +1130,10 @@ int xhci_bus_suspend(struct usb_hcd *hcd) spin_lock_irqsave(&xhci->lock, flags); if (hcd->self.root_hub->do_remote_wakeup) { - if (bus_state->resuming_ports) { + if (bus_state->resuming_ports || /* USB2 */ + bus_state->port_remote_wakeup) { /* USB3 */ spin_unlock_irqrestore(&xhci->lock, flags); - xhci_dbg(xhci, "suspend failed because " - "a port is resuming\n"); + xhci_dbg(xhci, "suspend failed because a port is resuming\n"); return -EBUSY; } } diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index f8336408ef07c..41f841fa6c4de 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1427,10 +1427,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, /* Attempt to use the ring cache */ if (virt_dev->num_rings_cached == 0) return -ENOMEM; + virt_dev->num_rings_cached--; virt_dev->eps[ep_index].new_ring = virt_dev->ring_cache[virt_dev->num_rings_cached]; virt_dev->ring_cache[virt_dev->num_rings_cached] = NULL; - virt_dev->num_rings_cached--; xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring, 1, type); } @@ -1498,10 +1498,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, * use Event Data TRBs, and we don't chain in a link TRB on short * transfers, we're basically dividing by 1. * - * xHCI 1.0 specification indicates that the Average TRB Length should - * be set to 8 for control endpoints. + * The xHCI 1.0 and 1.1 specifications indicate that the Average TRB + * Length should be set to 8 for control endpoints. */ - if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version == 0x100) + if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version >= 0x100) ep_ctx->tx_info |= cpu_to_le32(AVG_TRB_LENGTH_FOR_EP(8)); else ep_ctx->tx_info |= @@ -2320,6 +2320,10 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) INIT_LIST_HEAD(&xhci->cmd_list); + /* init command timeout timer */ + setup_timer(&xhci->cmd_timer, xhci_handle_command_timeout, + (unsigned long)xhci); + page_size = readl(&xhci->op_regs->page_size); xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Supported page size register = 0x%x", page_size); @@ -2504,10 +2508,6 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) "Wrote ERST address to ir_set 0."); xhci_print_ir_set(xhci, 0); - /* init command timeout timer */ - setup_timer(&xhci->cmd_timer, xhci_handle_command_timeout, - (unsigned long)xhci); - /* * XXX: Might need to set the Interrupter Moderation Register to * something other than the default (~1ms minimum between interrupts).
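The xhci-mem.c ring-cache hunk above is subtle: num_rings_cached is a count of valid entries, so it must be decremented before it can serve as an index; the old order read ring_cache[num_rings_cached], one slot past the last cached ring. A minimal stand-alone sketch of the corrected pop pattern (hypothetical types and sizes, not the kernel's structures):

/* Stand-alone model of the decrement-before-index pop ordering. */
struct ring;

struct ring_cache {
	struct ring *slots[31];
	unsigned int count;	/* number of valid entries in slots[] */
};

/* Pop the most recently cached ring; returns NULL if the cache is empty. */
static struct ring *ring_cache_pop(struct ring_cache *c)
{
	struct ring *r;

	if (c->count == 0)
		return NULL;
	c->count--;			/* turn the count into the top index */
	r = c->slots[c->count];
	c->slots[c->count] = NULL;	/* don't leave a stale pointer behind */
	return r;
}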
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 2af32e26fafc3..3ff5fcc7c94bd 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -23,10 +23,17 @@ #include <linux/pci.h> #include <linux/slab.h> #include <linux/module.h> +#include <linux/acpi.h> #include "xhci.h" #include "xhci-trace.h" +#define SSIC_PORT_NUM 2 +#define SSIC_PORT_CFG2 0x880c +#define SSIC_PORT_CFG2_OFFSET 0x30 +#define PROG_DONE (1 << 30) +#define SSIC_PORT_UNUSED (1 << 31) + /* Device for a quirk */ #define PCI_VENDOR_ID_FRESCO_LOGIC 0x1b73 #define PCI_DEVICE_ID_FRESCO_LOGIC_PDK 0x1000 @@ -40,6 +47,7 @@ #define PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI 0x22b5 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI 0xa12f #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI 0x9d2f +#define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI 0x0aa8 static const char hcd_name[] = "xhci_hcd"; @@ -135,13 +143,19 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI) { xhci->quirks |= XHCI_SPURIOUS_REBOOT; + xhci->quirks |= XHCI_SPURIOUS_WAKEUP; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } + if (pdev->vendor == PCI_VENDOR_ID_INTEL && + pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) { + xhci->quirks |= XHCI_SSIC_PORT_UNUSED; + } if (pdev->vendor == PCI_VENDOR_ID_ETRON && pdev->device == PCI_DEVICE_ID_EJ168) { xhci->quirks |= XHCI_RESET_ON_RESUME; @@ -168,20 +182,18 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) "QUIRK: Resetting on resume"); } -/* - * Make sure PME works on some Intel xHCI controllers by writing 1 to clear - * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4 - */ -static void xhci_pme_quirk(struct xhci_hcd *xhci) +#ifdef CONFIG_ACPI +static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) { - u32 val; - void __iomem *reg; - - reg = (void __iomem *) xhci->cap_regs + 0x80a4; - val = readl(reg); - writel(val | BIT(28), reg); - readl(reg); + static const u8 intel_dsm_uuid[] = { + 0xb7, 0x0c, 0x34, 0xac, 0x01, 0xe9, 0xbf, 0x45, + 0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23, + }; + acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), intel_dsm_uuid, 3, 1, NULL); } +#else + static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) { } +#endif /* CONFIG_ACPI */ /* called during probe() after chip reset completes */ static int xhci_pci_setup(struct usb_hcd *hcd) @@ -262,6 +274,9 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) HCC_MAX_PSA(xhci->hcc_params) >= 4) xhci->shared_hcd->can_do_streams = 1; + if (xhci->quirks & XHCI_PME_STUCK_QUIRK) + xhci_pme_acpi_rtd3_enable(dev); + /* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */ pm_runtime_put_noidle(&dev->dev); @@ -295,10 +310,65 @@ static void xhci_pci_remove(struct pci_dev *dev) } #ifdef CONFIG_PM +/* + * On some Intel xHCI controllers, getting D3 to work requires marking + * the SSIC port as "unused", via a vendor-specific SSIC CONFIG register + * at offset 0x883c, before putting the xHCI into D3; after D3 exit, the + * SSIC port needs to be marked as "used" again. Without this change, + * the xHCI might not enter the D3 state.
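+ * (With the defines above, port i's CFG2 register lives at + * SSIC_PORT_CFG2 + i * SSIC_PORT_CFG2_OFFSET, i.e. 0x880c and 0x883c + * for the two SSIC ports.)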
+ */ +static void xhci_ssic_port_unused_quirk(struct usb_hcd *hcd, bool suspend) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + u32 val; + void __iomem *reg; + int i; + + for (i = 0; i < SSIC_PORT_NUM; i++) { + reg = (void __iomem *) xhci->cap_regs + + SSIC_PORT_CFG2 + + i * SSIC_PORT_CFG2_OFFSET; + + /* Notify SSIC that SSIC profile programming is not done. */ + val = readl(reg) & ~PROG_DONE; + writel(val, reg); + + /* Mark SSIC port as unused(suspend) or used(resume) */ + val = readl(reg); + if (suspend) + val |= SSIC_PORT_UNUSED; + else + val &= ~SSIC_PORT_UNUSED; + writel(val, reg); + + /* Notify SSIC that SSIC profile programming is done */ + val = readl(reg) | PROG_DONE; + writel(val, reg); + readl(reg); + } +} + +/* + * Make sure PME works on some Intel xHCI controllers by writing 1 to clear + * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4 + */ +static void xhci_pme_quirk(struct usb_hcd *hcd) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + void __iomem *reg; + u32 val; + + reg = (void __iomem *) xhci->cap_regs + 0x80a4; + val = readl(reg); + writel(val | BIT(28), reg); + readl(reg); +} + static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); struct pci_dev *pdev = to_pci_dev(hcd->self.controller); + int ret; /* * Systems with the TI redriver that loses port status change events @@ -308,9 +378,16 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) pdev->no_d3cold = true; if (xhci->quirks & XHCI_PME_STUCK_QUIRK) - xhci_pme_quirk(xhci); + xhci_pme_quirk(hcd); + + if (xhci->quirks & XHCI_SSIC_PORT_UNUSED) + xhci_ssic_port_unused_quirk(hcd, true); - return xhci_suspend(xhci, do_wakeup); + ret = xhci_suspend(xhci, do_wakeup); + if (ret && (xhci->quirks & XHCI_SSIC_PORT_UNUSED)) + xhci_ssic_port_unused_quirk(hcd, false); + + return ret; } static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated) @@ -340,8 +417,11 @@ static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated) if (pdev->vendor == PCI_VENDOR_ID_INTEL) usb_enable_intel_xhci_ports(pdev); + if (xhci->quirks & XHCI_SSIC_PORT_UNUSED) + xhci_ssic_port_unused_quirk(hcd, false); + if (xhci->quirks & XHCI_PME_STUCK_QUIRK) - xhci_pme_quirk(xhci); + xhci_pme_quirk(hcd); retval = xhci_resume(xhci, hibernated); return retval; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 7d34cbfaf373b..e6d858a49d04d 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -82,7 +82,7 @@ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg, return 0; /* offset in TRBs */ segment_offset = trb - seg->trbs; - if (segment_offset > TRBS_PER_SEGMENT) + if (segment_offset >= TRBS_PER_SEGMENT) return 0; return seg->dma + (segment_offset * sizeof(*trb)); } @@ -302,6 +302,15 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) ret = xhci_handshake(&xhci->op_regs->cmd_ring, CMD_RING_RUNNING, 0, 5 * 1000 * 1000); if (ret < 0) { + /* we are about to kill xhci, give it one more chance */ + xhci_write_64(xhci, temp_64 | CMD_RING_ABORT, + &xhci->op_regs->cmd_ring); + udelay(1000); + ret = xhci_handshake(&xhci->op_regs->cmd_ring, + CMD_RING_RUNNING, 0, 3 * 1000 * 1000); + if (ret == 0) + return 0; + xhci_err(xhci, "Stopped the command ring failed, " "maybe the host is dead\n"); xhci->xhc_state |= XHCI_STATE_DYING; @@ -1546,6 +1555,9 @@ static void handle_port_status(struct xhci_hcd *xhci, usb_hcd_resume_root_hub(hcd); } + if (hcd->speed == HCD_USB3 && (temp & PORT_PLS_MASK) == XDEV_INACTIVE) + 
bus_state->port_remote_wakeup &= ~(1 << faked_port_index); + if ((temp & PORT_PLC) && (temp & PORT_PLS_MASK) == XDEV_RESUME) { xhci_dbg(xhci, "port resume event for port %d\n", port_id); @@ -2227,6 +2239,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, u32 trb_comp_code; int ret = 0; int td_num = 0; + bool handling_skipped_tds = false; slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags)); xdev = xhci->devs[slot_id]; @@ -2360,6 +2373,10 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep->skip = true; xhci_dbg(xhci, "Miss service interval error, set skip flag\n"); goto cleanup; + case COMP_PING_ERR: + ep->skip = true; + xhci_dbg(xhci, "No Ping response error, Skip one Isoc TD\n"); + goto cleanup; default: if (xhci_is_vendor_info_code(xhci, trb_comp_code)) { status = 0; @@ -2496,13 +2513,18 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep, &status); cleanup: + + + handling_skipped_tds = ep->skip && + trb_comp_code != COMP_MISSED_INT && + trb_comp_code != COMP_PING_ERR; + /* - * Do not update event ring dequeue pointer if ep->skip is set. - * Will roll back to continue process missed tds. + * Do not update event ring dequeue pointer if we're in a loop + * processing missed tds. */ - if (trb_comp_code == COMP_MISSED_INT || !ep->skip) { + if (!handling_skipped_tds) inc_deq(xhci, xhci->event_ring); - } if (ret) { urb = td->urb; @@ -2537,7 +2559,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, * Process them as short transfer until reach the td pointed by * the event. */ - } while (ep->skip && trb_comp_code != COMP_MISSED_INT); + } while (handling_skipped_tds); return 0; } @@ -2978,21 +3000,6 @@ int xhci_queue_intr_tx(struct xhci_hcd *xhci, gfp_t mem_flags, return xhci_queue_bulk_tx(xhci, mem_flags, urb, slot_id, ep_index); } -/* - * The TD size is the number of bytes remaining in the TD (including this TRB), - * right shifted by 10. - * It must fit in bits 21:17, so it can't be bigger than 31. - */ -static u32 xhci_td_remainder(unsigned int remainder) -{ - u32 max = (1 << (21 - 17 + 1)) - 1; - - if ((remainder >> 10) >= max) - return max << 17; - else - return (remainder >> 10) << 17; -} - /* * For xHCI 1.0 host controllers, TD size is the number of max packet sized * packets remaining in the TD (*not* including this TRB). @@ -3005,30 +3012,36 @@ static u32 xhci_td_remainder(unsigned int remainder) * * TD size = total_packet_count - packets_transferred * - * It must fit in bits 21:17, so it can't be bigger than 31. + * For xHCI 0.96 and older, TD size field should be the remaining bytes + * including this TRB, right shifted by 10 + * + * For all hosts it must fit in bits 21:17, so it can't be bigger than 31. + * This is taken care of in the TRB_TD_SIZE() macro + * * The last TRB in a TD must have the TD size set to zero. */ -static u32 xhci_v1_0_td_remainder(int running_total, int trb_buff_len, - unsigned int total_packet_count, struct urb *urb, - unsigned int num_trbs_left) +static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred, + int trb_buff_len, unsigned int td_total_len, + struct urb *urb, unsigned int num_trbs_left) { - int packets_transferred; + u32 maxp, total_packet_count; + + if (xhci->hci_version < 0x100) + return ((td_total_len - transferred) >> 10); + + maxp = GET_MAX_PACKET(usb_endpoint_maxp(&urb->ep->desc)); + total_packet_count = DIV_ROUND_UP(td_total_len, maxp); /* One TRB with a zero-length data packet. 
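* (The rewritten check below also returns zero for the last TRB of a TD and for a TD that fits entirely in a single TRB.)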
*/ - if (num_trbs_left == 0 || (running_total == 0 && trb_buff_len == 0)) + if (num_trbs_left == 0 || (transferred == 0 && trb_buff_len == 0) || + trb_buff_len == td_total_len) return 0; - /* All the TRB queueing functions don't count the current TRB in - * running_total. - */ - packets_transferred = (running_total + trb_buff_len) / - GET_MAX_PACKET(usb_endpoint_maxp(&urb->ep->desc)); - - if ((total_packet_count - packets_transferred) > 31) - return 31 << 17; - return (total_packet_count - packets_transferred) << 17; + /* Queueing functions don't count the current TRB into transferred */ + return (total_packet_count - ((transferred + trb_buff_len) / maxp)); } + static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct urb *urb, int slot_id, unsigned int ep_index) { @@ -3038,9 +3051,11 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct xhci_td *td; struct scatterlist *sg; int num_sgs; - int trb_buff_len, this_sg_len, running_total; + int trb_buff_len, this_sg_len, running_total, ret; unsigned int total_packet_count; + bool zero_length_needed; bool first_trb; + int last_trb_num; u64 addr; bool more_trbs_coming; @@ -3056,13 +3071,27 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, total_packet_count = DIV_ROUND_UP(urb->transfer_buffer_length, usb_endpoint_maxp(&urb->ep->desc)); - trb_buff_len = prepare_transfer(xhci, xhci->devs[slot_id], + ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index, urb->stream_id, num_trbs, urb, 0, mem_flags); - if (trb_buff_len < 0) - return trb_buff_len; + if (ret < 0) + return ret; urb_priv = urb->hcpriv; + + /* Deal with URB_ZERO_PACKET - need one more td/trb */ + zero_length_needed = urb->transfer_flags & URB_ZERO_PACKET && + urb_priv->length == 2; + if (zero_length_needed) { + num_trbs++; + xhci_dbg(xhci, "Creating zero length td.\n"); + ret = prepare_transfer(xhci, xhci->devs[slot_id], + ep_index, urb->stream_id, + 1, urb, 1, mem_flags); + if (ret < 0) + return ret; + } + td = urb_priv->td[0]; /* @@ -3092,6 +3121,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, trb_buff_len = urb->transfer_buffer_length; first_trb = true; + last_trb_num = zero_length_needed ? 2 : 1; /* Queue the first TRB, even if it's zero-length */ do { u32 field = 0; @@ -3109,12 +3139,15 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, /* Chain all the TRBs together; clear the chain bit in the last * TRB to indicate it's the last TRB in the chain. */ - if (num_trbs > 1) { + if (num_trbs > last_trb_num) { field |= TRB_CHAIN; - } else { - /* FIXME - add check for ZERO_PACKET flag before this */ + } else if (num_trbs == last_trb_num) { td->last_trb = ep_ring->enqueue; field |= TRB_IOC; + } else if (zero_length_needed && num_trbs == 1) { + trb_buff_len = 0; + urb_priv->td[1]->last_trb = ep_ring->enqueue; + field |= TRB_IOC; } /* Only set interrupt on short packet for IN endpoints */ @@ -3130,17 +3163,12 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, } /* Set the TRB length, TD size, and interrupter fields. 
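* (A worked example with hypothetical numbers: maxp = 512 and a 4096-byte TD give total_packet_count = 8; with 1024 bytes already queued and a 512-byte TRB being added, xhci_td_remainder() returns 8 - 1536/512 = 5, and TRB_TD_SIZE() clamps anything above 31.)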
*/ - if (xhci->hci_version < 0x100) { - remainder = xhci_td_remainder( - urb->transfer_buffer_length - - running_total); - } else { - remainder = xhci_v1_0_td_remainder(running_total, - trb_buff_len, total_packet_count, urb, - num_trbs - 1); - } + remainder = xhci_td_remainder(xhci, running_total, trb_buff_len, + urb->transfer_buffer_length, + urb, num_trbs - 1); + length_field = TRB_LEN(trb_buff_len) | - remainder | + TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); if (num_trbs > 1) @@ -3176,7 +3204,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, if (running_total + trb_buff_len > urb->transfer_buffer_length) trb_buff_len = urb->transfer_buffer_length - running_total; - } while (running_total < urb->transfer_buffer_length); + } while (num_trbs > 0); check_trb_math(urb, num_trbs, running_total); giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id, @@ -3194,7 +3222,9 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, int num_trbs; struct xhci_generic_trb *start_trb; bool first_trb; + int last_trb_num; bool more_trbs_coming; + bool zero_length_needed; int start_cycle; u32 field, length_field; @@ -3225,7 +3255,6 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, num_trbs++; running_total += TRB_MAX_BUFF_SIZE; } - /* FIXME: this doesn't deal with URB_ZERO_PACKET - need one more */ ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index, urb->stream_id, @@ -3234,6 +3263,20 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, return ret; urb_priv = urb->hcpriv; + + /* Deal with URB_ZERO_PACKET - need one more td/trb */ + zero_length_needed = urb->transfer_flags & URB_ZERO_PACKET && + urb_priv->length == 2; + if (zero_length_needed) { + num_trbs++; + xhci_dbg(xhci, "Creating zero length td.\n"); + ret = prepare_transfer(xhci, xhci->devs[slot_id], + ep_index, urb->stream_id, + 1, urb, 1, mem_flags); + if (ret < 0) + return ret; + } + td = urb_priv->td[0]; /* @@ -3255,7 +3298,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, trb_buff_len = urb->transfer_buffer_length; first_trb = true; - + last_trb_num = zero_length_needed ? 2 : 1; /* Queue the first TRB, even if it's zero-length */ do { u32 remainder = 0; @@ -3272,12 +3315,15 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, /* Chain all the TRBs together; clear the chain bit in the last * TRB to indicate it's the last TRB in the chain. */ - if (num_trbs > 1) { + if (num_trbs > last_trb_num) { field |= TRB_CHAIN; - } else { - /* FIXME - add check for ZERO_PACKET flag before this */ + } else if (num_trbs == last_trb_num) { td->last_trb = ep_ring->enqueue; field |= TRB_IOC; + } else if (zero_length_needed && num_trbs == 1) { + trb_buff_len = 0; + urb_priv->td[1]->last_trb = ep_ring->enqueue; + field |= TRB_IOC; } /* Only set interrupt on short packet for IN endpoints */ @@ -3285,17 +3331,12 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, field |= TRB_ISP; /* Set the TRB length, TD size, and interrupter fields. 
*/ - if (xhci->hci_version < 0x100) { - remainder = xhci_td_remainder( - urb->transfer_buffer_length - - running_total); - } else { - remainder = xhci_v1_0_td_remainder(running_total, - trb_buff_len, total_packet_count, urb, - num_trbs - 1); - } + remainder = xhci_td_remainder(xhci, running_total, trb_buff_len, + urb->transfer_buffer_length, + urb, num_trbs - 1); + length_field = TRB_LEN(trb_buff_len) | - remainder | + TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); if (num_trbs > 1) @@ -3315,7 +3356,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, trb_buff_len = urb->transfer_buffer_length - running_total; if (trb_buff_len > TRB_MAX_BUFF_SIZE) trb_buff_len = TRB_MAX_BUFF_SIZE; - } while (running_total < urb->transfer_buffer_length); + } while (num_trbs > 0); check_trb_math(urb, num_trbs, running_total); giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id, @@ -3333,7 +3374,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct usb_ctrlrequest *setup; struct xhci_generic_trb *start_trb; int start_cycle; - u32 field, length_field; + u32 field, length_field, remainder; struct urb_priv *urb_priv; struct xhci_td *td; @@ -3382,8 +3423,8 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, if (start_cycle == 0) field |= 0x1; - /* xHCI 1.0 6.4.1.2.1: Transfer Type field */ - if (xhci->hci_version == 0x100) { + /* xHCI 1.0/1.1 6.4.1.2.1: Transfer Type field */ + if (xhci->hci_version >= 0x100) { if (urb->transfer_buffer_length > 0) { if (setup->bRequestType & USB_DIR_IN) field |= TRB_TX_TYPE(TRB_DATA_IN); @@ -3406,9 +3447,15 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, else field = TRB_TYPE(TRB_DATA); + remainder = xhci_td_remainder(xhci, 0, + urb->transfer_buffer_length, + urb->transfer_buffer_length, + urb, 1); + length_field = TRB_LEN(urb->transfer_buffer_length) | - xhci_td_remainder(urb->transfer_buffer_length) | + TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); + if (urb->transfer_buffer_length > 0) { if (setup->bRequestType & USB_DIR_IN) field |= TRB_DIR_IN; @@ -3631,17 +3678,12 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, trb_buff_len = td_remain_len; /* Set the TRB length, TD size, & interrupter fields. */ - if (xhci->hci_version < 0x100) { - remainder = xhci_td_remainder( - td_len - running_total); - } else { - remainder = xhci_v1_0_td_remainder( - running_total, trb_buff_len, - total_packet_count, urb, - (trbs_per_td - j - 1)); - } + remainder = xhci_td_remainder(xhci, running_total, + trb_buff_len, td_len, + urb, trbs_per_td - j - 1); + length_field = TRB_LEN(trb_buff_len) | - remainder | + TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); queue_trb(xhci, ep_ring, more_trbs_coming, diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 36bf089b708fe..910f7fac031f8 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -146,7 +146,8 @@ static int xhci_start(struct xhci_hcd *xhci) "waited %u microseconds.\n", XHCI_MAX_HALT_USEC); if (!ret) - xhci->xhc_state &= ~XHCI_STATE_HALTED; + xhci->xhc_state &= ~(XHCI_STATE_HALTED | XHCI_STATE_DYING); + return ret; } @@ -174,6 +175,16 @@ int xhci_reset(struct xhci_hcd *xhci) command |= CMD_RESET; writel(command, &xhci->op_regs->command); + /* Existing Intel xHCI controllers require a delay of 1 ms, * after setting the CMD_RESET bit, and before accessing any * HC registers. This allows the HC to complete the * reset operation and be ready for HC register access.
+ * Without this delay, the subsequent HC register access may, + * very rarely, result in a system hang. + */ + if (xhci->quirks & XHCI_INTEL_HOST) + udelay(1000); + ret = xhci_handshake(&xhci->op_regs->command, CMD_RESET, 0, 10 * 1000 * 1000); if (ret) @@ -683,8 +694,11 @@ void xhci_stop(struct usb_hcd *hcd) u32 temp; struct xhci_hcd *xhci = hcd_to_xhci(hcd); + mutex_lock(&xhci->mutex); + if (!usb_hcd_is_primary_hcd(hcd)) { xhci_only_stop_hcd(xhci->shared_hcd); + mutex_unlock(&xhci->mutex); return; } @@ -723,6 +737,7 @@ void xhci_stop(struct usb_hcd *hcd) xhci_dbg_trace(xhci, trace_xhci_dbg_init, "xhci_stop completed - status = %x", readl(&xhci->op_regs->status)); + mutex_unlock(&xhci->mutex); } /* @@ -1340,6 +1355,11 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) if (usb_endpoint_xfer_isoc(&urb->ep->desc)) size = urb->number_of_packets; + else if (usb_endpoint_is_bulk_out(&urb->ep->desc) && + urb->transfer_buffer_length > 0 && + urb->transfer_flags & URB_ZERO_PACKET && + !(urb->transfer_buffer_length % usb_endpoint_maxp(&urb->ep->desc))) + size = 2; else size = 1; @@ -1539,7 +1559,9 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "HW died, freeing TD."); urb_priv = urb->hcpriv; - for (i = urb_priv->td_cnt; i < urb_priv->length; i++) { + for (i = urb_priv->td_cnt; + i < urb_priv->length && xhci->devs[urb->dev->slot_id]; + i++) { td = urb_priv->td[i]; if (!list_empty(&td->td_list)) list_del_init(&td->td_list); @@ -3453,6 +3475,9 @@ int xhci_discover_or_reset_device(struct usb_hcd *hcd, struct usb_device *udev) return -EINVAL; } + if (virt_dev->tt_info) + old_active_eps = virt_dev->tt_info->active_eps; + if (virt_dev->udev != udev) { /* If the virt_dev and the udev do not match, this virt_dev * may belong to another udev. @@ -3787,6 +3812,9 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_lock(&xhci->mutex); + if (xhci->xhc_state) /* dying or halted */ + goto out; + if (!udev->slot_id) { xhci_dbg_trace(xhci, trace_xhci_dbg_address, "Bad Slot ID %d", udev->slot_id); @@ -4768,8 +4796,16 @@ int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, ctrl_ctx->add_flags |= cpu_to_le32(SLOT_FLAG); slot_ctx = xhci_get_slot_ctx(xhci, config_cmd->in_ctx); slot_ctx->dev_info |= cpu_to_le32(DEV_HUB); + /* + * Refer to section 6.2.2: MTT should be 0 for a full-speed hub, + * but it may already be set to 1 when setting up an xHCI virtual + * device, so clear it anyway.
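+ * (DEV_MTT is the Multi-TT flag in the slot context's dev_info field; + * only high-speed hubs have Transaction Translators, so the flag must + * stay 0 for a full-speed hub.)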
+ */ if (tt->multi) slot_ctx->dev_info |= cpu_to_le32(DEV_MTT); + else if (hdev->speed == USB_SPEED_FULL) + slot_ctx->dev_info &= cpu_to_le32(~DEV_MTT); + if (xhci->hci_version > 0x95) { xhci_dbg(xhci, "xHCI version %x needs hub " "TT think time and number of ports\n", @@ -5020,6 +5056,10 @@ static int __init xhci_hcd_init(void) BUILD_BUG_ON(sizeof(struct xhci_intr_reg) != 8*32/8); /* xhci_run_regs has eight fields and embeds 128 xhci_intr_regs */ BUILD_BUG_ON(sizeof(struct xhci_run_regs) != (8+8*128)*32/8); + + if (usb_disabled()) + return -ENODEV; + return 0; } diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 6977f8491fa7c..f18cdf0ec795b 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -285,6 +285,7 @@ struct xhci_op_regs { #define XDEV_U0 (0x0 << 5) #define XDEV_U2 (0x2 << 5) #define XDEV_U3 (0x3 << 5) +#define XDEV_INACTIVE (0x6 << 5) #define XDEV_RESUME (0xf << 5) /* true: port has power (see HCC_PPC) */ #define PORT_POWER (1 << 9) @@ -1129,6 +1130,8 @@ enum xhci_setup_dev { /* Normal TRB fields */ /* transfer_len bitmasks - bits 0:16 */ #define TRB_LEN(p) ((p) & 0x1ffff) +/* TD Size, packets remaining in this TD, bits 21:17 (5 bits, so max 31) */ +#define TRB_TD_SIZE(p) (min((p), (u32)31) << 17) /* Interrupter Target - which MSI-X vector to target the completion event at */ #define TRB_INTR_TARGET(p) (((p) & 0x3ff) << 22) #define GET_INTR_TARGET(p) (((p) >> 22) & 0x3ff) @@ -1567,6 +1570,7 @@ struct xhci_hcd { /* For controllers with a broken beyond repair streams implementation */ #define XHCI_BROKEN_STREAMS (1 << 19) #define XHCI_PME_STUCK_QUIRK (1 << 20) +#define XHCI_SSIC_PORT_UNUSED (1 << 22) unsigned int num_active_eps; unsigned int limit_active_eps; /* There are two roothubs to keep track of bus suspend info for */ diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c index 3ad5d19e4d04e..23c794813e6a9 100644 --- a/drivers/usb/misc/chaoskey.c +++ b/drivers/usb/misc/chaoskey.c @@ -472,7 +472,7 @@ static int chaoskey_rng_read(struct hwrng *rng, void *data, if (this_time > max) this_time = max; - memcpy(data, dev->buf, this_time); + memcpy(data, dev->buf + dev->used, this_time); dev->used += this_time; diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index 39db8b603627c..d1b9e0c7fb0d4 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -147,7 +147,7 @@ config USB_TI_CPPI_DMA config USB_TI_CPPI41_DMA bool 'TI CPPI 4.1 (AM335x)' - depends on ARCH_OMAP + depends on ARCH_OMAP && DMADEVICES select TI_CPPI41 config USB_TUSB_OMAP_DMA diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 6dca3d794ced6..9f65d84773726 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -132,7 +132,7 @@ static inline struct musb *dev_to_musb(struct device *dev) /*-------------------------------------------------------------------------*/ #ifndef CONFIG_BLACKFIN -static int musb_ulpi_read(struct usb_phy *phy, u32 offset) +static int musb_ulpi_read(struct usb_phy *phy, u32 reg) { void __iomem *addr = phy->io_priv; int i = 0; @@ -151,7 +151,7 @@ static int musb_ulpi_read(struct usb_phy *phy, u32 offset) * ULPICarKitControlDisableUTMI after clearing POWER_SUSPENDM. 
*/ - musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)offset); + musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)reg); musb_writeb(addr, MUSB_ULPI_REG_CONTROL, MUSB_ULPI_REG_REQ | MUSB_ULPI_RDN_WR); @@ -176,7 +176,7 @@ static int musb_ulpi_read(struct usb_phy *phy, u32 offset) return ret; } -static int musb_ulpi_write(struct usb_phy *phy, u32 offset, u32 data) +static int musb_ulpi_write(struct usb_phy *phy, u32 val, u32 reg) { void __iomem *addr = phy->io_priv; int i = 0; @@ -191,8 +191,8 @@ static int musb_ulpi_write(struct usb_phy *phy, u32 offset, u32 data) power &= ~MUSB_POWER_SUSPENDM; musb_writeb(addr, MUSB_POWER, power); - musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)offset); - musb_writeb(addr, MUSB_ULPI_REG_DATA, (u8)data); + musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)reg); + musb_writeb(addr, MUSB_ULPI_REG_DATA, (u8)val); musb_writeb(addr, MUSB_ULPI_REG_CONTROL, MUSB_ULPI_REG_REQ); while (!(musb_readb(addr, MUSB_ULPI_REG_CONTROL) diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c index 8bd8c5e26921c..d5a1407456400 100644 --- a/drivers/usb/musb/musb_cppi41.c +++ b/drivers/usb/musb/musb_cppi41.c @@ -614,7 +614,7 @@ static int cppi41_dma_controller_start(struct cppi41_dma_controller *controller) { struct musb *musb = controller->musb; struct device *dev = musb->controller; - struct device_node *np = dev->of_node; + struct device_node *np = dev->parent->of_node; struct cppi41_dma_channel *cppi41_channel; int count; int i; @@ -664,7 +664,7 @@ static int cppi41_dma_controller_start(struct cppi41_dma_controller *controller) musb_dma->status = MUSB_DMA_STATUS_FREE; musb_dma->max_len = SZ_4M; - dc = dma_request_slave_channel(dev, str); + dc = dma_request_slave_channel(dev->parent, str); if (!dc) { dev_err(dev, "Failed to request %s.\n", str); ret = -EPROBE_DEFER; @@ -694,7 +694,7 @@ struct dma_controller *dma_controller_create(struct musb *musb, struct cppi41_dma_controller *controller; int ret = 0; - if (!musb->controller->of_node) { + if (!musb->controller->parent->of_node) { dev_err(musb->controller, "Need DT for the DMA engine.\n"); return NULL; } diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index 65d931a28a14c..dcac5e7f19e0b 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -225,8 +225,11 @@ static void dsps_musb_enable(struct musb *musb) dsps_writel(reg_base, wrp->epintr_set, epmask); dsps_writel(reg_base, wrp->coreintr_set, coremask); - /* start polling for ID change. 
*/ - mod_timer(&glue->timer, jiffies + msecs_to_jiffies(wrp->poll_timeout)); + /* start polling for ID change in dual-role idle mode */ + if (musb->xceiv->otg->state == OTG_STATE_B_IDLE && + musb->port_mode == MUSB_PORT_MODE_DUAL_ROLE) + mod_timer(&glue->timer, jiffies + + msecs_to_jiffies(wrp->poll_timeout)); dsps_musb_try_idle(musb, 0); } diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c index 86c4b533e90b3..4731baca377f9 100644 --- a/drivers/usb/musb/musb_virthub.c +++ b/drivers/usb/musb/musb_virthub.c @@ -273,9 +273,7 @@ static int musb_has_gadget(struct musb *musb) #ifdef CONFIG_USB_MUSB_HOST return 1; #else - if (musb->port_mode == MUSB_PORT_MODE_HOST) - return 1; - return musb->g.dev.driver != NULL; + return musb->port_mode == MUSB_PORT_MODE_HOST; #endif } diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index deee68eafb72a..0cd85f2ccddd3 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -230,7 +230,8 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop, clk_rate = pdata->clk_rate; needs_vcc = pdata->needs_vcc; if (gpio_is_valid(pdata->gpio_reset)) { - err = devm_gpio_request_one(dev, pdata->gpio_reset, 0, + err = devm_gpio_request_one(dev, pdata->gpio_reset, + GPIOF_ACTIVE_LOW, dev_name(dev)); if (!err) nop->gpiod_reset = diff --git a/drivers/usb/phy/phy-mxs-usb.c b/drivers/usb/phy/phy-mxs-usb.c index 8f7cb068d29ba..3fcc0483a0811 100644 --- a/drivers/usb/phy/phy-mxs-usb.c +++ b/drivers/usb/phy/phy-mxs-usb.c @@ -217,6 +217,9 @@ static bool mxs_phy_get_vbus_status(struct mxs_phy *mxs_phy) { unsigned int vbus_value; + if (!mxs_phy->regmap_anatop) + return false; + if (mxs_phy->port_id == 0) regmap_read(mxs_phy->regmap_anatop, ANADIG_USB1_VBUS_DET_STAT, diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index ffd739e31bfc1..7a76fe4c2f9ea 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -98,6 +98,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x81AC) }, /* MSD Dash Hawk */ { USB_DEVICE(0x10C4, 0x81AD) }, /* INSYS USB Modem */ { USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */ + { USB_DEVICE(0x10C4, 0x81D7) }, /* IAI Corp. 
RCB-CV-USB USB to RS485 Adaptor */ { USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */ { USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */ { USB_DEVICE(0x10C4, 0x81E8) }, /* Zephyr Bioharness */ @@ -132,7 +133,6 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ - { USB_DEVICE(0x10C4, 0xEA80) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA71) }, /* Infinity GPS-MIC-1 Radio Monophone */ { USB_DEVICE(0x10C4, 0xF001) }, /* Elan Digital Systems USBscope50 */ { USB_DEVICE(0x10C4, 0xF002) }, /* Elan Digital Systems USBwave12 */ @@ -161,6 +161,10 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */ { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ + { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ + { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */ + { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */ + { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ { USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */ { USB_DEVICE(0x1BA4, 0x0002) }, /* Silicon Labs 358x factory default */ @@ -187,6 +191,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1FB9, 0x0602) }, /* Lake Shore Model 648 Magnet Power Supply */ { USB_DEVICE(0x1FB9, 0x0700) }, /* Lake Shore Model 737 VSM Controller */ { USB_DEVICE(0x1FB9, 0x0701) }, /* Lake Shore Model 776 Hall Matrix */ + { USB_DEVICE(0x2626, 0xEA60) }, /* Aruba Networks 7xxx USB Serial Console */ { USB_DEVICE(0x3195, 0xF190) }, /* Link Instruments MSO-19 */ { USB_DEVICE(0x3195, 0xF280) }, /* Link Instruments MSO-28 */ { USB_DEVICE(0x3195, 0xF281) }, /* Link Instruments MSO-28 */ diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 4c8b3b82103d6..8c660ae401d82 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -605,6 +605,10 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLXM_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_SYNAPSE_SS200_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2WI_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX3_PID) }, /* * ELV devices: */ @@ -820,6 +824,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_TURTELIZER_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_USB60F) }, + { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_SCU18) }, { USB_DEVICE(FTDI_VID, FTDI_REU_TINY_PID) }, /* Papouch devices based on FTDI chip */ diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 792e054126de5..7850071c0ae18 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -568,6 +568,14 @@ */ #define FTDI_SYNAPSE_SS200_PID 0x9090 /* SS200 - SNAP Stick 200 */ +/* + * CustomWare / ShipModul NMEA multiplexers product ids (FTDI_VID) + */ +#define FTDI_CUSTOMWARE_MINIPLEX_PID 0xfd48 
/* MiniPlex first generation NMEA Multiplexer */ +#define FTDI_CUSTOMWARE_MINIPLEX2_PID 0xfd49 /* MiniPlex-USB and MiniPlex-2 series */ +#define FTDI_CUSTOMWARE_MINIPLEX2WI_PID 0xfd4a /* MiniPlex-2Wi */ +#define FTDI_CUSTOMWARE_MINIPLEX3_PID 0xfd4b /* MiniPlex-3 series */ + /********************************/ /** third-party VID/PID combos **/ @@ -607,6 +615,7 @@ */ #define RATOC_VENDOR_ID 0x0584 #define RATOC_PRODUCT_ID_USB60F 0xb020 +#define RATOC_PRODUCT_ID_SCU18 0xb03a /* * Infineon Technologies diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c index f51a5d52c0eda..ec1b8f2c11837 100644 --- a/drivers/usb/serial/ipaq.c +++ b/drivers/usb/serial/ipaq.c @@ -531,7 +531,8 @@ static int ipaq_open(struct tty_struct *tty, * through. Since this has a reasonably high failure rate, we retry * several times. */ - while (retries--) { + while (retries) { + retries--; result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), 0x22, 0x21, 0x1, 0, NULL, 0, 100); diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index f0c0c53359ad6..ce3d6af977b74 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -162,6 +162,7 @@ static void option_instat_callback(struct urb *urb); #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED 0x9001 #define NOVATELWIRELESS_PRODUCT_E362 0x9010 #define NOVATELWIRELESS_PRODUCT_E371 0x9011 +#define NOVATELWIRELESS_PRODUCT_U620L 0x9022 #define NOVATELWIRELESS_PRODUCT_G2 0xA010 #define NOVATELWIRELESS_PRODUCT_MC551 0xB001 @@ -270,6 +271,9 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_CC864_SINGLE 0x1006 #define TELIT_PRODUCT_DE910_DUAL 0x1010 #define TELIT_PRODUCT_UE910_V2 0x1012 +#define TELIT_PRODUCT_LE922_USBCFG0 0x1042 +#define TELIT_PRODUCT_LE922_USBCFG3 0x1043 +#define TELIT_PRODUCT_LE922_USBCFG5 0x1045 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 @@ -278,6 +282,10 @@ static void option_instat_callback(struct urb *urb); #define ZTE_PRODUCT_MF622 0x0001 #define ZTE_PRODUCT_MF628 0x0015 #define ZTE_PRODUCT_MF626 0x0031 +#define ZTE_PRODUCT_ZM8620_X 0x0396 +#define ZTE_PRODUCT_ME3620_MBIM 0x0426 +#define ZTE_PRODUCT_ME3620_X 0x1432 +#define ZTE_PRODUCT_ME3620_L 0x1433 #define ZTE_PRODUCT_AC2726 0xfff1 #define ZTE_PRODUCT_MG880 0xfffd #define ZTE_PRODUCT_CDMA_TECH 0xfffe @@ -311,6 +319,7 @@ static void option_instat_callback(struct urb *urb); #define TOSHIBA_PRODUCT_G450 0x0d45 #define ALINK_VENDOR_ID 0x1e0e +#define SIMCOM_PRODUCT_SIM7100E 0x9001 /* Yes, ALINK_VENDOR_ID */ #define ALINK_PRODUCT_PH300 0x9100 #define ALINK_PRODUCT_3GU 0x9200 @@ -353,6 +362,7 @@ static void option_instat_callback(struct urb *urb); /* This is the 4G XS Stick W14 a.k.a. 
Mobilcom Debitel Surf-Stick * * It seems to contain a Qualcomm QSC6240/6290 chipset */ #define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603 +#define FOUR_G_SYSTEMS_PRODUCT_W100 0x9b01 /* iBall 3.5G connect wireless modem */ #define IBALL_3_5G_CONNECT 0x9605 @@ -518,6 +528,11 @@ static const struct option_blacklist_info four_g_w14_blacklist = { .sendsetup = BIT(0) | BIT(1), }; +static const struct option_blacklist_info four_g_w100_blacklist = { + .sendsetup = BIT(1) | BIT(2), + .reserved = BIT(3), +}; + static const struct option_blacklist_info alcatel_x200_blacklist = { .sendsetup = BIT(0) | BIT(1), .reserved = BIT(4), @@ -544,6 +559,18 @@ static const struct option_blacklist_info zte_mc2716_z_blacklist = { .sendsetup = BIT(1) | BIT(2) | BIT(3), }; +static const struct option_blacklist_info zte_me3620_mbim_blacklist = { + .reserved = BIT(2) | BIT(3) | BIT(4), +}; + +static const struct option_blacklist_info zte_me3620_xl_blacklist = { + .reserved = BIT(3) | BIT(4) | BIT(5), +}; + +static const struct option_blacklist_info zte_zm8620_x_blacklist = { + .reserved = BIT(3) | BIT(4) | BIT(5), +}; + static const struct option_blacklist_info huawei_cdc12_blacklist = { .reserved = BIT(1) | BIT(2), }; @@ -585,6 +612,10 @@ static const struct option_blacklist_info zte_1255_blacklist = { .reserved = BIT(3) | BIT(4), }; +static const struct option_blacklist_info simcom_sim7100e_blacklist = { + .reserved = BIT(5) | BIT(6), +}; + static const struct option_blacklist_info telit_le910_blacklist = { .sendsetup = BIT(0), .reserved = BIT(1) | BIT(2), @@ -600,6 +631,16 @@ static const struct option_blacklist_info sierra_mc73xx_blacklist = { .reserved = BIT(8) | BIT(10) | BIT(11), }; +static const struct option_blacklist_info telit_le922_blacklist_usbcfg0 = { + .sendsetup = BIT(2), + .reserved = BIT(0) | BIT(1) | BIT(3), +}; + +static const struct option_blacklist_info telit_le922_blacklist_usbcfg3 = { + .sendsetup = BIT(0), + .reserved = BIT(1) | BIT(2) | BIT(3), +}; + static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, @@ -1044,6 +1085,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E362, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E371, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U620L, 0xff, 0x00, 0x00) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) }, @@ -1094,11 +1136,17 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) }, { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6000)}, /* ZTE AC8700 */ + { USB_DEVICE_AND_INTERFACE_INFO(QUALCOMM_VENDOR_ID, 0x6001, 0xff, 0xff, 0xff), /* 4G LTE usb-modem U901 */ + .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ { USB_DEVICE_INTERFACE_CLASS(SIERRA_VENDOR_ID, 0x68c0, 0xff), .driver_info = (kernel_ulong_t)&sierra_mc73xx_blacklist }, /* MC73xx */ + { USB_DEVICE_INTERFACE_CLASS(SIERRA_VENDOR_ID, 
0x9041, 0xff), + .driver_info = (kernel_ulong_t)&sierra_mc73xx_blacklist }, /* MC7305/MC7355 */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9003), /* Quectel UC20 */ + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), @@ -1146,6 +1194,12 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_CC864_SINGLE) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_DE910_DUAL) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UE910_V2) }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), @@ -1589,6 +1643,14 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&zte_ad3812_z_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MC2716, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&zte_mc2716_z_blacklist }, + { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ME3620_L), + .driver_info = (kernel_ulong_t)&zte_me3620_xl_blacklist }, + { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ME3620_MBIM), + .driver_info = (kernel_ulong_t)&zte_me3620_mbim_blacklist }, + { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ME3620_X), + .driver_info = (kernel_ulong_t)&zte_me3620_xl_blacklist }, + { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ZM8620_X), + .driver_info = (kernel_ulong_t)&zte_zm8620_x_blacklist }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 0x01) }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 0x05) }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x86, 0x10) }, @@ -1607,6 +1669,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(ALINK_VENDOR_ID, 0x9000) }, { USB_DEVICE(ALINK_VENDOR_ID, ALINK_PRODUCT_PH300) }, { USB_DEVICE_AND_INTERFACE_INFO(ALINK_VENDOR_ID, ALINK_PRODUCT_3GU, 0xff, 0xff, 0xff) }, + { USB_DEVICE(ALINK_VENDOR_ID, SIMCOM_PRODUCT_SIM7100E), + .driver_info = (kernel_ulong_t)&simcom_sim7100e_blacklist }, { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200), .driver_info = (kernel_ulong_t)&alcatel_x200_blacklist }, @@ -1627,6 +1691,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14), .driver_info = (kernel_ulong_t)&four_g_w14_blacklist }, + { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W100), + .driver_info = (kernel_ulong_t)&four_g_w100_blacklist + }, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) }, @@ -1654,7 +1721,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_EU3_P) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PH8), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, - { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX) }, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX, 0xff) 
}, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PLXX), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, @@ -1765,6 +1832,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ + { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) }, { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) }, { } /* Terminating entry */ diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index f5257af33ecfb..ae682e4eeaef5 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -362,21 +362,38 @@ static speed_t pl2303_encode_baud_rate_direct(unsigned char buf[4], static speed_t pl2303_encode_baud_rate_divisor(unsigned char buf[4], speed_t baud) { - unsigned int tmp; + unsigned int baseline, mantissa, exponent; /* * Apparently the formula is: - * baudrate = 12M * 32 / (2^buf[1]) / buf[0] + * baudrate = 12M * 32 / (mantissa * 4^exponent) + * where + * mantissa = buf[8:0] + * exponent = buf[11:9] */ - tmp = 12000000 * 32 / baud; + baseline = 12000000 * 32; + mantissa = baseline / baud; + if (mantissa == 0) + mantissa = 1; /* Avoid dividing by zero if baud > 32*12M. */ + exponent = 0; + while (mantissa >= 512) { + if (exponent < 7) { + mantissa >>= 2; /* divide by 4 */ + exponent++; + } else { + /* Exponent is maxed. Trim mantissa and leave. */ + mantissa = 511; + break; + } + } + buf[3] = 0x80; buf[2] = 0; - buf[1] = (tmp >= 256); - while (tmp >= 256) { - tmp >>= 2; - buf[1] <<= 1; - } - buf[0] = tmp; + buf[1] = exponent << 1 | mantissa >> 8; + buf[0] = mantissa & 0xff; + + /* Calculate and return the exact baud rate. 
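* (A hypothetical worked case: a request for 9600 baud gives mantissa = 384000000 / 9600 = 40000; four divide-by-4 steps reduce it to 156 with exponent = 4, so the returned rate is (384000000 / 156) >> 8 = 9615 baud.)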
*/ + baud = (baseline / mantissa) >> (exponent << 1); return baud; } diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 9c63897b3a564..f0a2ad15a9926 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -22,6 +22,8 @@ #define DRIVER_AUTHOR "Qualcomm Inc" #define DRIVER_DESC "Qualcomm USB Serial driver" +#define QUECTEL_EC20_PID 0x9215 + /* standard device layouts supported by this driver */ enum qcserial_layouts { QCSERIAL_G2K = 0, /* Gobi 2000 */ @@ -139,13 +141,13 @@ static const struct usb_device_id id_table[] = { {USB_DEVICE(0x0AF0, 0x8120)}, /* Option GTM681W */ /* non-Gobi Sierra Wireless devices */ + {DEVICE_SWI(0x03f0, 0x4e1d)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {DEVICE_SWI(0x0f3d, 0x68a2)}, /* Sierra Wireless MC7700 */ {DEVICE_SWI(0x114f, 0x68a2)}, /* Sierra Wireless MC7750 */ {DEVICE_SWI(0x1199, 0x68a2)}, /* Sierra Wireless MC7710 */ {DEVICE_SWI(0x1199, 0x901c)}, /* Sierra Wireless EM7700 */ {DEVICE_SWI(0x1199, 0x901f)}, /* Sierra Wireless EM7355 */ {DEVICE_SWI(0x1199, 0x9040)}, /* Sierra Wireless Modem */ - {DEVICE_SWI(0x1199, 0x9041)}, /* Sierra Wireless MC7305/MC7355 */ {DEVICE_SWI(0x1199, 0x9051)}, /* Netgear AirCard 340U */ {DEVICE_SWI(0x1199, 0x9053)}, /* Sierra Wireless Modem */ {DEVICE_SWI(0x1199, 0x9054)}, /* Sierra Wireless Modem */ @@ -153,11 +155,17 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9056)}, /* Sierra Wireless Modem */ {DEVICE_SWI(0x1199, 0x9060)}, /* Sierra Wireless Modem */ {DEVICE_SWI(0x1199, 0x9061)}, /* Sierra Wireless Modem */ + {DEVICE_SWI(0x1199, 0x9070)}, /* Sierra Wireless MC74xx */ + {DEVICE_SWI(0x1199, 0x9071)}, /* Sierra Wireless MC74xx */ + {DEVICE_SWI(0x1199, 0x9078)}, /* Sierra Wireless EM74xx */ + {DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a8)}, /* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a9)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ + {DEVICE_SWI(0x413c, 0x81b1)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */ + {DEVICE_SWI(0x413c, 0x81b3)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ @@ -166,6 +174,38 @@ static const struct usb_device_id id_table[] = { }; MODULE_DEVICE_TABLE(usb, id_table); +static int handle_quectel_ec20(struct device *dev, int ifnum) +{ + int altsetting = 0; + + /* + * Quectel EC20 Mini PCIe LTE module layout: + * 0: DM/DIAG (use libqcdm from ModemManager for communication) + * 1: NMEA + * 2: AT-capable modem port + * 3: Modem interface + * 4: NDIS + */ + switch (ifnum) { + case 0: + dev_dbg(dev, "Quectel EC20 DM/DIAG interface found\n"); + break; + case 1: + dev_dbg(dev, "Quectel EC20 NMEA GPS interface found\n"); + break; + case 2: + case 3: + dev_dbg(dev, "Quectel EC20 Modem port found\n"); + break; + case 4: + /* Don't claim the QMI/net interface */ + altsetting = -1; + break; + } + + return altsetting; +} + static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) { struct usb_host_interface *intf = serial->interface->cur_altsetting; @@ -175,6 +215,10 @@ static int 
qcprobe(struct usb_serial *serial, const struct usb_device_id *id) __u8 ifnum; int altsetting = -1; + /* we only support vendor specific functions */ + if (intf->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC) + goto done; + nintf = serial->dev->actconfig->desc.bNumInterfaces; dev_dbg(dev, "Num Interfaces = %d\n", nintf); ifnum = intf->desc.bInterfaceNumber; @@ -234,6 +278,12 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) altsetting = -1; break; case QCSERIAL_G2K: + /* handle non-standard layouts */ + if (nintf == 5 && id->idProduct == QUECTEL_EC20_PID) { + altsetting = handle_quectel_ec20(dev, ifnum); + goto done; + } + /* * Gobi 2K+ USB layout: * 0: QMI/net @@ -294,29 +344,39 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) break; case QCSERIAL_HWI: /* - * Huawei layout: - * 0: AT-capable modem port - * 1: DM/DIAG - * 2: AT-capable modem port - * 3: CCID-compatible PCSC interface - * 4: QMI/net - * 5: NMEA + * Huawei devices map functions by subclass + protocol + * instead of interface numbers. The protocol identifies + * a specific function, while the subclass indicates a + * specific firmware source. + * + * This is a blacklist of functions known to be + * non-serial. The rest are assumed to be serial and + * will be handled by this driver. */ - switch (ifnum) { - case 0: - case 2: - dev_dbg(dev, "Modem port found\n"); - break; - case 1: - dev_dbg(dev, "DM/DIAG interface found\n"); - break; - case 5: - dev_dbg(dev, "NMEA GPS interface found\n"); - break; - default: - /* don't claim any unsupported interface */ + switch (intf->desc.bInterfaceProtocol) { + /* QMI combined (qmi_wwan) */ + case 0x07: + case 0x37: + case 0x67: + /* QMI data (qmi_wwan) */ + case 0x08: + case 0x38: + case 0x68: + /* QMI control (qmi_wwan) */ + case 0x09: + case 0x39: + case 0x69: + /* NCM like (huawei_cdc_ncm) */ + case 0x16: + case 0x46: + case 0x76: altsetting = -1; break; + default: + dev_dbg(dev, "Huawei type serial port found (%02x/%02x/%02x)\n", + intf->desc.bInterfaceClass, + intf->desc.bInterfaceSubClass, + intf->desc.bInterfaceProtocol); } break; default: diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index 46179a0828ebc..07d1ecd564f79 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -289,6 +289,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68AA, 0xFF, 0xFF, 0xFF), .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist }, + { USB_DEVICE(0x1199, 0x68AB) }, /* Sierra Wireless AR8550 */ /* AT&T Direct IP LTE modems */ { USB_DEVICE_AND_INTERFACE_INFO(0x0F3D, 0x68AA, 0xFF, 0xFF, 0xFF), .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c index 8fceec7298e00..6ed804450a5a6 100644 --- a/drivers/usb/serial/symbolserial.c +++ b/drivers/usb/serial/symbolserial.c @@ -94,7 +94,7 @@ static void symbol_int_callback(struct urb *urb) static int symbol_open(struct tty_struct *tty, struct usb_serial_port *port) { - struct symbol_private *priv = usb_get_serial_data(port->serial); + struct symbol_private *priv = usb_get_serial_port_data(port); unsigned long flags; int result = 0; @@ -120,7 +120,7 @@ static void symbol_close(struct usb_serial_port *port) static void symbol_throttle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; - struct symbol_private *priv = usb_get_serial_data(port->serial); + struct symbol_private *priv =
usb_get_serial_port_data(port); spin_lock_irq(&priv->lock); priv->throttled = true; @@ -130,7 +130,7 @@ static void symbol_throttle(struct tty_struct *tty) static void symbol_unthrottle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; - struct symbol_private *priv = usb_get_serial_data(port->serial); + struct symbol_private *priv = usb_get_serial_port_data(port); int result; bool was_throttled; diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index e9da41d9fe7fc..2694df2f4559b 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -159,6 +159,7 @@ static const struct usb_device_id ti_id_table_3410[] = { { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STEREO_PLUG_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) }, { USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) }, + { USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) }, { } /* terminator */ }; @@ -191,6 +192,7 @@ static const struct usb_device_id ti_id_table_combined[] = { { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_PRODUCT_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) }, { USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) }, + { USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) }, { } /* terminator */ }; diff --git a/drivers/usb/serial/ti_usb_3410_5052.h b/drivers/usb/serial/ti_usb_3410_5052.h index 4a2423e84d558..98f35c656c02d 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.h +++ b/drivers/usb/serial/ti_usb_3410_5052.h @@ -56,6 +56,10 @@ #define ABBOTT_PRODUCT_ID ABBOTT_STEREO_PLUG_ID #define ABBOTT_STRIP_PORT_ID 0x3420 +/* Honeywell vendor and product IDs */ +#define HONEYWELL_VENDOR_ID 0x10ac +#define HONEYWELL_HGI80_PRODUCT_ID 0x0102 /* Honeywell HGI80 */ + /* Commands */ #define TI_GET_VERSION 0x01 #define TI_GET_PORT_STATUS 0x02 diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 3658662898fcb..a204782ae530e 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -53,6 +53,7 @@ DEVICE(funsoft, FUNSOFT_IDS); /* Infineon Flashloader driver */ #define FLASHLOADER_IDS() \ + { USB_DEVICE_INTERFACE_CLASS(0x058b, 0x0041, USB_CLASS_CDC_DATA) }, \ { USB_DEVICE(0x8087, 0x0716) } DEVICE(flashloader, FLASHLOADER_IDS); diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 529066bbc7e81..46f1f13b41f14 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -1306,6 +1306,7 @@ static void __exit usb_serial_exit(void) tty_unregister_driver(usb_serial_tty_driver); put_tty_driver(usb_serial_tty_driver); bus_unregister(&usb_serial_bus_type); + idr_destroy(&serial_minors); } diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c index 60afb39eb73c0..337a0be89fcf0 100644 --- a/drivers/usb/serial/visor.c +++ b/drivers/usb/serial/visor.c @@ -544,6 +544,11 @@ static int treo_attach(struct usb_serial *serial) (serial->num_interrupt_in == 0)) return 0; + if (serial->num_bulk_in < 2 || serial->num_interrupt_in < 2) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + /* * It appears that Treos and Kyoceras want to use the * 1st bulk in endpoint to communicate with the 2nd bulk out endpoint, @@ -597,8 +602,10 @@ static int clie_5_attach(struct usb_serial *serial) */ /* some sanity check */ - if (serial->num_ports < 2) - return -1; + if (serial->num_bulk_out < 2) { + dev_err(&serial->interface->dev, "missing bulk out endpoints\n"); + return 
-ENODEV; + } /* port 0 now uses the modified endpoint Address */ port = serial->port[0]; diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 6c3734d2b45a7..d3ea90bef84d9 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -80,6 +80,8 @@ static int whiteheat_firmware_download(struct usb_serial *serial, static int whiteheat_firmware_attach(struct usb_serial *serial); /* function prototypes for the Connect Tech WhiteHEAT serial converter */ +static int whiteheat_probe(struct usb_serial *serial, + const struct usb_device_id *id); static int whiteheat_attach(struct usb_serial *serial); static void whiteheat_release(struct usb_serial *serial); static int whiteheat_port_probe(struct usb_serial_port *port); @@ -116,6 +118,7 @@ static struct usb_serial_driver whiteheat_device = { .description = "Connect Tech - WhiteHEAT", .id_table = id_table_std, .num_ports = 4, + .probe = whiteheat_probe, .attach = whiteheat_attach, .release = whiteheat_release, .port_probe = whiteheat_port_probe, @@ -217,6 +220,34 @@ static int whiteheat_firmware_attach(struct usb_serial *serial) /***************************************************************************** * Connect Tech's White Heat serial driver functions *****************************************************************************/ + +static int whiteheat_probe(struct usb_serial *serial, + const struct usb_device_id *id) +{ + struct usb_host_interface *iface_desc; + struct usb_endpoint_descriptor *endpoint; + size_t num_bulk_in = 0; + size_t num_bulk_out = 0; + size_t min_num_bulk; + unsigned int i; + + iface_desc = serial->interface->cur_altsetting; + + for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { + endpoint = &iface_desc->endpoint[i].desc; + if (usb_endpoint_is_bulk_in(endpoint)) + ++num_bulk_in; + if (usb_endpoint_is_bulk_out(endpoint)) + ++num_bulk_out; + } + + min_num_bulk = COMMAND_PORT + 1; + if (num_bulk_in < min_num_bulk || num_bulk_out < min_num_bulk) + return -ENODEV; + + return 0; +} + static int whiteheat_attach(struct usb_serial *serial) { struct usb_serial_port *command_port; diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 6d3122afeed33..75e4979e6c159 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -796,6 +796,10 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_NO_REPORT_OPCODES) sdev->no_report_opcodes = 1; + /* A few buggy USB-ATA bridges don't understand FUA */ + if (devinfo->flags & US_FL_BROKEN_FUA) + sdev->broken_fua = 1; + scsi_change_queue_depth(sdev, devinfo->qdepth - 2); return 0; } diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index caf188800c679..4095824c8c6da 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1987,7 +1987,7 @@ UNUSUAL_DEV( 0x14cd, 0x6600, 0x0201, 0x0201, US_FL_IGNORE_RESIDUE ), /* Reported by Michael Büsch */ -UNUSUAL_DEV( 0x152d, 0x0567, 0x0114, 0x0114, +UNUSUAL_DEV( 0x152d, 0x0567, 0x0114, 0x0116, "JMicron", "USB to ATA/ATAPI Bridge", USB_SC_DEVICE, USB_PR_DEVICE, NULL, @@ -2065,6 +2065,18 @@ UNUSUAL_DEV( 0x1908, 0x3335, 0x0200, 0x0200, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_READ_DISC_INFO ), +/* Reported by Oliver Neukum + * This device morphs spontaneously into another device if the access + * pattern of Windows isn't followed. Thus writable media would be dirty + * if the initial instance is used. So the device is limited to its + * virtual CD.
+ * And yes, the concept that BCD goes up to 9 is not heeded */ +UNUSUAL_DEV( 0x19d2, 0x1225, 0x0000, 0xffff, + "ZTE,Incorporated", + "ZTE WCDMA Technologies MSM", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_SINGLE_LUN ), + /* Reported by Sven Geggus * This encrypted pen drive returns bogus data for the initial READ(10). */ diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index c85ea530085f1..ccc113e83d88e 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -132,7 +132,7 @@ UNUSUAL_DEV(0x152d, 0x0567, 0x0000, 0x9999, "JMicron", "JMS567", USB_SC_DEVICE, USB_PR_DEVICE, NULL, - US_FL_NO_REPORT_OPCODES), + US_FL_BROKEN_FUA | US_FL_NO_REPORT_OPCODES), /* Reported-by: Hans de Goede */ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999, diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index e9851add6f4ef..c0f4ab83aaa83 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -446,7 +446,8 @@ static long vfio_pci_ioctl(void *device_data, info.num_regions = VFIO_PCI_NUM_REGIONS; info.num_irqs = VFIO_PCI_NUM_IRQS; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { struct pci_dev *pdev = vdev->pdev; @@ -520,7 +521,8 @@ static long vfio_pci_ioctl(void *device_data, return -EINVAL; } - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { struct vfio_irq_info info; @@ -555,7 +557,8 @@ static long vfio_pci_ioctl(void *device_data, else info.flags |= VFIO_IRQ_INFO_NORESIZE; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_SET_IRQS) { struct vfio_irq_set hdr; diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index abcff7a1aa667..973b24ffe3323 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -163,7 +163,8 @@ static long vfio_platform_ioctl(void *device_data, info.num_regions = vdev->num_regions; info.num_irqs = vdev->num_irqs; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { struct vfio_region_info info; @@ -184,7 +185,8 @@ static long vfio_platform_ioctl(void *device_data, info.size = vdev->regions[info.index].size; info.flags = vdev->regions[info.index].flags; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { struct vfio_irq_info info; @@ -203,7 +205,8 @@ static long vfio_platform_ioctl(void *device_data, info.flags = vdev->irqs[info.index].flags; info.count = vdev->irqs[info.index].count; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? 
+ -EFAULT : 0; } else if (cmd == VFIO_DEVICE_SET_IRQS) { struct vfio_irq_set hdr; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 57d8c37a002b0..0922165407567 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -986,7 +986,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, info.iova_pgsizes = vfio_pgsize_bitmap(iommu); - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_IOMMU_MAP_DMA) { struct vfio_iommu_type1_dma_map map; @@ -1019,7 +1020,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, if (ret) return ret; - return copy_to_user((void __user *)arg, &unmap, minsz); + return copy_to_user((void __user *)arg, &unmap, minsz) ? + -EFAULT : 0; } return -ENOTTY; diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index ea32b386797f5..636435b412936 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -607,7 +607,7 @@ static void vhost_scsi_free_cmd(struct vhost_scsi_cmd *cmd) static int vhost_scsi_check_stop_free(struct se_cmd *se_cmd) { - return target_put_sess_cmd(se_cmd->se_sess, se_cmd); + return target_put_sess_cmd(se_cmd); } static void diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 2ee28266fd070..fa49d3294cd52 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -886,6 +886,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) } if (eventfp != d->log_file) { filep = d->log_file; + d->log_file = eventfp; ctx = d->log_ctx; d->log_ctx = eventfp ? eventfd_ctx_fileget(eventfp) : NULL; diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index 1094623030879..d1e1e1704da1f 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -298,7 +298,7 @@ config FB_ARMCLCD # Helper logic selected only by the ARM Versatile platform family. 
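Stepping back to the vfio and vfio-platform hunks above, which all make the same one-line repair, the rule behind them is worth stating: copy_to_user() returns the number of bytes it could not copy, never a negative errno, so returning its result straight from an ioctl handler turns a fault into a bogus positive "success" value for userspace. A minimal sketch of the corrected tail of such a handler (the struct and names are illustrative, not from the patch):

	#include <linux/types.h>
	#include <linux/uaccess.h>

	struct demo_info {
		__u32 argsz;
		__u32 flags;
	};

	static long demo_ioctl_get_info(void __user *arg, struct demo_info *info,
					unsigned long minsz)
	{
		/*
		 * copy_to_user() returns 0 on success or the number of bytes
		 * NOT copied on failure; map any nonzero result to -EFAULT
		 * instead of leaking the byte count to the caller.
		 */
		return copy_to_user(arg, info, minsz) ? -EFAULT : 0;
	}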
config PLAT_VERSATILE_CLCD - def_bool ARCH_VERSATILE || ARCH_REALVIEW || ARCH_VEXPRESS + def_bool ARCH_VERSATILE || ARCH_REALVIEW || ARCH_VEXPRESS || ARCH_INTEGRATOR depends on ARM depends on FB_ARMCLCD && FB=y diff --git a/drivers/video/fbdev/mxsfb.c b/drivers/video/fbdev/mxsfb.c index f8ac4a452f26d..0f64165b01474 100644 --- a/drivers/video/fbdev/mxsfb.c +++ b/drivers/video/fbdev/mxsfb.c @@ -316,6 +316,18 @@ static int mxsfb_check_var(struct fb_var_screeninfo *var, return 0; } +static inline void mxsfb_enable_axi_clk(struct mxsfb_info *host) +{ + if (host->clk_axi) + clk_prepare_enable(host->clk_axi); +} + +static inline void mxsfb_disable_axi_clk(struct mxsfb_info *host) +{ + if (host->clk_axi) + clk_disable_unprepare(host->clk_axi); +} + static void mxsfb_enable_controller(struct fb_info *fb_info) { struct mxsfb_info *host = to_imxfb_host(fb_info); @@ -333,14 +345,13 @@ static void mxsfb_enable_controller(struct fb_info *fb_info) } } - if (host->clk_axi) - clk_prepare_enable(host->clk_axi); - if (host->clk_disp_axi) clk_prepare_enable(host->clk_disp_axi); clk_prepare_enable(host->clk); clk_set_rate(host->clk, PICOS2KHZ(fb_info->var.pixclock) * 1000U); + mxsfb_enable_axi_clk(host); + /* if it was disabled, re-enable the mode again */ writel(CTRL_DOTCLK_MODE, host->base + LCDC_CTRL + REG_SET); @@ -380,11 +391,11 @@ static void mxsfb_disable_controller(struct fb_info *fb_info) reg = readl(host->base + LCDC_VDCTRL4); writel(reg & ~VDCTRL4_SYNC_SIGNALS_ON, host->base + LCDC_VDCTRL4); + mxsfb_disable_axi_clk(host); + clk_disable_unprepare(host->clk); if (host->clk_disp_axi) clk_disable_unprepare(host->clk_disp_axi); - if (host->clk_axi) - clk_disable_unprepare(host->clk_axi); host->enabled = 0; @@ -421,6 +432,8 @@ static int mxsfb_set_par(struct fb_info *fb_info) mxsfb_disable_controller(fb_info); } + mxsfb_enable_axi_clk(host); + /* clear the FIFOs */ writel(CTRL1_FIFO_CLEAR, host->base + LCDC_CTRL1 + REG_SET); @@ -438,6 +451,7 @@ static int mxsfb_set_par(struct fb_info *fb_info) ctrl |= CTRL_SET_WORD_LENGTH(3); switch (host->ld_intf_width) { case STMLCDIF_8BIT: + mxsfb_disable_axi_clk(host); dev_err(&host->pdev->dev, "Unsupported LCD bus width mapping\n"); return -EINVAL; @@ -451,6 +465,7 @@ static int mxsfb_set_par(struct fb_info *fb_info) writel(CTRL1_SET_BYTE_PACKAGING(0x7), host->base + LCDC_CTRL1); break; default: + mxsfb_disable_axi_clk(host); dev_err(&host->pdev->dev, "Unhandled color depth of %u\n", fb_info->var.bits_per_pixel); return -EINVAL; @@ -504,6 +519,8 @@ static int mxsfb_set_par(struct fb_info *fb_info) fb_info->fix.line_length * fb_info->var.yoffset, host->base + host->devdata->next_buf); + mxsfb_disable_axi_clk(host); + if (reenable) mxsfb_enable_controller(fb_info); @@ -582,10 +599,14 @@ static int mxsfb_pan_display(struct fb_var_screeninfo *var, offset = fb_info->fix.line_length * var->yoffset; + mxsfb_enable_axi_clk(host); + /* update on next VSYNC */ writel(fb_info->fix.smem_start + offset, host->base + host->devdata->next_buf); + mxsfb_disable_axi_clk(host); + return 0; } @@ -608,13 +629,17 @@ static int mxsfb_restore_mode(struct mxsfb_info *host, unsigned line_count; unsigned period; unsigned long pa, fbsize; - int bits_per_pixel, ofs; + int bits_per_pixel, ofs, ret = 0; u32 transfer_count, vdctrl0, vdctrl2, vdctrl3, vdctrl4, ctrl; + mxsfb_enable_axi_clk(host); + /* Only restore the mode when the controller is running */ ctrl = readl(host->base + LCDC_CTRL); - if (!(ctrl & CTRL_RUN)) - return -EINVAL; + if (!(ctrl & CTRL_RUN)) { + ret = -EINVAL; + goto err; + } 
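The two helpers introduced at the top of the mxsfb hunk above encapsulate a common pattern: clk_axi is optional (NULL on SoCs without a separate AXI gate), so every register-access window brackets itself with one enable/disable pair instead of repeating the NULL check at each site. A rough standalone sketch of the idiom, with hypothetical names:

	#include <linux/clk.h>

	struct demo_host {
		struct clk *clk_axi;		/* may be NULL on some SoCs */
		void __iomem *base;
	};

	static inline void demo_enable_axi_clk(struct demo_host *host)
	{
		if (host->clk_axi)
			clk_prepare_enable(host->clk_axi); /* error handling elided */
	}

	static inline void demo_disable_axi_clk(struct demo_host *host)
	{
		if (host->clk_axi)
			clk_disable_unprepare(host->clk_axi);
	}

	static void demo_poke_registers(struct demo_host *host)
	{
		demo_enable_axi_clk(host);
		/* ... MMIO accesses that require the AXI clock running ... */
		demo_disable_axi_clk(host);
	}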
vdctrl0 = readl(host->base + LCDC_VDCTRL0); vdctrl2 = readl(host->base + LCDC_VDCTRL2); @@ -635,7 +660,8 @@ static int mxsfb_restore_mode(struct mxsfb_info *host, break; case 1: default: - return -EINVAL; + ret = -EINVAL; + goto err; } fb_info->var.bits_per_pixel = bits_per_pixel; @@ -673,10 +699,14 @@ static int mxsfb_restore_mode(struct mxsfb_info *host, pa = readl(host->base + host->devdata->cur_buf); fbsize = fb_info->fix.line_length * vmode->yres; - if (pa < fb_info->fix.smem_start) - return -EINVAL; - if (pa + fbsize > fb_info->fix.smem_start + fb_info->fix.smem_len) - return -EINVAL; + if (pa < fb_info->fix.smem_start) { + ret = -EINVAL; + goto err; + } + if (pa + fbsize > fb_info->fix.smem_start + fb_info->fix.smem_len) { + ret = -EINVAL; + goto err; + } ofs = pa - fb_info->fix.smem_start; if (ofs) { memmove(fb_info->screen_base, fb_info->screen_base + ofs, fbsize); @@ -689,7 +719,11 @@ static int mxsfb_restore_mode(struct mxsfb_info *host, clk_prepare_enable(host->clk); host->enabled = 1; - return 0; +err: + if (ret) + mxsfb_disable_axi_clk(host); + + return ret; } static int mxsfb_init_fbinfo_dt(struct mxsfb_info *host, @@ -915,7 +949,9 @@ static int mxsfb_probe(struct platform_device *pdev) } if (!host->enabled) { + mxsfb_enable_axi_clk(host); writel(0, host->base + LCDC_CTRL); + mxsfb_disable_axi_clk(host); mxsfb_set_par(fb_info); mxsfb_enable_controller(fb_info); } @@ -954,11 +990,15 @@ static void mxsfb_shutdown(struct platform_device *pdev) struct fb_info *fb_info = platform_get_drvdata(pdev); struct mxsfb_info *host = to_imxfb_host(fb_info); + mxsfb_enable_axi_clk(host); + /* * Force stop the LCD controller as keeping it running during reboot * might interfere with the BootROM's boot mode pads sampling. */ writel(CTRL_RUN, host->base + LCDC_CTRL + REG_CLR); + + mxsfb_disable_axi_clk(host); } static struct platform_driver mxsfb_driver = { diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 82e80e034f250..89bac470f04e0 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -166,13 +166,13 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num) mutex_unlock(&vb->balloon_lock); } -static void release_pages_by_pfn(const u32 pfns[], unsigned int num) +static void release_pages_balloon(struct virtio_balloon *vb) { unsigned int i; /* Find pfns pointing at start of each page, get pages and free them. 
*/ - for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) { - struct page *page = balloon_pfn_to_page(pfns[i]); + for (i = 0; i < vb->num_pfns; i += VIRTIO_BALLOON_PAGES_PER_PAGE) { + struct page *page = balloon_pfn_to_page(vb->pfns[i]); adjust_managed_page_count(page, 1); put_page(page); /* balloon reference */ } @@ -205,8 +205,8 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) */ if (vb->num_pfns != 0) tell_host(vb, vb->deflate_vq); + release_pages_balloon(vb); mutex_unlock(&vb->balloon_lock); - release_pages_by_pfn(vb->pfns, vb->num_pfns); return num_freed_pages; } diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index eba1b7ac72945..14f767e8e5c55 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -554,6 +554,7 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, static void virtio_pci_remove(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + struct device *dev = get_device(&vp_dev->vdev.dev); unregister_virtio_device(&vp_dev->vdev); @@ -564,6 +565,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) pci_release_regions(pci_dev); pci_disable_device(pci_dev); + put_device(dev); } static struct pci_driver virtio_pci_driver = { diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c index 1f11a20a8ab9d..55eb86c9e2141 100644 --- a/drivers/w1/slaves/w1_therm.c +++ b/drivers/w1/slaves/w1_therm.c @@ -59,16 +59,32 @@ MODULE_ALIAS("w1-family-" __stringify(W1_THERM_DS28EA00)); static int w1_strong_pullup = 1; module_param_named(strong_pullup, w1_strong_pullup, int, 0); +struct w1_therm_family_data { + uint8_t rom[9]; + atomic_t refcnt; +}; + +/* return the address of the refcnt in the family data */ +#define THERM_REFCNT(family_data) \ + (&((struct w1_therm_family_data *)family_data)->refcnt) + static int w1_therm_add_slave(struct w1_slave *sl) { - sl->family_data = kzalloc(9, GFP_KERNEL); + sl->family_data = kzalloc(sizeof(struct w1_therm_family_data), + GFP_KERNEL); if (!sl->family_data) return -ENOMEM; + atomic_set(THERM_REFCNT(sl->family_data), 1); return 0; } static void w1_therm_remove_slave(struct w1_slave *sl) { + int refcnt = atomic_sub_return(1, THERM_REFCNT(sl->family_data)); + while (refcnt) { + msleep(1000); + refcnt = atomic_read(THERM_REFCNT(sl->family_data)); + } kfree(sl->family_data); sl->family_data = NULL; } @@ -194,13 +210,22 @@ static ssize_t w1_slave_show(struct device *device, struct w1_slave *sl = dev_to_w1_slave(device); struct w1_master *dev = sl->master; u8 rom[9], crc, verdict, external_power; - int i, max_trying = 10; + int i, ret, max_trying = 10; ssize_t c = PAGE_SIZE; + u8 *family_data = sl->family_data; + + ret = mutex_lock_interruptible(&dev->bus_mutex); + if (ret != 0) + goto post_unlock; - i = mutex_lock_interruptible(&dev->bus_mutex); - if (i != 0) - return i; + if (!sl->family_data) { + ret = -ENODEV; + goto pre_unlock; + } + /* prevent the slave from going away in sleep */ + atomic_inc(THERM_REFCNT(family_data)); memset(rom, 0, sizeof(rom)); while (max_trying--) { @@ -230,17 +255,19 @@ static ssize_t w1_slave_show(struct device *device, mutex_unlock(&dev->bus_mutex); sleep_rem = msleep_interruptible(tm); - if (sleep_rem != 0) - return -EINTR; + if (sleep_rem != 0) { + ret = -EINTR; + goto post_unlock; + } - i = mutex_lock_interruptible(&dev->bus_mutex); - if (i != 0) - return i; + ret = mutex_lock_interruptible(&dev->bus_mutex); + if (ret != 0) + goto post_unlock; } else if
(!w1_strong_pullup) { sleep_rem = msleep_interruptible(tm); if (sleep_rem != 0) { - mutex_unlock(&dev->bus_mutex); - return -EINTR; + ret = -EINTR; + goto pre_unlock; } } @@ -269,19 +296,24 @@ static ssize_t w1_slave_show(struct device *device, c -= snprintf(buf + PAGE_SIZE - c, c, ": crc=%02x %s\n", crc, (verdict) ? "YES" : "NO"); if (verdict) - memcpy(sl->family_data, rom, sizeof(rom)); + memcpy(family_data, rom, sizeof(rom)); else dev_warn(device, "Read failed CRC check\n"); for (i = 0; i < 9; ++i) c -= snprintf(buf + PAGE_SIZE - c, c, "%02x ", - ((u8 *)sl->family_data)[i]); + ((u8 *)family_data)[i]); c -= snprintf(buf + PAGE_SIZE - c, c, "t=%d\n", w1_convert_temp(rom, sl->family->fid)); + ret = PAGE_SIZE - c; + +pre_unlock: mutex_unlock(&dev->bus_mutex); - return PAGE_SIZE - c; +post_unlock: + atomic_dec(THERM_REFCNT(family_data)); + return ret; } static int __init w1_therm_init(void) diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c index 1e6be9e405779..c9c97dacf4526 100644 --- a/drivers/watchdog/omap_wdt.c +++ b/drivers/watchdog/omap_wdt.c @@ -132,6 +132,13 @@ static int omap_wdt_start(struct watchdog_device *wdog) pm_runtime_get_sync(wdev->dev); + /* + * Make sure the watchdog is disabled. This is unfortunately required + * because writing to various registers with the watchdog running has no + * effect. + */ + omap_wdt_disable(wdev); + /* initialize prescaler */ while (readl_relaxed(base + OMAP_WATCHDOG_WPS) & 0x01) cpu_relax(); diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c index a29afb37c48ca..47bd8a14d01f5 100644 --- a/drivers/watchdog/sunxi_wdt.c +++ b/drivers/watchdog/sunxi_wdt.c @@ -184,7 +184,7 @@ static int sunxi_wdt_start(struct watchdog_device *wdt_dev) /* Set system reset function */ reg = readl(wdt_base + regs->wdt_cfg); reg &= ~(regs->wdt_reset_mask); - reg |= ~(regs->wdt_reset_val); + reg |= regs->wdt_reset_val; writel(reg, wdt_base + regs->wdt_cfg); /* Enable watchdog */ diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 89274850741b5..ee71baddbb10e 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -568,12 +568,14 @@ static int gntdev_release(struct inode *inode, struct file *flip) pr_debug("priv %p\n", priv); + mutex_lock(&priv->lock); while (!list_empty(&priv->maps)) { map = list_entry(priv->maps.next, struct grant_map, next); list_del(&map->next); gntdev_put_map(NULL /* already removed */, map); } WARN_ON(!list_empty(&priv->freeable_maps)); + mutex_unlock(&priv->lock); if (use_ptemod) mmu_notifier_unregister(&priv->mn, priv->mm); @@ -802,7 +804,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_ops = &gntdev_vmops; - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_IO; if (use_ptemod) vma->vm_flags |= VM_DONTCOPY; diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c index a1800c150839a..08cb419eb4e63 100644 --- a/drivers/xen/preempt.c +++ b/drivers/xen/preempt.c @@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall); asmlinkage __visible void xen_maybe_preempt_hcall(void) { if (unlikely(__this_cpu_read(xen_in_preemptible_hcall) - && should_resched())) { + && need_resched())) { /* * Clear flag as we may be rescheduled on a different * cpu. 
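The sunxi_wdt change a few hunks up fixes a subtle read-modify-write slip: the field was cleared with ~mask correctly, but the new value was then OR-ed in as ~wdt_reset_val, i.e. its complement, which sets nearly every other bit in the register. The intended shape of the sequence, sketched with hypothetical register names:

	#include <linux/io.h>

	#define DEMO_WDT_CFG		0x14	/* hypothetical config register offset */
	#define DEMO_WDT_RESET_MASK	0x03	/* bits selecting the reset function */
	#define DEMO_WDT_RESET_VAL	0x01	/* value for "whole-system reset" */

	static void demo_wdt_select_reset(void __iomem *base)
	{
		u32 reg = readl(base + DEMO_WDT_CFG);

		reg &= ~DEMO_WDT_RESET_MASK;	/* clear the field */
		reg |= DEMO_WDT_RESET_VAL;	/* OR in the value itself, not ~value */
		writel(reg, base + DEMO_WDT_CFG);
	}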
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index 58e38d586f524..4d529f3e40df9 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -37,6 +37,7 @@ struct xen_pcibk_device { struct xen_pci_sharedinfo *sh_info; unsigned long flags; struct work_struct op_work; + struct xen_pci_op op; }; struct xen_pcibk_dev_data { diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index c4a0666de6f5e..9cf4653b6bd7a 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -197,13 +197,27 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, struct xen_pcibk_dev_data *dev_data; int i, result; struct msix_entry *entries; + u16 cmd; if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", pci_name(dev)); + if (op->value > SH_INFO_MAX_VEC) return -EINVAL; + if (dev->msix_enabled) + return -EALREADY; + + /* + * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able + * to access the BARs where the MSI-X entries reside. + * But for VF devices it is the PF, not the VF, that needs checking. + */ + pci_read_config_word(pci_physfn(dev), PCI_COMMAND, &cmd); + if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY)) + return -ENXIO; + entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); if (entries == NULL) return -ENOMEM; @@ -298,9 +312,14 @@ void xen_pcibk_do_op(struct work_struct *data) container_of(data, struct xen_pcibk_device, op_work); struct pci_dev *dev; struct xen_pcibk_dev_data *dev_data = NULL; - struct xen_pci_op *op = &pdev->sh_info->op; + struct xen_pci_op *op = &pdev->op; int test_intx = 0; +#ifdef CONFIG_PCI_MSI + unsigned int nr = 0; +#endif + *op = pdev->sh_info->op; + barrier(); dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn); if (dev == NULL) @@ -326,6 +345,7 @@ void xen_pcibk_do_op(struct work_struct *data) op->err = xen_pcibk_disable_msi(pdev, dev, op); break; case XEN_PCI_OP_enable_msix: + nr = op->value; op->err = xen_pcibk_enable_msix(pdev, dev, op); break; case XEN_PCI_OP_disable_msix: @@ -342,6 +362,17 @@ void xen_pcibk_do_op(struct work_struct *data) if ((dev_data->enable_intx != test_intx)) xen_pcibk_control_isr(dev, 0 /* no reset */); } + pdev->sh_info->op.err = op->err; + pdev->sh_info->op.value = op->value; +#ifdef CONFIG_PCI_MSI + if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) { + unsigned int i; + + for (i = 0; i < nr; i++) + pdev->sh_info->op.msix_entries[i].vector = + op->msix_entries[i].vector; + } +#endif /* Tell the driver domain that we're done.
*/ wmb(); clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index b7f51504f85ad..c561d530be2e9 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -941,12 +941,12 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, spin_unlock_irqrestore(&info->v2p_lock, flags); out_free: - mutex_lock(&tpg->tv_tpg_mutex); - tpg->tv_tpg_fe_count--; - mutex_unlock(&tpg->tv_tpg_mutex); - - if (err) + if (err) { + mutex_lock(&tpg->tv_tpg_mutex); + tpg->tv_tpg_fe_count--; + mutex_unlock(&tpg->tv_tpg_mutex); kfree(new); + } return err; } diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 96b2011d25f35..658be6cc3db60 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -814,8 +814,10 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles, addrs); - if (!rv) + if (!rv) { vunmap(vaddr); + free_xenballooned_pages(node->nr_handles, node->hvm.pages); + } else WARN(1, "Leaking %p, size %u page(s)\n", vaddr, node->nr_handles); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 703342e309f57..53f1e8a217071 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -540,8 +540,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, unlock_new_inode(inode); return inode; error: - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ERR_PTR(retval); } diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 9861c7c951a6d..4d3ecfb55fcf8 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -149,8 +149,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, unlock_new_inode(inode); return inode; error: - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ERR_PTR(retval); } diff --git a/fs/block_dev.c b/fs/block_dev.c index c7e4163ede87f..ccfd31f1df3a4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1234,6 +1234,13 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_clear; } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); + /* + * If the partition is not aligned on a page + * boundary, we can't do dax I/O to it. 
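The test this comment introduces is plain arithmetic: DAX maps storage in whole pages, so with 4K pages a partition must start and end on an 8-sector boundary (PAGE_SIZE / 512 = 8) or S_DAX has to be cleared. A hypothetical helper phrasing the same check:

	#include <linux/blkdev.h>

	/* A partition is usable for DAX only if it is page-aligned. */
	static bool demo_part_dax_aligned(sector_t start_sect, sector_t nr_sects)
	{
		unsigned int sectors_per_page = PAGE_SIZE / 512;	/* 8 for 4K pages */

		return !(start_sect % sectors_per_page) &&
		       !(nr_sects % sectors_per_page);
	}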
+ */ + if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) || + (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) + bdev->bd_inode->i_flags &= ~S_DAX; } } else { if (bdev->bd_contains == bdev) { diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 614aaa1969bdf..30bc9fa763bd5 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1369,7 +1369,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, read_extent_buffer(eb, dest + bytes_left, name_off, name_len); if (eb != eb_in) { - btrfs_tree_read_unlock_blocking(eb); + if (!path->skip_locking) + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); } ret = btrfs_find_item(fs_root, path, parent, 0, @@ -1389,9 +1390,10 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, eb = path->nodes[0]; /* make sure we can use eb after releasing the path */ if (eb != eb_in) { - atomic_inc(&eb->refs); - btrfs_tree_read_lock(eb); - btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + if (!path->skip_locking) + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + path->nodes[0] = NULL; + path->locks[0] = 0; } btrfs_release_path(path); iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); @@ -1786,7 +1788,6 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, int found = 0; struct extent_buffer *eb; struct btrfs_inode_extref *extref; - struct extent_buffer *leaf; u32 item_size; u32 cur_offset; unsigned long ptr; @@ -1814,9 +1815,8 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); btrfs_release_path(path); - leaf = path->nodes[0]; - item_size = btrfs_item_size_nr(leaf, slot); - ptr = btrfs_item_ptr_offset(leaf, slot); + item_size = btrfs_item_size_nr(eb, slot); + ptr = btrfs_item_ptr_offset(eb, slot); cur_offset = 0; while (cur_offset < item_size) { @@ -1830,7 +1830,7 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, if (ret) break; - cur_offset += btrfs_inode_extref_name_len(leaf, extref); + cur_offset += btrfs_inode_extref_name_len(eb, extref); cur_offset += sizeof(*extref); } btrfs_tree_read_unlock_blocking(eb); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 0ef5cc13fae26..61205e3bbefac 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -192,6 +192,10 @@ struct btrfs_inode { /* File creation time. 
*/ struct timespec i_otime; + /* Hook into fs_info->delayed_iputs */ + struct list_head delayed_iput; + long delayed_iput_count; + struct inode vfs_inode; }; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6f364e1d8d3d1..699944a074919 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1544,7 +1544,7 @@ struct btrfs_fs_info { spinlock_t delayed_iput_lock; struct list_head delayed_iputs; - struct rw_semaphore delayed_iput_sem; + struct mutex cleaner_delayed_iput_mutex; /* this protects tree_mod_seq_list */ spinlock_t tree_mod_seq_lock; diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index a2ae42720a6af..bc2d048a9eb92 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1690,7 +1690,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list, * */ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, - struct list_head *ins_list) + struct list_head *ins_list, bool *emitted) { struct btrfs_dir_item *di; struct btrfs_delayed_item *curr, *next; @@ -1734,6 +1734,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, if (over) return 1; + *emitted = true; } return 0; } diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index f70119f254216..0167853c84aea 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -144,7 +144,7 @@ void btrfs_put_delayed_items(struct list_head *ins_list, int btrfs_should_delete_dir_index(struct list_head *del_list, u64 index); int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, - struct list_head *ins_list); + struct list_head *ins_list, bool *emitted); /* for init */ int __init btrfs_delayed_inode_init(void); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2ef9a4b72d06e..99e8f60c79620 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1772,8 +1772,11 @@ static int cleaner_kthread(void *arg) goto sleep; } + mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex); btrfs_run_delayed_iputs(root); btrfs_delete_unused_bgs(root->fs_info); + mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex); + again = btrfs_clean_one_deleted_snapshot(root); mutex_unlock(&root->fs_info->cleaner_mutex); @@ -2491,8 +2494,8 @@ int open_ctree(struct super_block *sb, mutex_init(&fs_info->unused_bg_unpin_mutex); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); + mutex_init(&fs_info->cleaner_delayed_iput_mutex); seqlock_init(&fs_info->profiles_lock); - init_rwsem(&fs_info->delayed_iput_sem); init_completion(&fs_info->kobj_unregister); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0ec3acd14cbf5..3c1938000a5dc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3985,11 +3985,12 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes) if (ret) return ret; /* - * make sure that all running delayed iput are - * done + * The cleaner kthread might still be doing iput + * operations. Wait for it to finish so that + * more space is released. 
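The pairing used here can look odd at first: the waiter takes cleaner_delayed_iput_mutex and releases it immediately. Because the cleaner kthread (see the disk-io.c hunk above) holds the mutex across its whole btrfs_run_delayed_iputs()/btrfs_delete_unused_bgs() pass, the empty lock/unlock acts purely as a barrier: it blocks until any in-flight cleaner pass has finished and its space has actually been freed. The generic idiom, sketched with hypothetical names:

	#include <linux/mutex.h>

	static DEFINE_MUTEX(demo_cleaner_mutex);

	/* Background pass: hold the mutex for the entire batch of releases. */
	static void demo_cleaner_pass(void)
	{
		mutex_lock(&demo_cleaner_mutex);
		/* ... run delayed iputs, delete unused block groups ... */
		mutex_unlock(&demo_cleaner_mutex);
	}

	/* Allocator slow path: wait out any running pass before retrying. */
	static void demo_wait_for_cleaner(void)
	{
		mutex_lock(&demo_cleaner_mutex);	/* blocks while a pass runs */
		mutex_unlock(&demo_cleaner_mutex);	/* lock/unlock pair == barrier */
	}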
*/ - down_write(&root->fs_info->delayed_iput_sem); - up_write(&root->fs_info->delayed_iput_sem); + mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex); + mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex); goto again; } else { btrfs_end_transaction(trans, root); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c32d226bfeccb..885f533a34d9c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2795,7 +2795,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, bio_end_io_t end_io_func, int mirror_num, unsigned long prev_bio_flags, - unsigned long bio_flags) + unsigned long bio_flags, + bool force_bio_submit) { int ret = 0; struct bio *bio; @@ -2813,6 +2814,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, contig = bio_end_sector(bio) == sector; if (prev_bio_flags != bio_flags || !contig || + force_bio_submit || merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || bio_add_page(bio, page, page_size, offset) < page_size) { ret = submit_one_bio(rw, bio, mirror_num, @@ -2906,7 +2908,8 @@ static int __do_readpage(struct extent_io_tree *tree, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) + unsigned long *bio_flags, int rw, + u64 *prev_em_start) { struct inode *inode = page->mapping->host; u64 start = page_offset(page); @@ -2954,6 +2957,7 @@ static int __do_readpage(struct extent_io_tree *tree, } while (cur <= end) { unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; + bool force_bio_submit = false; if (cur >= last_byte) { char *userpage; @@ -3004,6 +3008,49 @@ static int __do_readpage(struct extent_io_tree *tree, block_start = em->block_start; if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) block_start = EXTENT_MAP_HOLE; + + /* + * If we have a file range that points to a compressed extent + * and it's followed by a consecutive file range that points + * to the same compressed extent (possibly with a different + * offset and/or length, so it either points to the whole extent + * or only part of it), we must make sure we do not submit a + * single bio to populate the pages for the 2 ranges because + * this makes the compressed extent read zero out the pages + * belonging to the 2nd range. Imagine the following scenario: + * + * File layout + * [0 - 8K] [8K - 24K] + * | | + * | | + * points to extent X, points to extent X, + * offset 4K, length of 8K offset 0, length 16K + * + * [extent X, compressed length = 4K uncompressed length = 16K] + * + * If the bio to read the compressed extent covers both ranges, + * it will decompress extent X into the pages belonging to the + * first range and then it will stop, zeroing out the remaining + * pages that belong to the other range that points to extent X. + * So here we make sure we submit 2 bios, one for the first + * range and another one for the second range. Both will target + * the same physical extent from disk, but we can't currently + * make the compressed bio endio callback populate the pages + * for both ranges because each compressed bio is tightly + * coupled with a single extent map, and each range can have + * an extent map with a different offset value relative to the + * uncompressed data of our extent and different lengths. This + * is a corner case so we prioritize correctness over + * non-optimal behavior (submitting 2 bios for the same extent).
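In code form, the bookkeeping this comment motivates is compact: remember the orig_start of the previous extent map and force the current bio out whenever a compressed extent map with a different orig_start shows up. A condensed restatement of the mechanism added below (the patch threads the cursor through the call chain as u64 *prev_em_start; it is flattened to a local here for readability):

	u64 prev_em_start = (u64)-1;	/* (u64)-1 == no extent map seen yet */

	/* inside the per-page loop, after looking up the extent map 'em': */
	bool force_bio_submit = false;

	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
	    prev_em_start != (u64)-1 &&
	    prev_em_start != em->orig_start)
		force_bio_submit = true;	/* new file range: give it its own bio */

	prev_em_start = em->orig_start;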
+ */ + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) && + prev_em_start && *prev_em_start != (u64)-1 && + *prev_em_start != em->orig_start) + force_bio_submit = true; + + if (prev_em_start) + *prev_em_start = em->orig_start; + free_extent_map(em); em = NULL; @@ -3053,7 +3100,8 @@ static int __do_readpage(struct extent_io_tree *tree, bdev, bio, pnr, end_bio_extent_readpage, mirror_num, *bio_flags, - this_bio_flag); + this_bio_flag, + force_bio_submit); if (!ret) { nr++; *bio_flags = this_bio_flag; @@ -3080,7 +3128,8 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) + unsigned long *bio_flags, int rw, + u64 *prev_em_start) { struct inode *inode; struct btrfs_ordered_extent *ordered; @@ -3100,7 +3149,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, for (index = 0; index < nr_pages; index++) { __do_readpage(tree, pages[index], get_extent, em_cached, bio, - mirror_num, bio_flags, rw); + mirror_num, bio_flags, rw, prev_em_start); page_cache_release(pages[index]); } } @@ -3110,7 +3159,8 @@ static void __extent_readpages(struct extent_io_tree *tree, int nr_pages, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) + unsigned long *bio_flags, int rw, + u64 *prev_em_start) { u64 start = 0; u64 end = 0; @@ -3131,7 +3181,7 @@ static void __extent_readpages(struct extent_io_tree *tree, index - first_index, start, end, get_extent, em_cached, bio, mirror_num, bio_flags, - rw); + rw, prev_em_start); start = page_start; end = start + PAGE_CACHE_SIZE - 1; first_index = index; @@ -3142,7 +3192,8 @@ static void __extent_readpages(struct extent_io_tree *tree, __do_contiguous_readpages(tree, &pages[first_index], index - first_index, start, end, get_extent, em_cached, bio, - mirror_num, bio_flags, rw); + mirror_num, bio_flags, rw, + prev_em_start); } static int __extent_read_full_page(struct extent_io_tree *tree, @@ -3168,7 +3219,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, } ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, - bio_flags, rw); + bio_flags, rw, NULL); return ret; } @@ -3194,7 +3245,7 @@ int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, int ret; ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, - &bio_flags, READ); + &bio_flags, READ, NULL); if (bio) ret = submit_one_bio(READ, bio, mirror_num, bio_flags); return ret; @@ -3447,7 +3498,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, sector, iosize, pg_offset, bdev, &epd->bio, max_nr, end_bio_extent_writepage, - 0, 0, 0); + 0, 0, 0, false); if (ret) SetPageError(page); } @@ -3749,7 +3800,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, ret = submit_extent_page(rw, tree, p, offset >> 9, PAGE_CACHE_SIZE, 0, bdev, &epd->bio, -1, end_bio_extent_buffer_writepage, - 0, epd->bio_flags, bio_flags); + 0, epd->bio_flags, bio_flags, false); epd->bio_flags = bio_flags; if (ret) { set_btree_ioerr(p); @@ -4153,6 +4204,7 @@ int extent_readpages(struct extent_io_tree *tree, struct page *page; struct extent_map *em_cached = NULL; int nr = 0; + u64 prev_em_start = (u64)-1; for (page_idx = 0; page_idx < nr_pages; page_idx++) { page = list_entry(pages->prev, struct page, lru); @@ -4169,12 +4221,12 @@ int extent_readpages(struct extent_io_tree *tree, if (nr < 
ARRAY_SIZE(pagepool)) continue; __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, 0, &bio_flags, READ); + &bio, 0, &bio_flags, READ, &prev_em_start); nr = 0; } if (nr) __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, 0, &bio_flags, READ); + &bio, 0, &bio_flags, READ, &prev_em_start); if (em_cached) free_extent_map(em_cached); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b072e17479aa8..2b0d84d32db45 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -756,8 +756,16 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid > ino || - key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) + + if (key.objectid > ino) + break; + if (WARN_ON_ONCE(key.objectid < ino) || + key.type < BTRFS_EXTENT_DATA_KEY) { + ASSERT(del_nr == 0); + path->slots[0]++; + goto next_slot; + } + if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) break; fi = btrfs_item_ptr(leaf, path->slots[0], @@ -776,8 +784,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, btrfs_file_extent_inline_len(leaf, path->slots[0], fi); } else { - WARN_ON(1); - extent_end = search_start; + /* can't happen */ + BUG(); } /* diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index f6a596d5a6374..d4a582ac3f730 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -246,6 +246,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) { struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset; + spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock; struct btrfs_free_space *info; struct rb_node *n; u64 count; @@ -254,24 +255,30 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) return; while (1) { + bool add_to_ctl = true; + + spin_lock(rbroot_lock); n = rb_first(rbroot); - if (!n) + if (!n) { + spin_unlock(rbroot_lock); break; + } info = rb_entry(n, struct btrfs_free_space, offset_index); BUG_ON(info->bitmap); /* Logic error */ if (info->offset > root->ino_cache_progress) - goto free; + add_to_ctl = false; else if (info->offset + info->bytes > root->ino_cache_progress) count = root->ino_cache_progress - info->offset + 1; else count = info->bytes; - __btrfs_add_free_space(ctl, info->offset, count); -free: rb_erase(&info->offset_index, rbroot); - kfree(info); + spin_unlock(rbroot_lock); + if (add_to_ctl) + __btrfs_add_free_space(ctl, info->offset, count); + kmem_cache_free(btrfs_free_space_cachep, info); } } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8bb013672aee0..b114a0539d3dc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1294,8 +1294,14 @@ static noinline int run_delalloc_nocow(struct inode *inode, num_bytes = 0; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid > ino || - found_key.type > BTRFS_EXTENT_DATA_KEY || + if (found_key.objectid > ino) + break; + if (WARN_ON_ONCE(found_key.objectid < ino) || + found_key.type < BTRFS_EXTENT_DATA_KEY) { + path->slots[0]++; + goto next_slot; + } + if (found_key.type > BTRFS_EXTENT_DATA_KEY || found_key.offset > end) break; @@ -3074,56 +3080,46 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, start, (size_t)(end - start + 1)); } -struct delayed_iput { - struct list_head list; - struct inode *inode; -}; - -/* JDM: If this is fs-wide, why can't we add a pointer to - * btrfs_inode instead and avoid the allocation? 
*/ void btrfs_add_delayed_iput(struct inode *inode) { struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; - struct delayed_iput *delayed; + struct btrfs_inode *binode = BTRFS_I(inode); if (atomic_add_unless(&inode->i_count, -1, 1)) return; - delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL); - delayed->inode = inode; - spin_lock(&fs_info->delayed_iput_lock); - list_add_tail(&delayed->list, &fs_info->delayed_iputs); + spin_lock(&fs_info->delayed_iput_lock); + if (binode->delayed_iput_count == 0) { + ASSERT(list_empty(&binode->delayed_iput)); + list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs); + } else { + binode->delayed_iput_count++; + } spin_unlock(&fs_info->delayed_iput_lock); } void btrfs_run_delayed_iputs(struct btrfs_root *root) { - LIST_HEAD(list); struct btrfs_fs_info *fs_info = root->fs_info; - struct delayed_iput *delayed; - int empty; - - spin_lock(&fs_info->delayed_iput_lock); - empty = list_empty(&fs_info->delayed_iputs); - spin_unlock(&fs_info->delayed_iput_lock); - if (empty) - return; - - down_read(&fs_info->delayed_iput_sem); spin_lock(&fs_info->delayed_iput_lock); - list_splice_init(&fs_info->delayed_iputs, &list); - spin_unlock(&fs_info->delayed_iput_lock); - - while (!list_empty(&list)) { - delayed = list_entry(list.next, struct delayed_iput, list); - list_del(&delayed->list); - iput(delayed->inode); - kfree(delayed); + while (!list_empty(&fs_info->delayed_iputs)) { + struct btrfs_inode *inode; + + inode = list_first_entry(&fs_info->delayed_iputs, + struct btrfs_inode, delayed_iput); + if (inode->delayed_iput_count) { + inode->delayed_iput_count--; + list_move_tail(&inode->delayed_iput, + &fs_info->delayed_iputs); + } else { + list_del_init(&inode->delayed_iput); + } + spin_unlock(&fs_info->delayed_iput_lock); + iput(&inode->vfs_inode); + spin_lock(&fs_info->delayed_iput_lock); } - - up_read(&root->fs_info->delayed_iput_sem); + spin_unlock(&fs_info->delayed_iput_lock); } /* @@ -4184,6 +4180,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans, } +static int truncate_inline_extent(struct inode *inode, + struct btrfs_path *path, + struct btrfs_key *found_key, + const u64 item_end, + const u64 new_size) +{ + struct extent_buffer *leaf = path->nodes[0]; + int slot = path->slots[0]; + struct btrfs_file_extent_item *fi; + u32 size = (u32)(new_size - found_key->offset); + struct btrfs_root *root = BTRFS_I(inode)->root; + + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + + if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) { + loff_t offset = new_size; + loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE); + + /* + * Zero out the rest of the last page of our inline extent, + * instead of directly truncating our inline extent here - that + * would be much more complex (decompressing all the data, then + * compressing the truncated data, which might be bigger than + * the size of the inline extent, resizing the extent, etc). + * We release the path because to get the page we might need to + * read the extent item from disk (data not in the page cache). + */ + btrfs_release_path(path); + return btrfs_truncate_page(inode, offset, page_end - offset, 0); + } + + btrfs_set_file_extent_ram_bytes(leaf, fi, size); + size = btrfs_file_extent_calc_inline_size(size); + btrfs_truncate_item(root, path, size, 1); + + if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) + inode_sub_bytes(inode, item_end + 1 - new_size); + + return 0; +} + /* * this can truncate away extent items, csum items and directory items.
* It starts at a high offset and removes keys until it can't find @@ -4378,27 +4415,40 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, * special encodings */ if (!del_item && - btrfs_file_extent_compression(leaf, fi) == 0 && btrfs_file_extent_encryption(leaf, fi) == 0 && btrfs_file_extent_other_encoding(leaf, fi) == 0) { - u32 size = new_size - found_key.offset; - - if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) - inode_sub_bytes(inode, item_end + 1 - - new_size); /* - * update the ram bytes to properly reflect - * the new size of our item + * Need to release path in order to truncate a + * compressed extent. So delete any accumulated + * extent items so far. */ - btrfs_set_file_extent_ram_bytes(leaf, fi, size); - size = - btrfs_file_extent_calc_inline_size(size); - btrfs_truncate_item(root, path, size, 1); + if (btrfs_file_extent_compression(leaf, fi) != + BTRFS_COMPRESS_NONE && pending_del_nr) { + err = btrfs_del_items(trans, root, path, + pending_del_slot, + pending_del_nr); + if (err) { + btrfs_abort_transaction(trans, + root, + err); + goto error; + } + pending_del_nr = 0; + } + + err = truncate_inline_extent(inode, path, + &found_key, + item_end, + new_size); + if (err) { + btrfs_abort_transaction(trans, + root, err); + goto error; + } } else if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { - inode_sub_bytes(inode, item_end + 1 - - found_key.offset); + inode_sub_bytes(inode, item_end + 1 - new_size); } } delete: @@ -5035,7 +5085,8 @@ void btrfs_evict_inode(struct inode *inode) goto no_delete; } /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ - btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (!special_file(inode->i_mode)) + btrfs_wait_ordered_range(inode, 0, (u64)-1); btrfs_free_io_failure_record(inode, 0, (u64)-1); @@ -5615,6 +5666,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) char *name_ptr; int name_len; int is_curr = 0; /* ctx->pos points to the current index? */ + bool emitted; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) @@ -5643,6 +5695,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (ret < 0) goto err; + emitted = false; while (1) { leaf = path->nodes[0]; slot = path->slots[0]; @@ -5722,6 +5775,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (over) goto nopos; + emitted = true; di_len = btrfs_dir_name_len(leaf, di) + btrfs_dir_data_len(leaf, di) + sizeof(*di); di_cur += di_len; @@ -5734,11 +5788,20 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (key_type == BTRFS_DIR_INDEX_KEY) { if (is_curr) ctx->pos++; - ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list); + ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted); if (ret) goto nopos; } + /* + * If we haven't emitted any dir entry, we must not touch ctx->pos as + * it was set to the termination value in a previous call. We assume + * that "." and ".." were emitted if we reach this point and set the + * termination value as well for an empty directory. + */ + if (ctx->pos > 2 && !emitted) + goto nopos; + /* Reached end of directory/root. Bump pos past the last item.
*/ ctx->pos++; @@ -8829,6 +8892,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->dir_index = 0; ei->last_unlink_trans = 0; ei->last_log_commit = 0; + ei->delayed_iput_count = 0; spin_lock_init(&ei->lock); ei->outstanding_extents = 0; @@ -8853,6 +8917,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) mutex_init(&ei->delalloc_mutex); btrfs_ordered_inode_tree_init(&ei->ordered_tree); INIT_LIST_HEAD(&ei->delalloc_inodes); + INIT_LIST_HEAD(&ei->delayed_iput); RB_CLEAR_NODE(&ei->rb_node); return inode; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1c22c65185045..87c720865ebf0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2413,8 +2413,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, goto out_unlock_inode; } - d_invalidate(dentry); - down_write(&root->fs_info->subvol_sem); err = may_destroy_subvol(dest); @@ -2508,7 +2506,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, out_unlock_inode: mutex_unlock(&inode->i_mutex); if (!err) { - shrink_dcache_sb(root->fs_info->sb); + d_invalidate(dentry); btrfs_invalidate_inodes(dest); d_delete(dentry); ASSERT(dest->send_in_progress == 0); @@ -2940,7 +2938,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len, static long btrfs_ioctl_file_extent_same(struct file *file, struct btrfs_ioctl_same_args __user *argp) { - struct btrfs_ioctl_same_args *same; + struct btrfs_ioctl_same_args *same = NULL; struct btrfs_ioctl_same_extent_info *info; struct inode *src = file_inode(file); u64 off; @@ -2970,6 +2968,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file, if (IS_ERR(same)) { ret = PTR_ERR(same); + same = NULL; goto out; } @@ -3040,6 +3039,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file, out: mnt_drop_write_file(file); + kfree(same); return ret; } @@ -3166,6 +3166,150 @@ static void clone_update_extent_map(struct inode *inode, &BTRFS_I(inode)->runtime_flags); } +/* + * Make sure we do not end up inserting an inline extent into a file that has + * already other (non-inline) extents. If a file has an inline extent it can + * not have any other extents and the (single) inline extent must start at the + * file offset 0. Failing to respect these rules will lead to file corruption, + * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc + * + * We can have extents that have been already written to disk or we can have + * dirty ranges still in delalloc, in which case the extent maps and items are + * created only when we run delalloc, and the delalloc ranges might fall outside + * the range we are currently locking in the inode's io tree. So we check the + * inode's i_size because of that (i_size updates are done while holding the + * i_mutex, which we are holding here). + * We also check to see if the inode has a size not greater than "datal" but has + * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are + * protected against such concurrent fallocate calls by the i_mutex). + * + * If the file has no extents but a size greater than datal, do not allow the + * copy because we would need turn the inline extent into a non-inline one (even + * with NO_HOLES enabled). If we find our destination inode only has one inline + * extent, just overwrite it with the source inline extent if its size is less + * than the source extent's size, or we could copy the source inline extent's + * data into the destination inode's inline extent if the later is greater then + * the former. 
+ */ +static int clone_copy_inline_extent(struct inode *src, + struct inode *dst, + struct btrfs_trans_handle *trans, + struct btrfs_path *path, + struct btrfs_key *new_key, + const u64 drop_start, + const u64 datal, + const u64 skip, + const u64 size, + char *inline_data) +{ + struct btrfs_root *root = BTRFS_I(dst)->root; + const u64 aligned_end = ALIGN(new_key->offset + datal, + root->sectorsize); + int ret; + struct btrfs_key key; + + if (new_key->offset > 0) + return -EOPNOTSUPP; + + key.objectid = btrfs_ino(dst); + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + return ret; + } else if (ret > 0) { + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + else if (ret > 0) + goto copy_inline_extent; + } + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid == btrfs_ino(dst) && + key.type == BTRFS_EXTENT_DATA_KEY) { + ASSERT(key.offset > 0); + return -EOPNOTSUPP; + } + } else if (i_size_read(dst) <= datal) { + struct btrfs_file_extent_item *ei; + u64 ext_len; + + /* + * If the file size is <= datal, make sure there are no other + * extents following (can happen due to a fallocate call with + * the flag FALLOC_FL_KEEP_SIZE). + */ + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + /* + * If it's an inline extent, it cannot have other extents + * following it. + */ + if (btrfs_file_extent_type(path->nodes[0], ei) == + BTRFS_FILE_EXTENT_INLINE) + goto copy_inline_extent; + + ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei); + if (ext_len > aligned_end) + return -EOPNOTSUPP; + + ret = btrfs_next_item(root, path); + if (ret < 0) { + return ret; + } else if (ret == 0) { + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == btrfs_ino(dst) && + key.type == BTRFS_EXTENT_DATA_KEY) + return -EOPNOTSUPP; + } + } + +copy_inline_extent: + /* + * We have no extent items, or we have an extent at offset 0 which may + * or may not be inlined. All these cases are dealt with in the same way. + */ + if (i_size_read(dst) > datal) { + /* + * If the destination inode has an inline extent... + * This would require copying the data from the source inline + * extent into the beginning of the destination's inline extent. + * But this is really complex: both extents can be compressed + * or just one of them, which would require decompressing and + * re-compressing data (which could increase the new compressed + * size, not allowing the compressed data to fit anymore in an + * inline extent). + * So just don't support this case for now (it should be rare, + * we are not really saving space when cloning inline extents).
+ */ + return -EOPNOTSUPP; + } + + btrfs_release_path(path); + ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1); + if (ret) + return ret; + ret = btrfs_insert_empty_item(trans, root, path, new_key, size); + if (ret) + return ret; + + if (skip) { + const u32 start = btrfs_file_extent_calc_inline_size(0); + + memmove(inline_data + start, inline_data + start + skip, datal); + } + + write_extent_buffer(path->nodes[0], inline_data, + btrfs_item_ptr_offset(path->nodes[0], + path->slots[0]), + size); + inode_add_bytes(dst, datal); + + return 0; +} + /** * btrfs_clone() - clone a range from inode file to another * @@ -3432,7 +3576,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode, } else if (type == BTRFS_FILE_EXTENT_INLINE) { u64 skip = 0; u64 trim = 0; - u64 aligned_end = 0; if (off > key.offset) { skip = off - key.offset; @@ -3450,42 +3593,22 @@ static int btrfs_clone(struct inode *src, struct inode *inode, size -= skip + trim; datal -= skip + trim; - aligned_end = ALIGN(new_key.offset + datal, - root->sectorsize); - ret = btrfs_drop_extents(trans, root, inode, - drop_start, - aligned_end, - 1); + ret = clone_copy_inline_extent(src, inode, + trans, path, + &new_key, + drop_start, + datal, + skip, size, buf); if (ret) { if (ret != -EOPNOTSUPP) btrfs_abort_transaction(trans, - root, ret); - btrfs_end_transaction(trans, root); - goto out; - } - - ret = btrfs_insert_empty_item(trans, root, path, - &new_key, size); - if (ret) { - btrfs_abort_transaction(trans, root, - ret); + root, + ret); btrfs_end_transaction(trans, root); goto out; } - - if (skip) { - u32 start = - btrfs_file_extent_calc_inline_size(0); - memmove(buf+start, buf+start+skip, - datal); - } - leaf = path->nodes[0]; slot = path->slots[0]; - write_extent_buffer(leaf, buf, - btrfs_item_ptr_offset(leaf, slot), - size); - inode_add_bytes(inode, datal); } /* If we have an implicit hole (NO_HOLES feature). 
*/ @@ -4478,6 +4601,11 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) bctl->flags |= BTRFS_BALANCE_TYPE_MASK; } + if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) { + ret = -EINVAL; + goto out_bctl; + } + do_balance: /* * Ownership of bctl and mutually_exclusive_operation_running @@ -4489,12 +4617,15 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) need_unlock = false; ret = btrfs_balance(bctl, bargs); + bctl = NULL; if (arg) { if (copy_to_user(arg, bargs, sizeof(*bargs))) ret = -EFAULT; } +out_bctl: + kfree(bctl); out_bargs: kfree(bargs); out_unlock: diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5628e25250c0d..00d18c2bdb0f2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -758,7 +758,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (!list_empty(&trans->ordered)) { spin_lock(&info->trans_lock); - list_splice(&trans->ordered, &cur_trans->pending_ordered); + list_splice_init(&trans->ordered, &cur_trans->pending_ordered); spin_unlock(&info->trans_lock); } @@ -1848,7 +1848,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } spin_lock(&root->fs_info->trans_lock); - list_splice(&trans->ordered, &cur_trans->pending_ordered); + list_splice_init(&trans->ordered, &cur_trans->pending_ordered); if (cur_trans->state >= TRANS_STATE_COMMIT_START) { spin_unlock(&root->fs_info->trans_lock); atomic_inc(&cur_trans->use_count); @@ -1875,8 +1875,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, spin_unlock(&root->fs_info->trans_lock); wait_for_commit(root, prev_trans); + ret = prev_trans->aborted; btrfs_put_transaction(prev_trans); + if (ret) + goto cleanup_transaction; } else { spin_unlock(&root->fs_info->trans_lock); } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d04968374e9d8..4920fceffacb2 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4161,6 +4161,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, u64 ino = btrfs_ino(inode); struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 logged_isize = 0; + bool need_log_inode_item = true; path = btrfs_alloc_path(); if (!path) @@ -4269,11 +4270,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } else { if (inode_only == LOG_INODE_ALL) fast_search = true; - ret = log_inode_item(trans, log, dst_path, inode); - if (ret) { - err = ret; - goto out_unlock; - } goto log_extents; } @@ -4296,6 +4292,9 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (min_key.type > max_key.type) break; + if (min_key.type == BTRFS_INODE_ITEM_KEY) + need_log_inode_item = false; + src = path->nodes[0]; if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { ins_nr++; @@ -4366,6 +4365,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, log_extents: btrfs_release_path(path); btrfs_release_path(dst_path); + if (need_log_inode_item) { + err = log_inode_item(trans, log, dst_path, inode); + if (err) + goto out_unlock; + } if (fast_search) { /* * Some ordered extents started by fsync might have completed diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 174f5e1e00abf..5113b7257b453 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6322,6 +6322,14 @@ int btrfs_read_sys_array(struct btrfs_root *root) goto out_short_read; num_stripes = btrfs_chunk_num_stripes(sb, chunk); + if (!num_stripes) { + printk(KERN_ERR + "BTRFS: invalid number of stripes %u in sys_array at offset %u\n", + num_stripes, 
cur_offset); + ret = -EIO; + break; + } + len = btrfs_chunk_item_size(num_stripes); if (cur_offset + len > array_size) goto out_short_read; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index ebc31331a8374..e1cc5b45069af 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -372,6 +372,14 @@ struct map_lookup { #define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4) #define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5) +#define BTRFS_BALANCE_ARGS_MASK \ + (BTRFS_BALANCE_ARGS_PROFILES | \ + BTRFS_BALANCE_ARGS_USAGE | \ + BTRFS_BALANCE_ARGS_DEVID | \ + BTRFS_BALANCE_ARGS_DRANGE | \ + BTRFS_BALANCE_ARGS_VRANGE | \ + BTRFS_BALANCE_ARGS_LIMIT) + /* * Profile changing flags. When SOFT is set we won't relocate chunk if * it already has the target profile (even though it may be diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 6f518c90e1c14..1fcd7b6e7564d 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) /* check to make sure this item is what we want */ if (found_key.objectid != key.objectid) break; - if (found_key.type != BTRFS_XATTR_ITEM_KEY) + if (found_key.type > BTRFS_XATTR_ITEM_KEY) break; + if (found_key.type < BTRFS_XATTR_ITEM_KEY) + goto next; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); if (verify_dir_item(root, leaf, di)) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 84f37f34f9aa6..1e99b29650a96 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1905,7 +1905,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, len = sizeof(*head) + pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + - sizeof(struct timespec); + sizeof(struct ceph_timespec); /* calculate (max) length for cap releases */ len += sizeof(struct ceph_mds_request_release) * diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 4e9905374078a..0d47422e35481 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -466,7 +466,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) - seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name); + seq_show_option(m, "snapdirname", fsopt->snapdir_name); return 0; } diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 7febcf2475c5a..50b2684833029 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -50,7 +50,7 @@ void cifs_vfs_err(const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - pr_err("CIFS VFS: %pV", &vaf); + pr_err_ratelimited("CIFS VFS: %pV", &vaf); va_end(args); } diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h index f40fbaca1b2a2..66cf0f9fff898 100644 --- a/fs/cifs/cifs_debug.h +++ b/fs/cifs/cifs_debug.h @@ -51,14 +51,13 @@ __printf(1, 2) void cifs_vfs_err(const char *fmt, ...); /* information message: e.g., configuration, major event */ #define cifs_dbg(type, fmt, ...) 
\ do { \ - if (type == FYI) { \ - if (cifsFYI & CIFS_INFO) { \ - pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__); \ - } \ + if (type == FYI && cifsFYI & CIFS_INFO) { \ + pr_debug_ratelimited("%s: " \ + fmt, __FILE__, ##__VA_ARGS__); \ } else if (type == VFS) { \ cifs_vfs_err(fmt, ##__VA_ARGS__); \ } else if (type == NOISY && type != 0) { \ - pr_debug(fmt, ##__VA_ARGS__); \ + pr_debug_ratelimited(fmt, ##__VA_ARGS__); \ } \ } while (0) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index aa0dc25733741..e682b36a210f2 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -444,6 +444,48 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp) return 0; } +/* Server has provided av pairs/target info in the type 2 challenge + * packet and we have plucked it and stored within smb session. + * We parse that blob here to find the server given timestamp + * as part of ntlmv2 authentication (or local current time as + * default in case of failure) + */ +static __le64 +find_timestamp(struct cifs_ses *ses) +{ + unsigned int attrsize; + unsigned int type; + unsigned int onesize = sizeof(struct ntlmssp2_name); + unsigned char *blobptr; + unsigned char *blobend; + struct ntlmssp2_name *attrptr; + + if (!ses->auth_key.len || !ses->auth_key.response) + return 0; + + blobptr = ses->auth_key.response; + blobend = blobptr + ses->auth_key.len; + + while (blobptr + onesize < blobend) { + attrptr = (struct ntlmssp2_name *) blobptr; + type = le16_to_cpu(attrptr->type); + if (type == NTLMSSP_AV_EOL) + break; + blobptr += 2; /* advance attr type */ + attrsize = le16_to_cpu(attrptr->length); + blobptr += 2; /* advance attr size */ + if (blobptr + attrsize > blobend) + break; + if (type == NTLMSSP_AV_TIMESTAMP) { + if (attrsize == sizeof(u64)) + return *((__le64 *)blobptr); + } + blobptr += attrsize; /* advance attr value */ + } + + return cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); +} + static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, const struct nls_table *nls_cp) { @@ -641,6 +683,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) struct ntlmv2_resp *ntlmv2; char ntlmv2_hash[16]; unsigned char *tiblob = NULL; /* target info blob */ + __le64 rsp_timestamp; if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) { if (!ses->domainName) { @@ -659,13 +702,19 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) } } + /* Must be within 5 minutes of the server (or in range +/-2h + * in case of Mac OS X), so simply carry over server timestamp + * (as Windows 7 does) + */ + rsp_timestamp = find_timestamp(ses); + baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp); tilen = ses->auth_key.len; tiblob = ses->auth_key.response; ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL); if (!ses->auth_key.response) { - rc = ENOMEM; + rc = -ENOMEM; ses->auth_key.len = 0; goto setup_ntlmv2_rsp_ret; } @@ -675,8 +724,8 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) (ses->auth_key.response + CIFS_SESS_KEY_SIZE); ntlmv2->blob_signature = cpu_to_le32(0x00000101); ntlmv2->reserved = 0; - /* Must be within 5 minutes of the server */ - ntlmv2->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); + ntlmv2->time = rsp_timestamp; + get_random_bytes(&ntlmv2->client_chal, sizeof(ntlmv2->client_chal)); ntlmv2->reserved2 = 0; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 0a9fb6b53126a..6a1119e87fbb6 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -394,17 +394,17 @@ cifs_show_options(struct 
seq_file *s, struct dentry *root) struct sockaddr *srcaddr; srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; - seq_printf(s, ",vers=%s", tcon->ses->server->vals->version_string); + seq_show_option(s, "vers", tcon->ses->server->vals->version_string); cifs_show_security(s, tcon->ses); cifs_show_cache_flavor(s, cifs_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) seq_puts(s, ",multiuser"); else if (tcon->ses->user_name) - seq_printf(s, ",username=%s", tcon->ses->user_name); + seq_show_option(s, "username", tcon->ses->user_name); if (tcon->ses->domainName) - seq_printf(s, ",domain=%s", tcon->ses->domainName); + seq_show_option(s, "domain", tcon->ses->domainName); if (srcaddr->sa_family != AF_UNSPEC) { struct sockaddr_in *saddr4; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 252f5c15806bc..78a7b1d733543 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -31,19 +31,15 @@ * so that it will fit. We use hash_64 to convert the value to 31 bits, and * then add 1, to ensure that we don't end up with a 0 as the value. */ -#if BITS_PER_LONG == 64 static inline ino_t cifs_uniqueid_to_ino_t(u64 fileid) { + if ((sizeof(ino_t)) < (sizeof(u64))) + return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1; + return (ino_t)fileid; + } -#else -static inline ino_t -cifs_uniqueid_to_ino_t(u64 fileid) -{ - return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1; -} -#endif extern struct file_system_type cifs_fs_type; extern const struct address_space_operations cifs_addr_ops; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index f26ffbfc64d8b..f1a5067d54947 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1395,11 +1395,10 @@ CIFS_open(const unsigned int xid, struct cifs_open_parms *oparms, int *oplock, * current bigbuf. */ static int -cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +discard_remaining_data(struct TCP_Server_Info *server) { unsigned int rfclen = get_rfc1002_length(server->smallbuf); int remaining = rfclen + 4 - server->total_read; - struct cifs_readdata *rdata = mid->callback_data; while (remaining > 0) { int length; @@ -1413,10 +1412,20 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) remaining -= length; } - dequeue_mid(mid, rdata->result); return 0; } +static int +cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + int length; + struct cifs_readdata *rdata = mid->callback_data; + + length = discard_remaining_data(server); + dequeue_mid(mid, rdata->result); + return length; +} + int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) { @@ -1445,6 +1454,12 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) return length; server->total_read += length; + if (server->ops->is_status_pending && + server->ops->is_status_pending(buf, server, 0)) { + discard_remaining_data(server); + return -1; + } + /* Was the SMB read successful? 
*/ rdata->result = server->ops->map_error(buf, false); if (rdata->result != 0) { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8383d5ea42028..de626b939811b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -357,7 +357,6 @@ cifs_reconnect(struct TCP_Server_Info *server) server->session_key.response = NULL; server->session_key.len = 0; server->lstrp = jiffies; - mutex_unlock(&server->srv_mutex); /* mark submitted MIDs for retry and issue callback */ INIT_LIST_HEAD(&retry_list); @@ -370,6 +369,7 @@ cifs_reconnect(struct TCP_Server_Info *server) list_move(&mid_entry->qhead, &retry_list); } spin_unlock(&GlobalMid_Lock); + mutex_unlock(&server->srv_mutex); cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__); list_for_each_safe(tmp, tmp2, &retry_list) { diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index f621b44cb8009..6b66dd5d15408 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2034,7 +2034,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, struct tcon_link *tlink = NULL; struct cifs_tcon *tcon = NULL; struct TCP_Server_Info *server; - struct cifs_io_parms io_parms; /* * To avoid spurious oplock breaks from server, in the case of @@ -2056,18 +2055,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, rc = -ENOSYS; cifsFileInfo_put(open_file); cifs_dbg(FYI, "SetFSize for attrs rc = %d\n", rc); - if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { - unsigned int bytes_written; - - io_parms.netfid = open_file->fid.netfid; - io_parms.pid = open_file->pid; - io_parms.tcon = tcon; - io_parms.offset = 0; - io_parms.length = attrs->ia_size; - rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, - NULL, NULL, 1); - cifs_dbg(FYI, "Wrt seteof rc %d\n", rc); - } } else rc = -EINVAL; @@ -2093,28 +2080,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, else rc = -ENOSYS; cifs_dbg(FYI, "SetEOF by path (setattrs) rc = %d\n", rc); - if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { - __u16 netfid; - int oplock = 0; - rc = SMBLegacyOpen(xid, tcon, full_path, FILE_OPEN, - GENERIC_WRITE, CREATE_NOT_DIR, &netfid, - &oplock, NULL, cifs_sb->local_nls, - cifs_remap(cifs_sb)); - if (rc == 0) { - unsigned int bytes_written; - - io_parms.netfid = netfid; - io_parms.pid = current->tgid; - io_parms.tcon = tcon; - io_parms.offset = 0; - io_parms.length = attrs->ia_size; - rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, NULL, - NULL, 1); - cifs_dbg(FYI, "wrt seteof rc %d\n", rc); - CIFSSMBClose(xid, tcon, netfid); - } - } if (tlink) cifs_put_tlink(tlink); diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 8b7898b7670f8..64a9bca976d05 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -67,6 +67,12 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, goto out_drop_write; } + if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) { + rc = -EBADF; + cifs_dbg(VFS, "src file seems to be from a different filesystem type\n"); + goto out_fput; + } + if ((!src_file.file->private_data) || (!dst_file->private_data)) { rc = -EBADF; cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index b1eede3678a91..3634c7adf7d20 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -847,6 +847,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) * if buggy server returns . and .. late do we want to * check for that here? 
*/ + *tmp_buf = 0; rc = cifs_filldir(current_entry, file, ctx, tmp_buf, max_len); if (rc) { diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 54daee5ad4c10..1678b9cb94c7e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -50,9 +50,13 @@ change_conf(struct TCP_Server_Info *server) break; default: server->echoes = true; - server->oplocks = true; + if (enable_oplocks) { + server->oplocks = true; + server->oplock_credits = 1; + } else + server->oplocks = false; + server->echo_credits = 1; - server->oplock_credits = 1; } server->credits -= server->echo_credits + server->oplock_credits; return 0; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 54cbe19d9c087..657a9c5c4fff2 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -46,6 +46,7 @@ #include "smb2status.h" #include "smb2glob.h" #include "cifspdu.h" +#include "cifs_spnego.h" /* * The following table defines the expected "StructureSize" of SMB2 requests @@ -427,19 +428,15 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) cifs_dbg(FYI, "missing security blob on negprot\n"); rc = cifs_enable_signing(server, ses->sign); -#ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */ if (rc) goto neg_exit; - if (blob_length) + if (blob_length) { rc = decode_negTokenInit(security_blob, blob_length, server); - if (rc == 1) - rc = 0; - else if (rc == 0) { - rc = -EIO; - goto neg_exit; + if (rc == 1) + rc = 0; + else if (rc == 0) + rc = -EIO; } -#endif - neg_exit: free_rsp_buf(resp_buftype, rsp); return rc; @@ -533,7 +530,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ struct TCP_Server_Info *server = ses->server; u16 blob_length = 0; - char *security_blob; + struct key *spnego_key = NULL; + char *security_blob = NULL; char *ntlmssp_blob = NULL; bool use_spnego = false; /* else use raw ntlmssp */ @@ -561,7 +559,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, ses->ntlmssp->sesskey_per_smbsess = true; /* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */ - ses->sectype = RawNTLMSSP; + if (ses->sectype != Kerberos && ses->sectype != RawNTLMSSP) + ses->sectype = RawNTLMSSP; ssetup_ntlmssp_authenticate: if (phase == NtLmChallenge) @@ -590,7 +589,48 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, iov[0].iov_base = (char *)req; /* 4 for rfc1002 length field and 1 for pad */ iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; - if (phase == NtLmNegotiate) { + + if (ses->sectype == Kerberos) { +#ifdef CONFIG_CIFS_UPCALL + struct cifs_spnego_msg *msg; + + spnego_key = cifs_get_spnego_key(ses); + if (IS_ERR(spnego_key)) { + rc = PTR_ERR(spnego_key); + spnego_key = NULL; + goto ssetup_exit; + } + + msg = spnego_key->payload.data; + /* + * check version field to make sure that cifs.upcall is + * sending us a response in an expected form + */ + if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) { + cifs_dbg(VFS, + "bad cifs.upcall version. 
Expected %d got %d", + CIFS_SPNEGO_UPCALL_VERSION, msg->version); + rc = -EKEYREJECTED; + goto ssetup_exit; + } + ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, + GFP_KERNEL); + if (!ses->auth_key.response) { + cifs_dbg(VFS, + "Kerberos can't allocate (%u bytes) memory", + msg->sesskey_len); + rc = -ENOMEM; + goto ssetup_exit; + } + ses->auth_key.len = msg->sesskey_len; + blob_length = msg->secblob_len; + iov[1].iov_base = msg->data + msg->sesskey_len; + iov[1].iov_len = blob_length; +#else + rc = -EOPNOTSUPP; + goto ssetup_exit; +#endif /* CONFIG_CIFS_UPCALL */ + } else if (phase == NtLmNegotiate) { /* if not krb5 must be ntlmssp */ ntlmssp_blob = kmalloc(sizeof(struct _NEGOTIATE_MESSAGE), GFP_KERNEL); if (ntlmssp_blob == NULL) { @@ -613,6 +653,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, /* with raw NTLMSSP we don't encapsulate in SPNEGO */ security_blob = ntlmssp_blob; } + iov[1].iov_base = security_blob; + iov[1].iov_len = blob_length; } else if (phase == NtLmAuthenticate) { req->hdr.SessionId = ses->Suid; ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500, @@ -640,6 +682,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, } else { security_blob = ntlmssp_blob; } + iov[1].iov_base = security_blob; + iov[1].iov_len = blob_length; } else { cifs_dbg(VFS, "illegal ntlmssp phase\n"); rc = -EIO; @@ -651,8 +695,6 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, cpu_to_le16(sizeof(struct smb2_sess_setup_req) - 1 /* pad */ - 4 /* rfc1001 len */); req->SecurityBufferLength = cpu_to_le16(blob_length); - iov[1].iov_base = security_blob; - iov[1].iov_len = blob_length; inc_rfc1001_len(req, blob_length - 1 /* pad */); @@ -663,6 +705,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, kfree(security_blob); rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base; + ses->Suid = rsp->hdr.SessionId; if (resp_buftype != CIFS_NO_BUFFER && rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) { if (phase != NtLmNegotiate) { @@ -680,7 +723,6 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, /* NTLMSSP Negotiate sent now processing challenge (response) */ phase = NtLmChallenge; /* process ntlmssp challenge */ rc = 0; /* MORE_PROCESSING is not an error here but expected */ - ses->Suid = rsp->hdr.SessionId; rc = decode_ntlmssp_challenge(rsp->Buffer, le16_to_cpu(rsp->SecurityBufferLength), ses); } @@ -737,6 +779,10 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, kfree(ses->auth_key.response); ses->auth_key.response = NULL; } + if (spnego_key) { + key_invalidate(spnego_key); + key_put(spnego_key); + } kfree(ses->ntlmssp); return rc; @@ -996,21 +1042,25 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp, { char *data_offset; struct create_context *cc; - unsigned int next = 0; + unsigned int next; + unsigned int remaining; char *name; data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset); + remaining = le32_to_cpu(rsp->CreateContextsLength); cc = (struct create_context *)data_offset; - do { - cc = (struct create_context *)((char *)cc + next); + while (remaining >= sizeof(struct create_context)) { name = le16_to_cpu(cc->NameOffset) + (char *)cc; - if (le16_to_cpu(cc->NameLength) != 4 || - strncmp(name, "RqLs", 4)) { - next = le32_to_cpu(cc->Next); - continue; - } - return server->ops->parse_lease_buf(cc, epoch); - } while (next != 0); + if (le16_to_cpu(cc->NameLength) == 4 && + strncmp(name, "RqLs", 4) == 0) + return server->ops->parse_lease_buf(cc, 
epoch); + + next = le32_to_cpu(cc->Next); + if (!next) + break; + remaining -= next; + cc = (struct create_context *)((char *)cc + next); + } return 0; } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 126f46b887cc8..66106f6ed7b4d 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -576,14 +576,16 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, cifs_in_send_dec(server); cifs_save_when_sent(mid); - if (rc < 0) + if (rc < 0) { server->sequence_number -= 2; + cifs_delete_mid(mid); + } + mutex_unlock(&server->srv_mutex); if (rc == 0) return 0; - cifs_delete_mid(mid); add_credits_and_wake_if(server, credits, optype); return rc; } diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 5373567420912..a8f3b589a2dfe 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -129,8 +129,6 @@ void configfs_release_fs(void) } -static struct kobject *config_kobj; - static int __init configfs_init(void) { int err = -ENOMEM; @@ -141,8 +139,8 @@ static int __init configfs_init(void) if (!configfs_dir_cachep) goto out; - config_kobj = kobject_create_and_add("config", kernel_kobj); - if (!config_kobj) + err = sysfs_create_mount_point(kernel_kobj, "config"); + if (err) goto out2; err = register_filesystem(&configfs_fs_type); @@ -152,7 +150,7 @@ static int __init configfs_init(void) return 0; out3: pr_err("Unable to register filesystem!\n"); - kobject_put(config_kobj); + sysfs_remove_mount_point(kernel_kobj, "config"); out2: kmem_cache_destroy(configfs_dir_cachep); configfs_dir_cachep = NULL; @@ -163,7 +161,7 @@ static int __init configfs_init(void) static void __exit configfs_exit(void) { unregister_filesystem(&configfs_fs_type); - kobject_put(config_kobj); + sysfs_remove_mount_point(kernel_kobj, "config"); kmem_cache_destroy(configfs_dir_cachep); configfs_dir_cachep = NULL; } diff --git a/fs/coredump.c b/fs/coredump.c index bbbe139ab2802..8dd099dc5f9b2 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -506,10 +506,10 @@ void do_coredump(const siginfo_t *siginfo) const struct cred *old_cred; struct cred *cred; int retval = 0; - int flag = 0; int ispipe; struct files_struct *displaced; - bool need_nonrelative = false; + /* require nonrelative corefile path and be extra careful */ + bool need_suid_safe = false; bool core_dumped = false; static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { @@ -543,9 +543,8 @@ void do_coredump(const siginfo_t *siginfo) */ if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { /* Setuid core dump mode */ - flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ - need_nonrelative = true; + need_suid_safe = true; } retval = coredump_wait(siginfo->si_signo, &core_state); @@ -626,7 +625,7 @@ void do_coredump(const siginfo_t *siginfo) if (cprm.limit < binfmt->min_coredump) goto fail_unlock; - if (need_nonrelative && cn.corename[0] != '/') { + if (need_suid_safe && cn.corename[0] != '/') { printk(KERN_WARNING "Pid %d(%s) can only dump core "\ "to fully qualified path!\n", task_tgid_vnr(current), current->comm); @@ -634,8 +633,35 @@ void do_coredump(const siginfo_t *siginfo) goto fail_unlock; } + /* + * Unlink the file if it exists unless this is a SUID + * binary - in that case, we're running around with root + * privs and don't want to unlink another user's coredump. + */ + if (!need_suid_safe) { + mm_segment_t old_fs; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + /* + * If it doesn't exist, that's fine. 
If there's some + * other problem, we'll catch it at the filp_open(). + */ + (void) sys_unlink((const char __user *)cn.corename); + set_fs(old_fs); + } + + /* + * There is a race between unlinking and creating the + * file, but if that causes an EEXIST here, that's + * fine - another process raced with us while creating + * the corefile, and the other process won. To userspace, + * what matters is that at least one of the two processes + * writes its coredump successfully, not which one. + */ cprm.file = filp_open(cn.corename, - O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, + O_CREAT | 2 | O_NOFOLLOW | + O_LARGEFILE | O_EXCL, 0600); if (IS_ERR(cprm.file)) goto fail_unlock; @@ -652,11 +678,15 @@ void do_coredump(const siginfo_t *siginfo) if (!S_ISREG(inode->i_mode)) goto close_fail; /* - * Dont allow local users get cute and trick others to coredump - * into their pre-created files. + * Don't dump core if the filesystem changed owner or mode + * of the file during file creation. This is an issue when + * a process dumps core while its cwd is e.g. on a vfat + * filesystem. */ if (!uid_eq(inode->i_uid, current_fsuid())) goto close_fail; + if ((inode->i_mode & 0677) != 0600) + goto close_fail; if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) goto close_fail; if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) diff --git a/fs/dcache.c b/fs/dcache.c index 37b5afdaf6989..10bce74c427f0 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -269,9 +269,6 @@ static inline int dname_external(const struct dentry *dentry) return dentry->d_name.name != dentry->d_iname; } -/* - * Make sure other CPUs see the inode attached before the type is set. - */ static inline void __d_set_inode_and_type(struct dentry *dentry, struct inode *inode, unsigned type_flags) @@ -279,28 +276,18 @@ static inline void __d_set_inode_and_type(struct dentry *dentry, unsigned flags; dentry->d_inode = inode; - smp_wmb(); flags = READ_ONCE(dentry->d_flags); flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); flags |= type_flags; WRITE_ONCE(dentry->d_flags, flags); } -/* - * Ideally, we want to make sure that other CPUs see the flags cleared before - * the inode is detached, but this is really a violation of RCU principles - * since the ordering suggests we should always set inode before flags. - * - * We should instead replace or discard the entire dentry - but that sucks - * performancewise on mass deletion/rename. - */ static inline void __d_clear_type_and_inode(struct dentry *dentry) { unsigned flags = READ_ONCE(dentry->d_flags); flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); WRITE_ONCE(dentry->d_flags, flags); - smp_wmb(); dentry->d_inode = NULL; } @@ -322,17 +309,17 @@ static void dentry_free(struct dentry *dentry) } /** - * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups + * dentry_rcuwalk_invalidate - invalidate in-progress rcu-walk lookups * @dentry: the target dentry * After this call, in-progress rcu-walk path lookup will fail. This * should be called after unhashing, and after changing d_inode (if * the dentry has not already been unhashed). 
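The invalidation scheme this comment describes follows the classic seqcount read/retry pattern: lockless walkers sample the sequence, do their work, and redo it if the sequence moved. A toy user-space model (this is not the kernel's seqcount_t API, just an illustration of why bumping the counter by two forces every in-flight rcu-walk reader to retry):

#include <stdatomic.h>
#include <stdbool.h>

struct toy_seqcount { atomic_uint seq; };	/* odd = writer active */

static unsigned toy_read_begin(struct toy_seqcount *s)
{
	unsigned v;

	/* Wait until no write section is open, then remember the value. */
	while ((v = atomic_load_explicit(&s->seq, memory_order_acquire)) & 1)
		;
	return v;
}

static bool toy_read_retry(struct toy_seqcount *s, unsigned start)
{
	/* Any change since toy_read_begin() means the data may be stale. */
	return atomic_load_explicit(&s->seq, memory_order_acquire) != start;
}

/* The moral equivalent of the invalidate call above: add 2 so the
 * counter stays even (no write section is opened), yet no reader's
 * saved value can match it any more -- every lockless walker retries. */
static void toy_invalidate(struct toy_seqcount *s)
{
	atomic_fetch_add_explicit(&s->seq, 2, memory_order_release);
}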
*/ -static inline void dentry_rcuwalk_barrier(struct dentry *dentry) +static inline void dentry_rcuwalk_invalidate(struct dentry *dentry) { - assert_spin_locked(&dentry->d_lock); - /* Go through a barrier */ - write_seqcount_barrier(&dentry->d_seq); + lockdep_assert_held(&dentry->d_lock); + /* Go through an invalidation barrier */ + write_seqcount_invalidate(&dentry->d_seq); } /* @@ -370,9 +357,11 @@ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; + + raw_write_seqcount_begin(&dentry->d_seq); __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); - dentry_rcuwalk_barrier(dentry); + raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -494,7 +483,7 @@ void __d_drop(struct dentry *dentry) __hlist_bl_del(&dentry->d_hash); dentry->d_hash.pprev = NULL; hlist_bl_unlock(b); - dentry_rcuwalk_barrier(dentry); + dentry_rcuwalk_invalidate(dentry); } } EXPORT_SYMBOL(__d_drop); @@ -642,7 +631,7 @@ static inline bool fast_dput(struct dentry *dentry) /* * If we have a d_op->d_delete() operation, we should not - * let the dentry count go to zero, so use "put__or_lock". + * let the dentry count go to zero, so use "put_or_lock". */ if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) return lockref_put_or_lock(&dentry->d_lockref); @@ -697,7 +686,7 @@ static inline bool fast_dput(struct dentry *dentry) */ smp_rmb(); d_flags = ACCESS_ONCE(dentry->d_flags); - d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST; + d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED; /* Nothing to do? Dropping the reference was all we needed? */ if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) @@ -776,6 +765,9 @@ void dput(struct dentry *dentry) if (unlikely(d_unhashed(dentry))) goto kill_it; + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) + goto kill_it; + if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { if (dentry->d_op->d_delete(dentry)) goto kill_it; @@ -1673,7 +1665,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) DCACHE_OP_COMPARE | DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | - DCACHE_OP_DELETE )); + DCACHE_OP_DELETE | + DCACHE_OP_SELECT_INODE)); dentry->d_op = op; if (!op) return; @@ -1689,6 +1682,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) dentry->d_flags |= DCACHE_OP_DELETE; if (op->d_prune) dentry->d_flags |= DCACHE_OP_PRUNE; + if (op->d_select_inode) + dentry->d_flags |= DCACHE_OP_SELECT_INODE; } EXPORT_SYMBOL(d_set_d_op); @@ -1751,8 +1746,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) spin_lock(&dentry->d_lock); if (inode) hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); + raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); - dentry_rcuwalk_barrier(dentry); + raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); fsnotify_d_instantiate(dentry, inode); } @@ -2920,6 +2916,13 @@ static int prepend_path(const struct path *path, if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { struct mount *parent = ACCESS_ONCE(mnt->mnt_parent); + /* Escaped? */ + if (dentry != vfsmnt->mnt_root) { + bptr = *buffer; + blen = *buflen; + error = 3; + break; + } /* Global root?
*/ if (mnt != parent) { dentry = ACCESS_ONCE(mnt->mnt_mountpoint); @@ -2927,17 +2930,6 @@ static int prepend_path(const struct path *path, vfsmnt = &mnt->mnt; continue; } - /* - * Filesystems needing to implement special "root names" - * should do so with ->d_dname() - */ - if (IS_ROOT(dentry) && - (dentry->d_name.len != 1 || - dentry->d_name.name[0] != '/')) { - WARN(1, "Root dentry has weird name <%.*s>\n", - (int) dentry->d_name.len, - dentry->d_name.name); - } if (!error) error = is_mounted(vfsmnt) ? 1 : 2; break; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index c1e7ffb0dab65..8bec8f1e4b310 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -276,8 +276,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) dput(dentry); dentry = ERR_PTR(-EEXIST); } - if (IS_ERR(dentry)) + + if (IS_ERR(dentry)) { mutex_unlock(&d_inode(parent)->i_mutex); + simple_release_fs(&debugfs_mount, &debugfs_mount_count); + } + return dentry; } @@ -716,20 +720,17 @@ bool debugfs_initialized(void) } EXPORT_SYMBOL_GPL(debugfs_initialized); - -static struct kobject *debug_kobj; - static int __init debugfs_init(void) { int retval; - debug_kobj = kobject_create_and_add("debug", kernel_kobj); - if (!debug_kobj) - return -EINVAL; + retval = sysfs_create_mount_point(kernel_kobj, "debug"); + if (retval) + return retval; retval = register_filesystem(&debug_fs_type); if (retval) - kobject_put(debug_kobj); + sysfs_remove_mount_point(kernel_kobj, "debug"); else debugfs_registered = true; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index add566303c684..91360444adf5e 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -569,6 +569,26 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx) mutex_unlock(&allocated_ptys_lock); } +/* + * pty code needs to hold extra references in case of last /dev/tty close + */ + +void devpts_add_ref(struct inode *ptmx_inode) +{ + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + + atomic_inc(&sb->s_active); + ihold(ptmx_inode); +} + +void devpts_del_ref(struct inode *ptmx_inode) +{ + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + + iput(ptmx_inode); + deactivate_super(sb); +} + /** * devpts_pty_new -- create a new inode in /dev/pts/ * @ptmx_inode: inode of the master diff --git a/fs/direct-io.c b/fs/direct-io.c index 745d2342651a0..d83a021a659fd 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1159,6 +1159,16 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, } } + /* Once we sampled i_size check for reads beyond EOF */ + dio->i_size = i_size_read(inode); + if (iov_iter_rw(iter) == READ && offset >= dio->i_size) { + if (dio->flags & DIO_LOCKING) + mutex_unlock(&inode->i_mutex); + kmem_cache_free(dio_cache, dio); + retval = 0; + goto out; + } + /* * For file extending writes updating i_size before data writeouts * complete can expose uninitialized blocks in dumb filesystems. 
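The hunk above samples i_size once at entry and fails reads that start at or past EOF before any block mapping is attempted, so such a read now returns 0 early. A small user-space probe of that semantic (a sketch only; it assumes a filesystem that honours O_DIRECT, and the "testfile" name and 4096-byte alignment are arbitrary choices for the example):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	void *buf;
	ssize_t n;
	int fd = open("testfile", O_RDONLY | O_DIRECT);

	if (fd < 0)
		return 1;
	/* O_DIRECT requires suitably aligned buffers. */
	if (posix_memalign(&buf, 4096, 4096))
		return 1;

	/* Read starting exactly at EOF: expect 0, not an error. */
	lseek(fd, 0, SEEK_END);
	n = read(fd, buf, 4096);
	printf("O_DIRECT read at EOF returned %zd\n", n);

	free(buf);
	close(fd);
	return 0;
}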
@@ -1212,7 +1222,6 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, sdio.next_block_for_io = -1; dio->iocb = iocb; - dio->i_size = i_size_read(inode); spin_lock_init(&dio->bio_lock); dio->refcount = 1; diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 8db0b464483f9..63cd2c147221a 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -45,20 +45,20 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) { struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); - int rc; - - if (!(lower_dentry->d_flags & DCACHE_OP_REVALIDATE)) - return 1; + int rc = 1; if (flags & LOOKUP_RCU) return -ECHILD; - rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); + if (lower_dentry->d_flags & DCACHE_OP_REVALIDATE) + rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); + if (d_really_is_positive(dentry)) { - struct inode *lower_inode = - ecryptfs_inode_to_lower(d_inode(dentry)); + struct inode *inode = d_inode(dentry); - fsstack_copy_attr_all(d_inode(dentry), lower_inode); + fsstack_copy_attr_all(inode, ecryptfs_inode_to_lower(inode)); + if (!inode->i_nlink) + return 0; } return rc; } diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index d418431818187..e770c1ee4613e 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -88,13 +88,13 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) return 0; } + err = handle->h_err; if (!handle->h_transaction) { - err = jbd2_journal_stop(handle); - return handle->h_err ? handle->h_err : err; + rc = jbd2_journal_stop(handle); + return err ? err : rc; } sb = handle->h_transaction->t_journal->j_private; - err = handle->h_err; rc = jbd2_journal_stop(handle); if (!err) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e003a1e81dc35..87ba10d1d3bcc 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -503,7 +503,7 @@ __read_extent_tree_block(const char *function, unsigned int line, struct buffer_head *bh; int err; - bh = sb_getblk(inode->i_sb, pblk); + bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); @@ -1088,7 +1088,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, err = -EIO; goto cleanup; } - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) { err = -ENOMEM; goto cleanup; @@ -1282,7 +1282,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, if (newblock == 0) return err; - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return -ENOMEM; lock_buffer(bh); diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 9588240195090..94ae6874c2cb8 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -565,7 +565,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { EXT4_ERROR_INODE(inode, "Can't allocate blocks for " "non-extent mapped inodes with bigalloc"); - return -ENOSPC; + return -EUCLEAN; } /* Set up for the direct block allocation */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0554b0b5957bb..2b3a53a515822 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1342,7 +1342,7 @@ static void ext4_da_page_release_reservation(struct page *page, unsigned int offset, unsigned int length) { - int to_release = 0; + int to_release = 0, contiguous_blks = 0; struct buffer_head *head, *bh; unsigned int curr_off = 0; struct inode 
*inode = page->mapping->host; @@ -1363,14 +1363,23 @@ static void ext4_da_page_release_reservation(struct page *page, if ((offset <= curr_off) && (buffer_delay(bh))) { to_release++; + contiguous_blks++; clear_buffer_delay(bh); + } else if (contiguous_blks) { + lblk = page->index << + (PAGE_CACHE_SHIFT - inode->i_blkbits); + lblk += (curr_off >> inode->i_blkbits) - + contiguous_blks; + ext4_es_remove_extent(inode, lblk, contiguous_blks); + contiguous_blks = 0; } curr_off = next_off; } while ((bh = bh->b_this_page) != head); - if (to_release) { + if (contiguous_blks) { lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - ext4_es_remove_extent(inode, lblk, to_release); + lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; + ext4_es_remove_extent(inode, lblk, contiguous_blks); } /* If we have released all the blocks belonging to a cluster, then we @@ -1701,19 +1710,32 @@ static int __ext4_journalled_writepage(struct page *page, ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); } - /* As soon as we unlock the page, it can go away, but we have - * references to buffers so we are safe */ + /* + * We need to release the page lock before we start the + * journal, so grab a reference so the page won't disappear + * out from under us. + */ + get_page(page); unlock_page(page); handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, ext4_writepage_trans_blocks(inode)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); - goto out; + put_page(page); + goto out_no_pagelock; } - BUG_ON(!ext4_handle_valid(handle)); + lock_page(page); + put_page(page); + if (page->mapping != mapping) { + /* The page got truncated from under us */ + ext4_journal_stop(handle); + ret = 0; + goto out; + } + if (inline_data) { BUFFER_TRACE(inode_bh, "get write access"); ret = ext4_journal_get_write_access(handle, inode_bh); @@ -1739,6 +1761,8 @@ static int __ext4_journalled_writepage(struct page *page, NULL, bput_one); ext4_set_inode_state(inode, EXT4_STATE_JDATA); out: + unlock_page(page); +out_no_pagelock: brelse(inode_bh); return ret; } @@ -3109,29 +3133,29 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, * case, we allocate an io_end structure to hook to the iocb. */ iocb->private = NULL; - ext4_inode_aio_set(inode, NULL); - if (!is_sync_kiocb(iocb)) { - io_end = ext4_init_io_end(inode, GFP_NOFS); - if (!io_end) { - ret = -ENOMEM; - goto retake_lock; - } - /* - * Grab reference for DIO. Will be dropped in ext4_end_io_dio() - */ - iocb->private = ext4_get_io_end(io_end); - /* - * we save the io structure for current async direct - * IO, so that later ext4_map_blocks() could flag the - * io structure whether there is a unwritten extents - * needs to be converted when IO is completed. - */ - ext4_inode_aio_set(inode, io_end); - } - if (overwrite) { get_block_func = ext4_get_block_write_nolock; } else { + ext4_inode_aio_set(inode, NULL); + if (!is_sync_kiocb(iocb)) { + io_end = ext4_init_io_end(inode, GFP_NOFS); + if (!io_end) { + ret = -ENOMEM; + goto retake_lock; + } + /* + * Grab reference for DIO. Will be dropped in + * ext4_end_io_dio() + */ + iocb->private = ext4_get_io_end(io_end); + /* + * we save the io structure for current async direct + * IO, so that later ext4_map_blocks() could flag the + * io structure whether there is a unwritten extents + * needs to be converted when IO is completed. 
+ */ + ext4_inode_aio_set(inode, io_end); + } get_block_func = ext4_get_block_write; dio_flags = DIO_LOCKING; } @@ -4345,7 +4369,12 @@ static void ext4_update_other_inodes_time(struct super_block *sb, int inode_size = EXT4_INODE_SIZE(sb); oi.orig_ino = orig_ino; - ino = (orig_ino & ~(inodes_per_block - 1)) + 1; + /* + * Calculate the first inode in the inode table block. Inode + * numbers are one-based. That is, the first inode in a block + * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1). + */ + ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1; for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { if (ino == orig_ino) continue; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8d1e60214ef0a..41260489d3bcd 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4800,18 +4800,12 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, /* * blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed + * + * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed + * to fail. */ - retry: - new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); - if (!new_entry) { - /* - * We use a retry loop because - * ext4_free_blocks() is not allowed to fail. - */ - cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); - goto retry; - } + new_entry = kmem_cache_alloc(ext4_free_data_cachep, + GFP_NOFS|__GFP_NOFAIL); new_entry->efd_start_cluster = bit; new_entry->efd_group = block_group; new_entry->efd_count = count_clusters; diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b52374e421022..6163ad21cb0ef 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -620,6 +620,7 @@ int ext4_ind_migrate(struct inode *inode) struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; + ext4_lblk_t start, end; ext4_fsblk_t blk; handle_t *handle; int ret; @@ -633,6 +634,14 @@ int ext4_ind_migrate(struct inode *inode) EXT4_FEATURE_RO_COMPAT_BIGALLOC)) return -EOPNOTSUPP; + /* + * In order to get correct extent info, force all delayed allocation + * blocks to be allocated, otherwise delayed allocation blocks may not + * be reflected and bypass the checks on extent header. 
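The same ordering constraint is visible from user space: a delayed-allocation block has no extent item until it is flushed, so anything inspecting the extent layout must force allocation first. A hedged sketch using FIEMAP, mirroring the ext4_alloc_da_blocks() call this hunk adds right below (error handling trimmed; the 32-extent cap is arbitrary, and FIEMAP_FLAG_SYNC could be used instead of the explicit fsync()):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	struct fiemap *fm;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	/* Force delayed-allocation blocks to be really allocated
	 * before looking at the mapping. */
	fsync(fd);

	fm = calloc(1, sizeof(*fm) + 32 * sizeof(struct fiemap_extent));
	if (!fm)
		return 1;
	fm->fm_length = ~0ULL;		/* map the whole file */
	fm->fm_extent_count = 32;
	if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0)
		printf("%u mapped extents\n", fm->fm_mapped_extents);

	free(fm);
	close(fd);
	return 0;
}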
+ */ + if (test_opt(inode->i_sb, DELALLOC)) + ext4_alloc_da_blocks(inode); + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -650,11 +659,13 @@ int ext4_ind_migrate(struct inode *inode) goto errout; } if (eh->eh_entries == 0) - blk = len = 0; + blk = len = start = end = 0; else { len = le16_to_cpu(ex->ee_len); blk = ext4_ext_pblock(ex); - if (len > EXT4_NDIR_BLOCKS) { + start = le32_to_cpu(ex->ee_block); + end = start + len - 1; + if (end >= EXT4_NDIR_BLOCKS) { ret = -EOPNOTSUPP; goto errout; } @@ -662,7 +673,7 @@ int ext4_ind_migrate(struct inode *inode) ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); memset(ei->i_data, 0, sizeof(ei->i_data)); - for (i=0; i < len; i++) + for (i = start; i <= end; i++) ei->i_data[i] = cpu_to_le32(blk++); ext4_mark_inode_dirty(handle, inode); errout: diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 370420bfae8d7..7da8ac1047f8f 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -268,11 +268,12 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, ext4_lblk_t orig_blk_offset, donor_blk_offset; unsigned long blocksize = orig_inode->i_sb->s_blocksize; unsigned int tmp_data_size, data_size, replaced_size; - int err2, jblocks, retries = 0; + int i, err2, jblocks, retries = 0; int replaced_count = 0; int from = data_offset_in_page << orig_inode->i_blkbits; int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; struct super_block *sb = orig_inode->i_sb; + struct buffer_head *bh = NULL; /* * It needs twice the amount of ordinary journal buffers because @@ -383,8 +384,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, } /* Perform all necessary steps similar write_begin()/write_end() * but keeping in mind that i_size will not change */ - *err = __block_write_begin(pagep[0], from, replaced_size, - ext4_get_block); + if (!page_has_buffers(pagep[0])) + create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0); + bh = page_buffers(pagep[0]); + for (i = 0; i < data_offset_in_page; i++) + bh = bh->b_this_page; + for (i = 0; i < block_len_in_page; i++) { + *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0); + if (*err < 0) + break; + } if (!*err) *err = block_commit_write(pagep[0], from, from + replaced_size); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 5765f88b39049..8082565c59a92 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -426,6 +426,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, struct buffer_head *bh, *head; int ret = 0; int nr_submitted = 0; + int nr_to_submit = 0; blocksize = 1 << inode->i_blkbits; @@ -478,11 +479,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io, unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); } set_buffer_async_write(bh); + nr_to_submit++; } while ((bh = bh->b_this_page) != head); bh = head = page_buffers(page); - if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) && + nr_to_submit) { data_page = ext4_encrypt(inode, page); if (IS_ERR(data_page)) { ret = PTR_ERR(data_page); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index cf0c472047e3a..0e783b9f70071 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -198,7 +198,7 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) if (flex_gd == NULL) goto out3; - if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data)) + if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data)) goto 
out2; flex_gd->count = flexbg_size; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ca9d4a2fed415..8a3b9f14d1988 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -324,6 +324,22 @@ static void save_error_info(struct super_block *sb, const char *func, ext4_commit_super(sb, 1); } +/* + * The del_gendisk() function uninitializes the disk-specific data + * structures, including the bdi structure, without telling anyone + * else. Once this happens, any attempt to call mark_buffer_dirty() + * (for example, by ext4_commit_super), will cause a kernel OOPS. + * This is a kludge to prevent these oops until we can put in a proper + * hook in del_gendisk() to inform the VFS and file system layers. + */ +static int block_device_ejected(struct super_block *sb) +{ + struct inode *bd_inode = sb->s_bdev->bd_inode; + struct backing_dev_info *bdi = inode_to_bdi(bd_inode); + + return bdi->dev == NULL; +} + static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) { struct super_block *sb = journal->j_private; @@ -380,9 +396,13 @@ static void ext4_handle_error(struct super_block *sb) smp_wmb(); sb->s_flags |= MS_RDONLY; } - if (test_opt(sb, ERRORS_PANIC)) + if (test_opt(sb, ERRORS_PANIC)) { + if (EXT4_SB(sb)->s_journal && + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) + return; panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); + } } #define ext4_error_ratelimit(sb) \ @@ -571,8 +591,12 @@ void __ext4_abort(struct super_block *sb, const char *function, jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); save_error_info(sb, function, line); } - if (test_opt(sb, ERRORS_PANIC)) + if (test_opt(sb, ERRORS_PANIC)) { + if (EXT4_SB(sb)->s_journal && + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) + return; panic("EXT4-fs panic from previous error\n"); + } } void __ext4_msg(struct super_block *sb, @@ -807,6 +831,7 @@ static void ext4_put_super(struct super_block *sb) dump_orphan_list(sb, sbi); J_ASSERT(list_empty(&sbi->s_orphan)); + sync_blockdev(sb->s_bdev); invalidate_bdev(sb->s_bdev); if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { /* @@ -1737,10 +1762,10 @@ static inline void ext4_show_quota_options(struct seq_file *seq, } if (sbi->s_qf_names[USRQUOTA]) - seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); + seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]); if (sbi->s_qf_names[GRPQUOTA]) - seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); + seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]); #endif } @@ -4590,7 +4615,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; int error = 0; - if (!sbh) + if (!sbh || block_device_ejected(sb)) return error; if (buffer_write_io_error(sbh)) { /* @@ -4806,10 +4831,11 @@ static int ext4_freeze(struct super_block *sb) error = jbd2_journal_flush(journal); if (error < 0) goto out; + + /* Journal blocked and flushed, clear needs_recovery flag. */ + EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); } - /* Journal blocked and flushed, clear needs_recovery flag. */ - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); error = ext4_commit_super(sb, 1); out: if (journal) @@ -4827,8 +4853,11 @@ static int ext4_unfreeze(struct super_block *sb) if (sb->s_flags & MS_RDONLY) return 0; - /* Reset the needs_recovery flag before the fs is unlocked. 
*/ - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + if (EXT4_SB(sb)->s_journal) { + /* Reset the needs_recovery flag before the fs is unlocked. */ + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + } + ext4_commit_super(sb, 1); return 0; } @@ -4943,6 +4972,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); } + if (*flags & MS_LAZYTIME) + sb->s_flags |= MS_LAZYTIME; + if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { err = -EROFS; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 082ac1c97f397..708d697113fc9 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1026,6 +1026,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) goto err_fput; fuse_conn_init(fc); + fc->release = fuse_free_conn; fc->dev = sb->s_dev; fc->sb = sb; @@ -1040,7 +1041,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->dont_mask = 1; sb->s_flags |= MS_POSIXACL; - fc->release = fuse_free_conn; fc->flags = d.flags; fc->user_id = d.user_id; fc->group_id = d.group_id; @@ -1238,7 +1238,6 @@ static void fuse_fs_cleanup(void) } static struct kobject *fuse_kobj; -static struct kobject *connections_kobj; static int fuse_sysfs_init(void) { @@ -1250,11 +1249,9 @@ static int fuse_sysfs_init(void) goto out_err; } - connections_kobj = kobject_create_and_add("connections", fuse_kobj); - if (!connections_kobj) { - err = -ENOMEM; + err = sysfs_create_mount_point(fuse_kobj, "connections"); + if (err) goto out_fuse_unregister; - } return 0; @@ -1266,7 +1263,7 @@ static int fuse_sysfs_init(void) static void fuse_sysfs_cleanup(void) { - kobject_put(connections_kobj); + sysfs_remove_mount_point(fuse_kobj, "connections"); kobject_put(fuse_kobj); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 859c6edbf81a0..c18b49dc5d4f6 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1334,11 +1334,11 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) if (is_ancestor(root, sdp->sd_master_dir)) seq_puts(s, ",meta"); if (args->ar_lockproto[0]) - seq_printf(s, ",lockproto=%s", args->ar_lockproto); + seq_show_option(s, "lockproto", args->ar_lockproto); if (args->ar_locktable[0]) - seq_printf(s, ",locktable=%s", args->ar_locktable); + seq_show_option(s, "locktable", args->ar_locktable); if (args->ar_hostdata[0]) - seq_printf(s, ",hostdata=%s", args->ar_hostdata); + seq_show_option(s, "hostdata", args->ar_hostdata); if (args->ar_spectator) seq_puts(s, ",spectator"); if (args->ar_localflocks) diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index d3fa6bd9503e7..221719eac5de6 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -288,7 +288,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) page_cache_release(page); goto fail; } - page_cache_release(page); node->page[i] = page; } @@ -398,11 +397,11 @@ struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num) void hfs_bnode_free(struct hfs_bnode *node) { - //int i; + int i; - //for (i = 0; i < node->tree->pages_per_bnode; i++) - // if (node->page[i]) - // page_cache_release(node->page[i]); + for (i = 0; i < node->tree->pages_per_bnode; i++) + if (node->page[i]) + page_cache_release(node->page[i]); kfree(node); } diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index 9f4ee7f520261..6fc766df04617 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c @@ -131,13 +131,16 @@ int hfs_brec_insert(struct hfs_find_data *fd, void *entry, int 
entry_len) hfs_bnode_write(node, entry, data_off + key_len, entry_len); hfs_bnode_dump(node); - if (new_node) { - /* update parent key if we inserted a key - * at the start of the first node - */ - if (!rec && new_node != node) - hfs_brec_update_parent(fd); + /* + * update parent key if we inserted a key + * at the start of the node and it is not the new node + */ + if (!rec && new_node != node) { + hfs_bnode_read_key(node, fd->search_key, data_off + size); + hfs_brec_update_parent(fd); + } + if (new_node) { hfs_bnode_put(fd->bnode); if (!new_node->parent) { hfs_btree_inc_height(tree); @@ -166,9 +169,6 @@ int hfs_brec_insert(struct hfs_find_data *fd, void *entry, int entry_len) goto again; } - if (!rec) - hfs_brec_update_parent(fd); - return 0; } @@ -366,6 +366,8 @@ static int hfs_brec_update_parent(struct hfs_find_data *fd) if (IS_ERR(parent)) return PTR_ERR(parent); __hfs_brec_find(parent, fd); + if (fd->record < 0) + return -ENOENT; hfs_bnode_dump(parent); rec = fd->record; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index eee7206c38d18..410b65eea683d 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -135,9 +135,9 @@ static int hfs_show_options(struct seq_file *seq, struct dentry *root) struct hfs_sb_info *sbi = HFS_SB(root->d_sb); if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f)) - seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator); + seq_show_option_n(seq, "creator", (char *)&sbi->s_creator, 4); if (sbi->s_type != cpu_to_be32(0x3f3f3f3f)) - seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type); + seq_show_option_n(seq, "type", (char *)&sbi->s_type, 4); seq_printf(seq, ",uid=%u,gid=%u", from_kuid_munged(&init_user_ns, sbi->s_uid), from_kgid_munged(&init_user_ns, sbi->s_gid)); diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 759708fd9331c..63924662aaf3e 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -454,7 +454,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) page_cache_release(page); goto fail; } - page_cache_release(page); node->page[i] = page; } @@ -566,13 +565,11 @@ struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num) void hfs_bnode_free(struct hfs_bnode *node) { -#if 0 int i; for (i = 0; i < node->tree->pages_per_bnode; i++) if (node->page[i]) page_cache_release(node->page[i]); -#endif kfree(node); } diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index c90b72ee676d8..bb806e58c9770 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -218,9 +218,9 @@ int hfsplus_show_options(struct seq_file *seq, struct dentry *root) struct hfsplus_sb_info *sbi = HFSPLUS_SB(root->d_sb); if (sbi->creator != HFSPLUS_DEF_CR_TYPE) - seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); + seq_show_option_n(seq, "creator", (char *)&sbi->creator, 4); if (sbi->type != HFSPLUS_DEF_CR_TYPE) - seq_printf(seq, ",type=%.4s", (char *)&sbi->type); + seq_show_option_n(seq, "type", (char *)&sbi->type, 4); seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, from_kuid_munged(&init_user_ns, sbi->uid), from_kgid_munged(&init_user_ns, sbi->gid)); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 07d8d8f52faf5..f895a85d93048 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -260,7 +260,7 @@ static int hostfs_show_options(struct seq_file *seq, struct dentry *root) size_t offset = strlen(root_ino) + 1; if (strlen(root_path) > offset) - seq_printf(seq, ",%s", root_path + offset); + seq_show_option(seq, root_path + offset, NULL); if (append) seq_puts(seq, ",append"); @@ -730,15 +730,13 
@@ static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, init_special_inode(inode, mode, dev); err = do_mknod(name, mode, MAJOR(dev), MINOR(dev)); - if (!err) + if (err) goto out_free; err = read_name(inode, name); __putname(name); if (err) goto out_put; - if (err) - goto out_put; d_instantiate(dentry, inode); return 0; diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index a0872f239f04f..b5f3cc7274f62 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -8,6 +8,17 @@ #include <linux/sched.h> #include "hpfs_fn.h" +static void hpfs_update_directory_times(struct inode *dir) +{ + time_t t = get_seconds(); + if (t == dir->i_mtime.tv_sec && + t == dir->i_ctime.tv_sec) + return; + dir->i_mtime.tv_sec = dir->i_ctime.tv_sec = t; + dir->i_mtime.tv_nsec = dir->i_ctime.tv_nsec = 0; + hpfs_write_inode_nolock(dir); +} + static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { const unsigned char *name = dentry->d_name.name; @@ -99,6 +110,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) result->i_mode = mode | S_IFDIR; hpfs_write_inode_nolock(result); } + hpfs_update_directory_times(dir); d_instantiate(dentry, result); hpfs_unlock(dir->i_sb); return 0; @@ -187,6 +199,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, b result->i_mode = mode | S_IFREG; hpfs_write_inode_nolock(result); } + hpfs_update_directory_times(dir); d_instantiate(dentry, result); hpfs_unlock(dir->i_sb); return 0; @@ -262,6 +275,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, de insert_inode_hash(result); hpfs_write_inode_nolock(result); + hpfs_update_directory_times(dir); d_instantiate(dentry, result); brelse(bh); hpfs_unlock(dir->i_sb); @@ -340,6 +354,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy insert_inode_hash(result); hpfs_write_inode_nolock(result); + hpfs_update_directory_times(dir); d_instantiate(dentry, result); hpfs_unlock(dir->i_sb); return 0; @@ -362,12 +377,11 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry) struct inode *inode = d_inode(dentry); dnode_secno dno; int r; - int rep = 0; int err; hpfs_lock(dir->i_sb); hpfs_adjust_length(name, &len); -again: + err = -ENOENT; de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh); if (!de) @@ -387,33 +401,9 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry) hpfs_error(dir->i_sb, "there was error when removing dirent"); err = -EFSERROR; break; - case 2: /* no space for deleting, try to truncate file */ - + case 2: /* no space for deleting */ err = -ENOSPC; - if (rep++) - break; - - dentry_unhash(dentry); - if (!d_unhashed(dentry)) { - hpfs_unlock(dir->i_sb); - return -ENOSPC; - } - if (generic_permission(inode, MAY_WRITE) || - !S_ISREG(inode->i_mode) || - get_write_access(inode)) { - d_rehash(dentry); - } else { - struct iattr newattrs; - /*pr_info("truncating file before delete.\n");*/ - newattrs.ia_size = 0; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; - err = notify_change(dentry, &newattrs, NULL); - put_write_access(inode); - if (!err) - goto again; - } - hpfs_unlock(dir->i_sb); - return -ENOSPC; + break; default: drop_nlink(inode); err = 0; @@ -423,6 +413,8 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry) out1: hpfs_brelse4(&qbh); out: + if (!err) + hpfs_update_directory_times(dir); hpfs_unlock(dir->i_sb); return err; } @@ -477,6 +469,8 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) out1: hpfs_brelse4(&qbh); out: +
if (!err) + hpfs_update_directory_times(dir); hpfs_unlock(dir->i_sb); return err; } @@ -595,7 +589,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto end1; } - end: +end: hpfs_i(i)->i_parent_dir = new_dir->i_ino; if (S_ISDIR(i->i_mode)) { inc_nlink(new_dir); @@ -610,6 +604,10 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, brelse(bh); } end1: + if (!err) { + hpfs_update_directory_times(old_dir); + hpfs_update_directory_times(new_dir); + } hpfs_unlock(i->i_sb); return err; } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 7cd00d3a7c9b7..8685c655737f9 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -52,17 +52,20 @@ static void unmark_dirty(struct super_block *s) } /* Filesystem error... */ -static char err_buf[1024]; - void hpfs_error(struct super_block *s, const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - vsnprintf(err_buf, sizeof(err_buf), fmt, args); + + vaf.fmt = fmt; + vaf.va = &args; + + pr_err("filesystem error: %pV", &vaf); + va_end(args); - pr_err("filesystem error: %s", err_buf); if (!hpfs_sb(s)->sb_was_error) { if (hpfs_sb(s)->sb_err == 2) { pr_cont("; crashing the system because you wanted it\n"); @@ -424,11 +427,14 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) int o; struct hpfs_sb_info *sbi = hpfs_sb(s); char *new_opts = kstrdup(data, GFP_KERNEL); - + + if (!new_opts) + return -ENOMEM; + sync_filesystem(s); *flags |= MS_NOATIME; - + hpfs_lock(s); uid = sbi->sb_uid; gid = sbi->sb_gid; umask = 0777 & ~sbi->sb_mode; diff --git a/fs/inode.c b/fs/inode.c index ea37cd17b53f0..6e342cadef81b 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1693,8 +1693,8 @@ int file_remove_suid(struct file *file) error = security_inode_killpriv(dentry); if (!error && killsuid) error = __remove_suid(dentry, killsuid); - if (!error && (inode->i_sb->s_flags & MS_NOSEC)) - inode->i_flags |= S_NOSEC; + if (!error) + inode_has_no_xattr(inode); return error; } diff --git a/fs/internal.h b/fs/internal.h index 01dce1d1476b7..4d5af583ab031 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -107,6 +107,7 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, extern long do_handle_open(int mountdirfd, struct file_handle __user *ufh, int open_flag); extern int open_check_o_direct(struct file *f); +extern int vfs_open(const struct path *, struct file *, const struct cred *); /* * inode.c diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 988b32ed4c873..8c44654ce2748 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -390,7 +390,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) unsigned long blocknr; if (is_journal_aborted(journal)) - return 1; + return -EIO; if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr)) return 1; @@ -405,10 +405,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal) * jbd2_cleanup_journal_tail() doesn't get called all that often. */ if (journal->j_flags & JBD2_BARRIER) - blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); + blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL); - __jbd2_update_log_tail(journal, first_tid, blocknr); - return 0; + return __jbd2_update_log_tail(journal, first_tid, blocknr); } @@ -418,12 +417,12 @@ int jbd2_cleanup_journal_tail(journal_t *journal) * journal_clean_one_cp_list * * Find all the written-back checkpoint buffers in the given list and - * release them. + * release them. If 'destroy' is set, clean all buffers unconditionally. * * Called with j_list_lock held. 
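+ * In the 'destroy' case, buffers are removed via __jbd2_journal_remove_checkpoint() even if they have not been written back yet.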
* Returns 1 if we freed the transaction, 0 otherwise. */ -static int journal_clean_one_cp_list(struct journal_head *jh) +static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy) { struct journal_head *last_jh; struct journal_head *next_jh = jh; @@ -437,7 +436,10 @@ static int journal_clean_one_cp_list(struct journal_head *jh) do { jh = next_jh; next_jh = jh->b_cpnext; - ret = __try_to_free_cp_buf(jh); + if (!destroy) + ret = __try_to_free_cp_buf(jh); + else + ret = __jbd2_journal_remove_checkpoint(jh) + 1; if (!ret) return freed; if (ret == 2) @@ -460,10 +462,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh) * journal_clean_checkpoint_list * * Find all the written-back checkpoint buffers in the journal and release them. + * If 'destroy' is set, release all buffers unconditionally. * * Called with j_list_lock held. */ -void __jbd2_journal_clean_checkpoint_list(journal_t *journal) +void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) { transaction_t *transaction, *last_transaction, *next_transaction; int ret; @@ -477,7 +480,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal) do { transaction = next_transaction; next_transaction = transaction->t_cpnext; - ret = journal_clean_one_cp_list(transaction->t_checkpoint_list); + ret = journal_clean_one_cp_list(transaction->t_checkpoint_list, + destroy); /* * This function only frees up some memory if possible so we * dont have an obligation to finish processing. Bail out if @@ -493,7 +497,7 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal) * we can possibly see not yet submitted buffers on io_list */ ret = journal_clean_one_cp_list(transaction-> - t_checkpoint_io_list); + t_checkpoint_io_list, destroy); if (need_resched()) return; /* @@ -506,6 +510,28 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal) } while (transaction != last_transaction); } +/* + * Remove buffers from all checkpoint lists as journal is aborted and we just + * need to free memory + */ +void jbd2_journal_destroy_checkpoint(journal_t *journal) +{ + /* + * We loop because __jbd2_journal_clean_checkpoint_list() may abort + * early due to a need of rescheduling. 
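+ * Each pass retakes j_list_lock, drops it again and calls cond_resched()
+ * before retrying, so the lock is never held across the whole teardown.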
+ */ + while (1) { + spin_lock(&journal->j_list_lock); + if (!journal->j_checkpoint_transactions) { + spin_unlock(&journal->j_list_lock); + break; + } + __jbd2_journal_clean_checkpoint_list(journal, true); + spin_unlock(&journal->j_list_lock); + cond_resched(); + } +} + /* * journal_remove_checkpoint: called after a buffer has been committed * to disk (either by being write-back flushed to disk, or being diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index b73e0215baa7c..362e5f614450e 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -510,7 +510,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) * frees some memory */ spin_lock(&journal->j_list_lock); - __jbd2_journal_clean_checkpoint_list(journal); + __jbd2_journal_clean_checkpoint_list(journal, false); spin_unlock(&journal->j_list_lock); jbd_debug(3, "JBD2: commit phase 1\n"); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b96bd8076b706..0469f32918a5a 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -885,9 +885,10 @@ int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, * * Requires j_checkpoint_mutex */ -void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) +int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) { unsigned long freed; + int ret; BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); @@ -897,7 +898,10 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) * space and if we lose sb update during power failure we'd replay * old transaction with possibly newly overwritten data. */ - jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); + ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); + if (ret) + goto out; + write_lock(&journal->j_state_lock); freed = block - journal->j_tail; if (block < journal->j_tail) @@ -913,6 +917,9 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) journal->j_tail_sequence = tid; journal->j_tail = block; write_unlock(&journal->j_state_lock); + +out: + return ret; } /* @@ -1331,7 +1338,7 @@ static int journal_reset(journal_t *journal) return jbd2_journal_start_thread(journal); } -static void jbd2_write_superblock(journal_t *journal, int write_op) +static int jbd2_write_superblock(journal_t *journal, int write_op) { struct buffer_head *bh = journal->j_sb_buffer; journal_superblock_t *sb = journal->j_superblock; @@ -1370,7 +1377,10 @@ static void jbd2_write_superblock(journal_t *journal, int write_op) printk(KERN_ERR "JBD2: Error %d detected when updating " "journal superblock for %s.\n", ret, journal->j_devname); + jbd2_journal_abort(journal, ret); } + + return ret; } /** @@ -1383,10 +1393,11 @@ static void jbd2_write_superblock(journal_t *journal, int write_op) * Update a journal's superblock information about log tail and write it to * disk, waiting for the IO to complete. 
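+ * Returns 0 on success, or a negative error code if writing the superblock
+ * fails (in which case the journal will also have been aborted).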
*/ -void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, +int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, unsigned long tail_block, int write_op) { journal_superblock_t *sb = journal->j_superblock; + int ret; BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", @@ -1395,13 +1406,18 @@ void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, sb->s_sequence = cpu_to_be32(tail_tid); sb->s_start = cpu_to_be32(tail_block); - jbd2_write_superblock(journal, write_op); + ret = jbd2_write_superblock(journal, write_op); + if (ret) + goto out; /* Log is no longer empty */ write_lock(&journal->j_state_lock); WARN_ON(!sb->s_sequence); journal->j_flags &= ~JBD2_FLUSHED; write_unlock(&journal->j_state_lock); + +out: + return ret; } /** @@ -1692,8 +1708,17 @@ int jbd2_journal_destroy(journal_t *journal) while (journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); mutex_lock(&journal->j_checkpoint_mutex); - jbd2_log_do_checkpoint(journal); + err = jbd2_log_do_checkpoint(journal); mutex_unlock(&journal->j_checkpoint_mutex); + /* + * If checkpointing failed, just free the buffers to avoid + * looping forever + */ + if (err) { + jbd2_journal_destroy_checkpoint(journal); + spin_lock(&journal->j_list_lock); + break; + } spin_lock(&journal->j_list_lock); } @@ -1950,7 +1975,14 @@ int jbd2_journal_flush(journal_t *journal) return -EIO; mutex_lock(&journal->j_checkpoint_mutex); - jbd2_cleanup_journal_tail(journal); + if (!err) { + err = jbd2_cleanup_journal_tail(journal); + if (err < 0) { + mutex_unlock(&journal->j_checkpoint_mutex); + goto out; + } + err = 0; + } /* Finally, mark the journal as really needing no recovery. * This sets s_start==0 in the underlying superblock, which is @@ -1966,7 +1998,8 @@ int jbd2_journal_flush(journal_t *journal) J_ASSERT(journal->j_head == journal->j_tail); J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); write_unlock(&journal->j_state_lock); - return 0; +out: + return err; } /** @@ -2053,8 +2086,12 @@ static void __journal_abort_soft (journal_t *journal, int errno) __jbd2_journal_abort_hard(journal); - if (errno) + if (errno) { jbd2_journal_update_sb_errno(journal); + write_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_REC_ERR; + write_unlock(&journal->j_state_lock); + } } /** diff --git a/fs/jffs2/README.Locking b/fs/jffs2/README.Locking index 3ea36554107fc..8918ac905a3b1 100644 --- a/fs/jffs2/README.Locking +++ b/fs/jffs2/README.Locking @@ -2,10 +2,6 @@ JFFS2 LOCKING DOCUMENTATION --------------------------- -At least theoretically, JFFS2 does not require the Big Kernel Lock -(BKL), which was always helpfully obtained for it by Linux 2.4 VFS -code. It has its own locking, as described below. - This document attempts to describe the existing locking rules for JFFS2. It is not expected to remain perfectly up to date, but ought to be fairly close. @@ -69,6 +65,7 @@ Ordering constraints: any f->sem held. 2. Never attempt to lock two file mutexes in one thread. No ordering rules have been made for doing so. + 3. Never lock a page cache page with f->sem held. 
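+
+	   As an illustrative sketch of rule 3 (mirroring what the reworked
+	   jffs2_write_begin() later in this patch does; not verbatim kernel
+	   code), the safe ordering is:
+
+		pg = grab_cache_page_write_begin(mapping, index, flags);
+		/* page is now locked */
+		mutex_lock(&f->sem);
+		/* ... modify the inode's fragment list ... */
+		mutex_unlock(&f->sem);
+		/* the still-locked page is handed back to the caller */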
erase_completion_lock spinlock diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index a3750f902adcb..c1f04947d7dcf 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -49,7 +49,8 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c) static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, - struct jffs2_inode_cache *ic) + struct jffs2_inode_cache *ic, + int *dir_hardlinks) { struct jffs2_full_dirent *fd; @@ -68,19 +69,21 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n", fd->name, fd->ino, ic->ino); jffs2_mark_node_obsolete(c, fd->raw); + /* Clear the ic/raw union so it doesn't cause problems later. */ + fd->ic = NULL; continue; } + /* From this point, fd->raw is no longer used so we can set fd->ic */ + fd->ic = child_ic; + child_ic->pino_nlink++; + /* If we appear (at this stage) to have hard-linked directories, + * set a flag to trigger a scan later */ if (fd->type == DT_DIR) { - if (child_ic->pino_nlink) { - JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", - fd->name, fd->ino, ic->ino); - /* TODO: What do we do about it? */ - } else { - child_ic->pino_nlink = ic->ino; - } - } else - child_ic->pino_nlink++; + child_ic->flags |= INO_FLAGS_IS_DIR; + if (child_ic->pino_nlink > 1) + *dir_hardlinks = 1; + } dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); /* Can't free scan_dents so far. We might need them in pass 2 */ @@ -94,8 +97,7 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, */ static int jffs2_build_filesystem(struct jffs2_sb_info *c) { - int ret; - int i; + int ret, i, dir_hardlinks = 0; struct jffs2_inode_cache *ic; struct jffs2_full_dirent *fd; struct jffs2_full_dirent *dead_fds = NULL; @@ -119,7 +121,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) /* Now scan the directory tree, increasing nlink according to every dirent found. */ for_each_inode(i, c, ic) { if (ic->scan_dents) { - jffs2_build_inode_pass1(c, ic); + jffs2_build_inode_pass1(c, ic, &dir_hardlinks); cond_resched(); } } @@ -155,6 +157,20 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) } dbg_fsbuild("pass 2a complete\n"); + + if (dir_hardlinks) { + /* If we detected directory hardlinks earlier, *hopefully* + * they are gone now because some of the links were from + * dead directories which still had some old dirents lying + * around and not yet garbage-collected, but which have + * been discarded above. So clear the pino_nlink field + * in each directory, so that the final scan below can + * print appropriate warnings. */ + for_each_inode(i, c, ic) { + if (ic->flags & INO_FLAGS_IS_DIR) + ic->pino_nlink = 0; + } + } dbg_fsbuild("freeing temporary data structures\n"); /* Finally, we can scan again and free the dirent structs */ @@ -162,6 +178,33 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) while(ic->scan_dents) { fd = ic->scan_dents; ic->scan_dents = fd->next; + /* We do use the pino_nlink field to count nlink of + * directories during fs build, so set it to the + * parent ino# now. Now that there's hopefully only + * one. */ + if (fd->type == DT_DIR) { + if (!fd->ic) { + /* We'll have complained about it and marked the corresponding raw node obsolete already. Just skip it.
*/ + continue; + } + + /* We *have* to have set this in jffs2_build_inode_pass1() */ + BUG_ON(!(fd->ic->flags & INO_FLAGS_IS_DIR)); + + /* We clear ic->pino_nlink ∀ directories' ic *only* if dir_hardlinks + * is set. Otherwise, we know this should never trigger anyway, so + * we don't do the check. And ic->pino_nlink still contains the nlink + * value (which is 1). */ + if (dir_hardlinks && fd->ic->pino_nlink) { + JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u is also hard linked from dir ino #%u\n", + fd->name, fd->ino, ic->ino, fd->ic->pino_nlink); + /* Should we unlink it from its previous parent? */ + } + + /* For directories, ic->pino_nlink holds that parent inode # */ + fd->ic->pino_nlink = ic->ino; + } jffs2_free_full_dirent(fd); } ic->scan_dents = NULL; @@ -240,11 +283,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c, /* Reduce nlink of the child. If it's now zero, stick it on the dead_fds list to be cleaned up later. Else just free the fd */ - - if (fd->type == DT_DIR) - child_ic->pino_nlink = 0; - else - child_ic->pino_nlink--; + child_ic->pino_nlink--; if (!child_ic->pino_nlink) { dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n", diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index f509f62e12f6e..3361979d728c0 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -137,39 +137,33 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, struct page *pg; struct inode *inode = mapping->host; struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); - struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); - struct jffs2_raw_inode ri; - uint32_t alloc_len = 0; pgoff_t index = pos >> PAGE_CACHE_SHIFT; uint32_t pageofs = index << PAGE_CACHE_SHIFT; int ret = 0; - jffs2_dbg(1, "%s()\n", __func__); - - if (pageofs > inode->i_size) { - ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, - ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); - if (ret) - return ret; - } - - mutex_lock(&f->sem); pg = grab_cache_page_write_begin(mapping, index, flags); - if (!pg) { - if (alloc_len) - jffs2_complete_reservation(c); - mutex_unlock(&f->sem); + if (!pg) return -ENOMEM; - } *pagep = pg; - if (alloc_len) { + jffs2_dbg(1, "%s()\n", __func__); + + if (pageofs > inode->i_size) { /* Make new hole frag from old EOF to new page */ + struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); + struct jffs2_raw_inode ri; struct jffs2_full_dnode *fn; + uint32_t alloc_len; jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", (unsigned int)inode->i_size, pageofs); + ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, + ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); + if (ret) + goto out_page; + + mutex_lock(&f->sem); memset(&ri, 0, sizeof(ri)); ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); @@ -196,6 +190,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, if (IS_ERR(fn)) { ret = PTR_ERR(fn); jffs2_complete_reservation(c); + mutex_unlock(&f->sem); goto out_page; } ret = jffs2_add_full_dnode_to_inode(c, f, fn); @@ -210,10 +205,12 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, jffs2_mark_node_obsolete(c, fn->raw); jffs2_free_full_dnode(fn); jffs2_complete_reservation(c); + mutex_unlock(&f->sem); goto out_page; } jffs2_complete_reservation(c); inode->i_size = pageofs; + mutex_unlock(&f->sem); } /* @@ -222,18 +219,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, * case of a short-copy. 
*/ if (!PageUptodate(pg)) { + mutex_lock(&f->sem); ret = jffs2_do_readpage_nolock(inode, pg); + mutex_unlock(&f->sem); if (ret) goto out_page; } - mutex_unlock(&f->sem); jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); return ret; out_page: unlock_page(pg); page_cache_release(pg); - mutex_unlock(&f->sem); return ret; } diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index 5a2dec2b064c9..95d5880a63ee1 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c @@ -1296,14 +1296,17 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era BUG_ON(start > orig_start); } - /* First, use readpage() to read the appropriate page into the page cache */ - /* Q: What happens if we actually try to GC the _same_ page for which commit_write() - * triggered garbage collection in the first place? - * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the - * page OK. We'll actually write it out again in commit_write, which is a little - * suboptimal, but at least we're correct. - */ + /* The rules state that we must obtain the page lock *before* f->sem, so + * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's + * actually going to *change* so we're safe; we only allow reading. + * + * It is important to note that jffs2_write_begin() will ensure that its + * page is marked Uptodate before allocating space. That means that if we + * end up here trying to GC the *same* page that jffs2_write_begin() is + * trying to write out, read_cache_page() will not deadlock. */ + mutex_unlock(&f->sem); pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg); + mutex_lock(&f->sem); if (IS_ERR(pg_ptr)) { pr_warn("read_cache_page() returned error: %ld\n", diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index fa35ff79ab358..0637271f37701 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -194,6 +194,7 @@ struct jffs2_inode_cache { #define INO_STATE_CLEARING 6 /* In clear_inode() */ #define INO_FLAGS_XATTR_CHECKED 0x01 /* has no duplicate xattr_ref */ +#define INO_FLAGS_IS_DIR 0x02 /* is a directory */ #define RAWNODE_CLASS_INODE_CACHE 0 #define RAWNODE_CLASS_XATTR_DATUM 1 @@ -249,7 +250,10 @@ struct jffs2_readinode_info struct jffs2_full_dirent { - struct jffs2_raw_node_ref *raw; + union { + struct jffs2_raw_node_ref *raw; + struct jffs2_inode_cache *ic; /* Just during part of build */ + }; struct jffs2_full_dirent *next; uint32_t version; uint32_t ino; /* == zero for unlink */ diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index fffca9517321c..2d48d28e16401 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -592,6 +592,9 @@ int kernfs_add_one(struct kernfs_node *kn) goto out_unlock; ret = -ENOENT; + if (parent->flags & KERNFS_EMPTY_DIR) + goto out_unlock; + if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent)) goto out_unlock; @@ -783,6 +786,38 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, return ERR_PTR(rc); } +/** + * kernfs_create_empty_dir - create an always empty directory + * @parent: parent in which to create a new directory + * @name: name of the new directory + * + * Returns the created node on success, ERR_PTR() value on failure. 
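+ *
+ * The new directory can never acquire children: kernfs_add_one() refuses
+ * nodes whose parent has KERNFS_EMPTY_DIR set, and the backing inode is
+ * initialized with make_empty_dir_inode().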
+ */ +struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, + const char *name) +{ + struct kernfs_node *kn; + int rc; + + /* allocate */ + kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR); + if (!kn) + return ERR_PTR(-ENOMEM); + + kn->flags |= KERNFS_EMPTY_DIR; + kn->dir.root = parent->dir.root; + kn->ns = NULL; + kn->priv = NULL; + + /* link in */ + rc = kernfs_add_one(kn); + if (!rc) + return kn; + + kernfs_put(kn); + return ERR_PTR(rc); +} + static struct dentry *kernfs_iop_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) @@ -1254,7 +1289,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, mutex_lock(&kernfs_mutex); error = -ENOENT; - if (!kernfs_active(kn) || !kernfs_active(new_parent)) + if (!kernfs_active(kn) || !kernfs_active(new_parent) || + (new_parent->flags & KERNFS_EMPTY_DIR)) goto out; error = 0; diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 2da8493a380b8..756dd56aaf60a 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -296,6 +296,8 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode) case KERNFS_DIR: inode->i_op = &kernfs_dir_iops; inode->i_fop = &kernfs_dir_fops; + if (kn->flags & KERNFS_EMPTY_DIR) + make_empty_dir_inode(inode); break; case KERNFS_FILE: inode->i_size = kn->attr.size; diff --git a/fs/libfs.c b/fs/libfs.c index cb1fb4b9b6377..f4641fd27bda8 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1093,3 +1093,99 @@ simple_nosetlease(struct file *filp, long arg, struct file_lock **flp, return -EINVAL; } EXPORT_SYMBOL(simple_nosetlease); + + +/* + * Operations for a permanently empty directory. + */ +static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ + return ERR_PTR(-ENOENT); +} + +static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = d_inode(dentry); + generic_fillattr(inode, stat); + return 0; +} + +static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr) +{ + return -EPERM; +} + +static int empty_dir_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + return -EOPNOTSUPP; +} + +static ssize_t empty_dir_getxattr(struct dentry *dentry, const char *name, + void *value, size_t size) +{ + return -EOPNOTSUPP; +} + +static int empty_dir_removexattr(struct dentry *dentry, const char *name) +{ + return -EOPNOTSUPP; +} + +static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size) +{ + return -EOPNOTSUPP; +} + +static const struct inode_operations empty_dir_inode_operations = { + .lookup = empty_dir_lookup, + .permission = generic_permission, + .setattr = empty_dir_setattr, + .getattr = empty_dir_getattr, + .setxattr = empty_dir_setxattr, + .getxattr = empty_dir_getxattr, + .removexattr = empty_dir_removexattr, + .listxattr = empty_dir_listxattr, +}; + +static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence) +{ + /* An empty directory has two entries . and .. 
at offsets 0 and 1 */ + return generic_file_llseek_size(file, offset, whence, 2, 2); +} + +static int empty_dir_readdir(struct file *file, struct dir_context *ctx) +{ + dir_emit_dots(file, ctx); + return 0; +} + +static const struct file_operations empty_dir_operations = { + .llseek = empty_dir_llseek, + .read = generic_read_dir, + .iterate = empty_dir_readdir, + .fsync = noop_fsync, +}; + + +void make_empty_dir_inode(struct inode *inode) +{ + set_nlink(inode, 2); + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + inode->i_rdev = 0; + inode->i_size = 0; + inode->i_blkbits = PAGE_SHIFT; + inode->i_blocks = 0; + + inode->i_op = &empty_dir_inode_operations; + inode->i_fop = &empty_dir_operations; +} + +bool is_empty_dir_inode(struct inode *inode) +{ + return (inode->i_fop == &empty_dir_operations) && + (inode->i_op == &empty_dir_inode_operations); +} diff --git a/fs/locks.c b/fs/locks.c index 653faabb07f46..8501eecb2af0c 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -862,12 +862,11 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, * whether or not a lock was successfully freed by testing the return * value for -ENOENT. */ -static int flock_lock_file(struct file *filp, struct file_lock *request) +static int flock_lock_inode(struct inode *inode, struct file_lock *request) { struct file_lock *new_fl = NULL; struct file_lock *fl; struct file_lock_context *ctx; - struct inode *inode = file_inode(filp); int error = 0; bool found = false; LIST_HEAD(dispose); @@ -890,7 +889,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) goto find_conflict; list_for_each_entry(fl, &ctx->flc_flock, fl_list) { - if (filp != fl->fl_file) + if (request->fl_file != fl->fl_file) continue; if (request->fl_type == fl->fl_type) goto out; @@ -1164,20 +1163,19 @@ int posix_lock_file(struct file *filp, struct file_lock *fl, EXPORT_SYMBOL(posix_lock_file); /** - * posix_lock_file_wait - Apply a POSIX-style lock to a file - * @filp: The file to apply the lock to + * posix_lock_inode_wait - Apply a POSIX-style lock to a file + * @inode: inode of file to which lock request should be applied * @fl: The lock to be applied * - * Add a POSIX style lock to a file. - * We merge adjacent & overlapping locks whenever possible. - * POSIX locks are sorted by owner task, then by starting address + * Variant of posix_lock_file_wait that does not take a filp, and so can be + * used after the filp has already been torn down. */ -int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep (); for (;;) { - error = posix_lock_file(filp, fl, NULL); + error = __posix_lock_file(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1189,7 +1187,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl) } return error; } -EXPORT_SYMBOL(posix_lock_file_wait); +EXPORT_SYMBOL(posix_lock_inode_wait); /** * locks_mandatory_locked - Check for an active lock @@ -1851,18 +1849,18 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) } /** - * flock_lock_file_wait - Apply a FLOCK-style lock to a file - * @filp: The file to apply the lock to + * flock_lock_inode_wait - Apply a FLOCK-style lock to a file + * @inode: inode of the file to apply to * @fl: The lock to be applied * - * Add a FLOCK style lock to a file. + * Apply a FLOCK style lock request to an inode. 
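+ * Like posix_lock_inode_wait() above, this variant takes an inode rather
+ * than a filp, so it can be used after the filp has been torn down.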
*/ -int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep(); for (;;) { - error = flock_lock_file(filp, fl); + error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1874,8 +1872,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl) } return error; } - -EXPORT_SYMBOL(flock_lock_file_wait); +EXPORT_SYMBOL(flock_lock_inode_wait); /** * sys_flock: - flock() system call. @@ -2157,7 +2154,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, goto out; } -again: error = flock_to_posix_lock(filp, file_lock, &flock); if (error) goto out; @@ -2199,19 +2195,22 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, * Attempt to detect a close/fcntl race and recover by * releasing the lock that was just acquired. */ - /* - * we need that spin_lock here - it prevents reordering between - * update of i_flctx->flc_posix and check for it done in close(). - * rcu_read_lock() wouldn't do. - */ - spin_lock(&current->files->file_lock); - f = fcheck(fd); - spin_unlock(&current->files->file_lock); - if (!error && f != filp && flock.l_type != F_UNLCK) { - flock.l_type = F_UNLCK; - goto again; + if (!error && file_lock->fl_type != F_UNLCK) { + /* + * We need that spin_lock here - it prevents reordering between + * update of i_flctx->flc_posix and check for it done in + * close(). rcu_read_lock() wouldn't do. + */ + spin_lock(&current->files->file_lock); + f = fcheck(fd); + spin_unlock(&current->files->file_lock); + if (f != filp) { + file_lock->fl_type = F_UNLCK; + error = do_lock_file_wait(filp, cmd, file_lock); + WARN_ON_ONCE(error); + error = -EBADF; + } } - out: locks_free_lock(file_lock); return error; @@ -2297,7 +2296,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, goto out; } -again: error = flock64_to_posix_lock(filp, file_lock, &flock); if (error) goto out; @@ -2339,14 +2337,22 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, * Attempt to detect a close/fcntl race and recover by * releasing the lock that was just acquired. */ - spin_lock(&current->files->file_lock); - f = fcheck(fd); - spin_unlock(&current->files->file_lock); - if (!error && f != filp && flock.l_type != F_UNLCK) { - flock.l_type = F_UNLCK; - goto again; + if (!error && file_lock->fl_type != F_UNLCK) { + /* + * We need that spin_lock here - it prevents reordering between + * update of i_flctx->flc_posix and check for it done in + * close(). rcu_read_lock() wouldn't do.
+ */ + spin_lock(&current->files->file_lock); + f = fcheck(fd); + spin_unlock(&current->files->file_lock); + if (f != filp) { + file_lock->fl_type = F_UNLCK; + error = do_lock_file_wait(filp, cmd, file_lock); + WARN_ON_ONCE(error); + error = -EBADF; + } } - out: locks_free_lock(file_lock); return error; @@ -2401,7 +2407,8 @@ locks_remove_flock(struct file *filp) .fl_type = F_UNLCK, .fl_end = OFFSET_MAX, }; - struct file_lock_context *flctx = file_inode(filp)->i_flctx; + struct inode *inode = file_inode(filp); + struct file_lock_context *flctx = inode->i_flctx; if (list_empty(&flctx->flc_flock)) return; @@ -2409,7 +2416,7 @@ locks_remove_flock(struct file *filp) if (filp->f_op->flock) filp->f_op->flock(filp, F_SETLKW, &fl); else - flock_lock_file(filp, &fl); + flock_lock_inode(inode, &fl); if (fl.fl_ops && fl.fl_ops->fl_release_private) fl.fl_ops->fl_release_private(&fl); diff --git a/fs/namei.c b/fs/namei.c index fe30d3be43a8b..f3cc848da8bc4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -505,6 +505,24 @@ struct nameidata { char *saved_names[MAX_NESTED_LINKS + 1]; }; +/** + * path_connected - Verify that a path->dentry is below path->mnt.mnt_root + * @path: the path to verify + * + * Rename can sometimes move a file or directory outside of a bind + * mount; path_connected allows those cases to be detected. + */ +static bool path_connected(const struct path *path) +{ + struct vfsmount *mnt = path->mnt; + + /* Only bind mounts can have disconnected paths */ + if (mnt->mnt_root == mnt->mnt_sb->s_root) + return true; + + return is_subdir(path->dentry, mnt->mnt_root); +} + /* * Path walking has 2 modes, rcu-walk and ref-walk (see * Documentation/filesystems/path-lookup.txt). In situations when we can't @@ -1194,6 +1212,8 @@ static int follow_dotdot_rcu(struct nameidata *nd) goto failed; nd->path.dentry = parent; nd->seq = seq; + if (unlikely(!path_connected(&nd->path))) + goto failed; break; } if (!follow_up_rcu(&nd->path)) @@ -1290,7 +1310,7 @@ static void follow_mount(struct path *path) } } -static void follow_dotdot(struct nameidata *nd) +static int follow_dotdot(struct nameidata *nd) { if (!nd->root.mnt) set_root(nd); @@ -1306,6 +1326,10 @@ static void follow_dotdot(struct nameidata *nd) /* rare case of legitimate dget_parent()... */ nd->path.dentry = dget_parent(nd->path.dentry); dput(old); + if (unlikely(!path_connected(&nd->path))) { + path_put(&nd->path); + return -ENOENT; + } break; } if (!follow_up(&nd->path)) @@ -1313,6 +1337,7 @@ static void follow_dotdot(struct nameidata *nd) } follow_mount(&nd->path); nd->inode = nd->path.dentry->d_inode; + return 0; } /* @@ -1428,8 +1453,6 @@ static int lookup_fast(struct nameidata *nd, negative = d_is_negative(dentry); if (read_seqcount_retry(&dentry->d_seq, seq)) return -ECHILD; - if (negative) - return -ENOENT; /* * This sequence count validates that the parent had no @@ -1450,6 +1473,12 @@ static int lookup_fast(struct nameidata *nd, goto unlazy; } } + /* + * Note: do negative dentry check after revalidation in + * case that drops it.
+ */ + if (negative) + return -ENOENT; path->mnt = mnt; path->dentry = dentry; if (likely(__follow_mount_rcu(nd, path, inode))) @@ -1541,7 +1570,7 @@ static inline int handle_dots(struct nameidata *nd, int type) if (follow_dotdot_rcu(nd)) return -ECHILD; } else - follow_dotdot(nd); + return follow_dotdot(nd); } return 0; } @@ -1590,10 +1619,10 @@ static inline int walk_component(struct nameidata *nd, struct path *path, if (err < 0) goto out_err; - inode = path->dentry->d_inode; err = -ENOENT; if (d_is_negative(path->dentry)) goto out_path_put; + inode = path->dentry->d_inode; } if (should_follow_link(path->dentry, follow)) { @@ -2290,7 +2319,7 @@ mountpoint_last(struct nameidata *nd, struct path *path) if (unlikely(nd->last_type != LAST_NORM)) { error = handle_dots(nd, nd->last_type); if (error) - goto out; + return error; dentry = dget(nd->path.dentry); goto done; } @@ -3049,6 +3078,7 @@ static int do_last(struct nameidata *nd, struct path *path, path_to_nameidata(path, nd); goto out; } + inode = path->dentry->d_inode; finish_lookup: /* we _can_ be in RCU mode here */ if (should_follow_link(path->dentry, !symlink_ok)) { @@ -3123,6 +3153,10 @@ static int do_last(struct nameidata *nd, struct path *path, goto exit_fput; } out: + if (unlikely(error > 0)) { + WARN_ON(1); + error = -EINVAL; + } if (got_write) mnt_drop_write(nd->path.mnt); path_put(&save_parent); diff --git a/fs/namespace.c b/fs/namespace.c index 1b9e11167baed..fce3cc1a3fa77 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1350,6 +1350,36 @@ enum umount_tree_flags { UMOUNT_PROPAGATE = 2, UMOUNT_CONNECTED = 4, }; + +static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how) +{ + /* Leaving mounts connected is only valid for lazy umounts */ + if (how & UMOUNT_SYNC) + return true; + + /* A mount without a parent has nothing to be connected to */ + if (!mnt_has_parent(mnt)) + return true; + + /* Because the reference counting rules change when mounts are + * unmounted and connected, umounted mounts may not be + * connected to mounted mounts. + */ + if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) + return true; + + /* Has it been requested that the mount remain connected? */ + if (how & UMOUNT_CONNECTED) + return false; + + /* Is the mount locked such that it needs to remain connected? */ + if (IS_MNT_LOCKED(mnt)) + return false; + + /* By default disconnect the mount */ + return true; +} + /* * mount_lock must be held * namespace_sem must be held for write @@ -1387,10 +1417,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) if (how & UMOUNT_SYNC) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; - disconnect = !(((how & UMOUNT_CONNECTED) && - mnt_has_parent(p) && - (p->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) || - IS_MNT_LOCKED_AND_LAZY(p)); + disconnect = disconnect_mount(p, how); pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, disconnect ? 
&unmounted : NULL); @@ -1527,11 +1554,8 @@ void __detach_mounts(struct dentry *dentry) while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); if (mnt->mnt.mnt_flags & MNT_UMOUNT) { - struct mount *p, *tmp; - list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { - hlist_add_head(&p->mnt_umount.s_list, &unmounted); - umount_mnt(p); - } + hlist_add_head(&mnt->mnt_umount.s_list, &unmounted); + umount_mnt(mnt); } else umount_tree(mnt, UMOUNT_CONNECTED); } @@ -2332,6 +2356,8 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) return err; } +static bool fs_fully_visible(struct file_system_type *fs_type, int *new_mnt_flags); + /* * create a new mount for userspace and request it to be added into the * namespace's tree @@ -2363,6 +2389,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, flags |= MS_NODEV; mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } + if (type->fs_flags & FS_USERNS_VISIBLE) { + if (!fs_fully_visible(type, &mnt_flags)) + return -EPERM; + } } mnt = vfs_kern_mount(type, flags, name, data); @@ -3164,9 +3194,10 @@ bool current_chrooted(void) return chrooted; } -bool fs_fully_visible(struct file_system_type *type) +static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) { struct mnt_namespace *ns = current->nsproxy->mnt_ns; + int new_flags = *new_mnt_flags; struct mount *mnt; bool visible = false; @@ -3185,16 +3216,36 @@ bool fs_fully_visible(struct file_system_type *type) if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) continue; - /* This mount is not fully visible if there are any child mounts - * that cover anything except for empty directories. + /* Verify the mount flags are equal to or more permissive + * than the proposed new mount. + */ + if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) && + !(new_flags & MNT_READONLY)) + continue; + if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && + !(new_flags & MNT_NODEV)) + continue; + if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) && + ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK))) + continue; + + /* This mount is not fully visible if there are any + * locked child mounts that cover anything except for + * empty directories. */ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { struct inode *inode = child->mnt_mountpoint->d_inode; - if (!S_ISDIR(inode->i_mode)) - goto next; - if (inode->i_nlink > 2) + /* Only worry about locked mounts */ + if (!(mnt->mnt.mnt_flags & MNT_LOCKED)) + continue; + /* Is the directory permanently empty?
*/ + if (!is_empty_dir_inode(inode)) goto next; } + /* Preserve the locked attributes */ + *new_mnt_flags |= mnt->mnt.mnt_flags & (MNT_LOCK_READONLY | \ + MNT_LOCK_NODEV | \ + MNT_LOCK_ATIME); visible = true; goto found; next: ; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 892aefff36300..fdd234206dff9 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -775,7 +775,7 @@ static int nfs_init_server(struct nfs_server *server, server->options = data->options; server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| - NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR; + NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index a46bf6de9ce45..fb1fb2774d346 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -628,23 +628,18 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out; } -static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl) +static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl) { int i; - for (i = 0; i < fl->num_fh; i++) { - if (!fl->fh_array[i]) - break; - kfree(fl->fh_array[i]); + if (fl->fh_array) { + for (i = 0; i < fl->num_fh; i++) { + if (!fl->fh_array[i]) - break; + kfree(fl->fh_array[i]); + } + kfree(fl->fh_array); } - kfree(fl->fh_array); - fl->fh_array = NULL; -} - -static void -_filelayout_free_lseg(struct nfs4_filelayout_segment *fl) -{ - filelayout_free_fh_array(fl); kfree(fl); } @@ -715,21 +710,21 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, /* Do we want to use a mempool here? */ fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags); if (!fl->fh_array[i]) - goto out_err_free; + goto out_err; p = xdr_inline_decode(&stream, 4); if (unlikely(!p)) - goto out_err_free; + goto out_err; fl->fh_array[i]->size = be32_to_cpup(p++); if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { printk(KERN_ERR "NFS: Too big fh %d received %d\n", i, fl->fh_array[i]->size); - goto out_err_free; + goto out_err; } p = xdr_inline_decode(&stream, fl->fh_array[i]->size); if (unlikely(!p)) - goto out_err_free; + goto out_err; memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); dprintk("DEBUG: %s: fh len %d\n", __func__, fl->fh_array[i]->size); @@ -738,8 +733,6 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, __free_page(scratch); return 0; -out_err_free: - filelayout_free_fh_array(fl); out_err: __free_page(scratch); return -EIO; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 7d05089e52d6c..c2abdc7db6c33 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -631,7 +631,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) nfs_direct_set_resched_writes(hdr->dreq); /* fake unstable write to let common nfs resend pages */ hdr->verf.committed = NFS_UNSTABLE; - hdr->good_bytes = 0; + hdr->good_bytes = hdr->args.count; } return; } @@ -1039,6 +1039,11 @@ static int ff_layout_write_done_cb(struct rpc_task *task, hdr->res.verf->committed == NFS_DATA_SYNC) ff_layout_set_layoutcommit(hdr); + /* zero out fattr since we don't care about DS attrs at all */ + hdr->fattr.valid = 0; + if (task->tk_status >= 0) + nfs_writeback_update_inode(hdr); + return 0; } @@ -1479,11 +1484,9 @@ ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo, start = xdr_reserve_space(xdr,
4); BUG_ON(!start); - if (ff_layout_encode_ioerr(flo, xdr, args)) - goto out; - + ff_layout_encode_ioerr(flo, xdr, args); ff_layout_encode_iostats(flo, xdr, args); -out: + *start = cpu_to_be32((xdr->p - start - 1) * 4); dprintk("%s: Return\n", __func__); } diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 77a2d026aa12b..b28fa4cbea526 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -324,7 +324,8 @@ static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror, __func__, PTR_ERR(cred)); return PTR_ERR(cred); } else { - mirror->cred = cred; + if (cmpxchg(&mirror->cred, NULL, cred)) + put_rpccred(cred); } } return 0; @@ -386,7 +387,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ smp_rmb(); if (ds->ds_clp) - goto out; + goto out_update_creds; flavor = nfs4_ff_layout_choose_authflavor(mirror); @@ -430,7 +431,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, } } } - +out_update_creds: if (ff_layout_update_mirror_cred(mirror, ds)) ds = NULL; out: @@ -499,16 +500,19 @@ int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, range->offset, range->length)) continue; /* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE) - * + deviceid(NFS4_DEVICEID4_SIZE) + status(4) + opnum(4) + * + array length + deviceid(NFS4_DEVICEID4_SIZE) + * + status(4) + opnum(4) */ p = xdr_reserve_space(xdr, - 24 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE); + 28 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE); if (unlikely(!p)) return -ENOBUFS; p = xdr_encode_hyper(p, err->offset); p = xdr_encode_hyper(p, err->length); p = xdr_encode_opaque_fixed(p, &err->stateid, NFS4_STATEID_SIZE); + /* Encode 1 error */ + *p++ = cpu_to_be32(1); p = xdr_encode_opaque_fixed(p, &err->deviceid, NFS4_DEVICEID4_SIZE); *p++ = cpu_to_be32(err->status); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f734562c6d244..723b8922d76b9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -442,7 +442,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode->i_version = fattr->change_attr; - else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) + else nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_SIZE) inode->i_size = nfs_size_to_loff_t(fattr->size); @@ -1242,9 +1242,11 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - if (cur_size != new_isize && nfsi->nrequests == 0) + if (cur_size != new_isize) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; } + if (nfsi->nrequests != 0) + invalid &= ~NFS_INO_REVAL_PAGECACHE; /* Have any file permissions changed? 
*/ if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) @@ -1268,13 +1270,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat return 0; } -static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr) -{ - if (!(fattr->valid & NFS_ATTR_FATTR_CTIME)) - return 0; - return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; -} - static atomic_long_t nfs_attr_generation_counter; static unsigned long nfs_read_attr_generation_counter(void) @@ -1423,7 +1418,6 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n const struct nfs_inode *nfsi = NFS_I(inode); return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || - nfs_ctime_need_update(inode, fattr) || ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); } @@ -1486,6 +1480,13 @@ static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr { unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + /* + * Don't revalidate the pagecache if we hold a delegation, but do + * force an attribute update + */ + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_FORCED; + if (S_ISDIR(inode->i_mode)) invalid |= NFS_INO_INVALID_DATA; nfs_set_cache_invalid(inode, invalid); @@ -1626,6 +1627,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) unsigned long invalid = 0; unsigned long now = jiffies; unsigned long save_cache_validity; + bool cache_revalidated = true; dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, @@ -1682,28 +1684,33 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_REVAL_PAGECACHE; + | NFS_INO_INVALID_ACL; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); inode->i_version = fattr->change_attr; } - } else if (server->caps & NFS_CAP_CHANGE_ATTR) + } else { nfsi->cache_validity |= save_cache_validity; + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_MTIME) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - } else if (server->caps & NFS_CAP_MTIME) + } else if (server->caps & NFS_CAP_MTIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_CTIME) { memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - } else if (server->caps & NFS_CAP_CTIME) + } else if (server->caps & NFS_CAP_CTIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } /* Check if our cached file size is stale */ if (fattr->valid & NFS_ATTR_FATTR_SIZE) { @@ -1715,7 +1722,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if ((nfsi->nrequests == 0) || new_isize > cur_isize) { i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - invalid &= ~NFS_INO_REVAL_PAGECACHE; } dprintk("NFS: isize change on server for file %s/%ld " "(%Ld to %Ld)\n", @@ -1724,19 +1730,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) (long long)cur_isize, (long long)new_isize); } - } else + } else { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE | 
NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_ATIME) memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); - else if (server->caps & NFS_CAP_ATIME) + else if (server->caps & NFS_CAP_ATIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATIME | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_MODE) { if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { @@ -1745,36 +1755,42 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_mode = newmode; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } - } else if (server->caps & NFS_CAP_MODE) + } else if (server->caps & NFS_CAP_MODE) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (!uid_eq(inode->i_uid, fattr->uid)) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_uid = fattr->uid; } - } else if (server->caps & NFS_CAP_OWNER) + } else if (server->caps & NFS_CAP_OWNER) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (!gid_eq(inode->i_gid, fattr->gid)) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_gid = fattr->gid; } - } else if (server->caps & NFS_CAP_OWNER_GROUP) + } else if (server->caps & NFS_CAP_OWNER_GROUP) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (inode->i_nlink != fattr->nlink) { @@ -1783,19 +1799,22 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_DATA; set_nlink(inode, fattr->nlink); } - } else if (server->caps & NFS_CAP_NLINK) + } else if (server->caps & NFS_CAP_NLINK) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - } - if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) + } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; + else + cache_revalidated = false; /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { @@ -1805,16 +1824,24 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Set barrier to be more recent than all outstanding updates */ nfsi->attr_gencount = nfs_inc_attr_generation_counter(); } else { - if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { - if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) - nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); + if (cache_revalidated) { + if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, + nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { + nfsi->attrtimeo <<= 1; + if (nfsi->attrtimeo > NFS_MAXATTRTIMEO(inode)) + nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); + } nfsi->attrtimeo_timestamp = now; } /* Set the barrier to be more recent than this fattr */ if 
((long)fattr->gencount - (long)nfsi->attr_gencount > 0) nfsi->attr_gencount = fattr->gencount; } - invalid &= ~NFS_INO_INVALID_ATTR; + + /* Don't declare attrcache up to date if there were no attrs! */ + if (cache_revalidated) + invalid &= ~NFS_INO_INVALID_ATTR; + /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 53852a4bd88be..9b04c2e6fffc3 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1342,7 +1342,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, if (args->npages != 0) xdr_write_pages(xdr, args->pages, 0, args->len); else - xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE); + xdr_reserve_space(xdr, args->len); error = nfsacl_encode(xdr->buf, base, args->inode, (args->mask & NFS_ACL) ? diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index e42be52a8c18d..5dea913baf46c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -33,7 +33,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) return ret; idr_preload(GFP_KERNEL); spin_lock(&nn->nfs_client_lock); - ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT); + ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT); if (ret >= 0) clp->cl_cb_ident = ret; spin_unlock(&nn->nfs_client_lock); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 55e1e3af23a3d..84706204cc336 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1152,6 +1152,8 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode) return 0; if ((delegation->type & fmode) != fmode) return 0; + if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) + return 0; if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) return 0; nfs_mark_delegation_referenced(delegation); @@ -1204,15 +1206,19 @@ static bool nfs_need_update_open_stateid(struct nfs4_state *state, static void nfs_resync_open_stateid_locked(struct nfs4_state *state) { + if (!(state->n_wronly || state->n_rdonly || state->n_rdwr)) + return; if (state->n_wronly) set_bit(NFS_O_WRONLY_STATE, &state->flags); if (state->n_rdonly) set_bit(NFS_O_RDONLY_STATE, &state->flags); if (state->n_rdwr) set_bit(NFS_O_RDWR_STATE, &state->flags); + set_bit(NFS_OPEN_STATE, &state->flags); } static void nfs_clear_open_stateid_locked(struct nfs4_state *state, + nfs4_stateid *arg_stateid, nfs4_stateid *stateid, fmode_t fmode) { clear_bit(NFS_O_RDWR_STATE, &state->flags); @@ -1231,8 +1237,9 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state, if (stateid == NULL) return; /* Handle races with OPEN */ - if (!nfs4_stateid_match_other(stateid, &state->open_stateid) || - !nfs4_stateid_is_newer(stateid, &state->open_stateid)) { + if (!nfs4_stateid_match_other(arg_stateid, &state->open_stateid) || + (nfs4_stateid_match_other(stateid, &state->open_stateid) && + !nfs4_stateid_is_newer(stateid, &state->open_stateid))) { nfs_resync_open_stateid_locked(state); return; } @@ -1241,10 +1248,12 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state, nfs4_stateid_copy(&state->open_stateid, stateid); } -static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) +static void nfs_clear_open_stateid(struct nfs4_state *state, + nfs4_stateid *arg_stateid, + nfs4_stateid *stateid, fmode_t fmode) { write_seqlock(&state->seqlock); - nfs_clear_open_stateid_locked(state, stateid, fmode); + nfs_clear_open_stateid_locked(state, arg_stateid, stateid, 
fmode); write_sequnlock(&state->seqlock); if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) nfs4_schedule_state_manager(state->owner->so_server->nfs_client); @@ -1275,6 +1284,7 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s * Protect the call to nfs4_state_set_mode_locked and * serialise the stateid update */ + spin_lock(&state->owner->so_lock); write_seqlock(&state->seqlock); if (deleg_stateid != NULL) { nfs4_stateid_copy(&state->stateid, deleg_stateid); @@ -1283,7 +1293,6 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s if (open_stateid != NULL) nfs_set_open_stateid_locked(state, open_stateid, fmode); write_sequnlock(&state->seqlock); - spin_lock(&state->owner->so_lock); update_open_stateflags(state, fmode); spin_unlock(&state->owner->so_lock); } @@ -2322,9 +2331,9 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, dentry = d_add_unique(dentry, igrab(state->inode)); if (dentry == NULL) { dentry = opendata->dentry; - } else if (dentry != ctx->dentry) { + } else { dput(ctx->dentry); - ctx->dentry = dget(dentry); + ctx->dentry = dentry; } nfs_set_verifier(dentry, nfs_save_change_attribute(d_inode(opendata->dir))); @@ -2410,7 +2419,7 @@ static int _nfs4_do_open(struct inode *dir, goto err_free_label; state = ctx->state; - if ((opendata->o_arg.open_flags & O_EXCL) && + if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) && (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { nfs4_exclusive_attrset(opendata, sattr); @@ -2669,7 +2678,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) goto out_release; } } - nfs_clear_open_stateid(state, res_stateid, calldata->arg.fmode); + nfs_clear_open_stateid(state, &calldata->arg.stateid, + res_stateid, calldata->arg.fmode); out_release: nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, calldata->res.fattr); @@ -5357,15 +5367,15 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock * return err; } -static int do_vfs_lock(struct file *file, struct file_lock *fl) +static int do_vfs_lock(struct inode *inode, struct file_lock *fl) { int res = 0; switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { case FL_POSIX: - res = posix_lock_file_wait(file, fl); + res = posix_lock_inode_wait(inode, fl); break; case FL_FLOCK: - res = flock_lock_file_wait(file, fl); + res = flock_lock_inode_wait(inode, fl); break; default: BUG(); @@ -5425,7 +5435,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) switch (task->tk_status) { case 0: renew_lease(calldata->server, calldata->timestamp); - do_vfs_lock(calldata->fl.fl_file, &calldata->fl); + do_vfs_lock(calldata->lsp->ls_state->inode, &calldata->fl); if (nfs4_update_lock_stateid(calldata->lsp, &calldata->res.stateid)) break; @@ -5533,7 +5543,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * mutex_lock(&sp->so_delegreturn_mutex); /* Exclude nfs4_reclaim_open_stateid() - note nesting! 
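(so_delegreturn_mutex is taken before nfsi->rwsem here; the ENOENT path below drops them in the reverse order.)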
*/ down_read(&nfsi->rwsem); - if (do_vfs_lock(request->fl_file, request) == -ENOENT) { + if (do_vfs_lock(inode, request) == -ENOENT) { up_read(&nfsi->rwsem); mutex_unlock(&sp->so_delegreturn_mutex); goto out; @@ -5674,7 +5684,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) data->timestamp); if (data->arg.new_lock) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); - if (do_vfs_lock(data->fl.fl_file, &data->fl) < 0) { + if (do_vfs_lock(lsp->ls_state->inode, &data->fl) < 0) { rpc_restart_call_prepare(task); break; } @@ -5916,7 +5926,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock if (status != 0) goto out; request->fl_flags |= FL_ACCESS; - status = do_vfs_lock(request->fl_file, request); + status = do_vfs_lock(state->inode, request); if (status < 0) goto out; down_read(&nfsi->rwsem); @@ -5924,7 +5934,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock /* Yes: cache locks! */ /* ...but avoid races with delegation recall... */ request->fl_flags = fl_flags & ~FL_SLEEP; - status = do_vfs_lock(request->fl_file, request); + status = do_vfs_lock(state->inode, request); up_read(&nfsi->rwsem); goto out; } @@ -8502,7 +8512,6 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK, .init_client = nfs40_init_client, .shutdown_client = nfs40_shutdown_client, @@ -8528,7 +8537,6 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1, @@ -8551,7 +8559,6 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .minor_version = 2, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 @@ -8568,6 +8575,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, + .mig_recovery_ops = &nfs41_mig_recovery_ops, }; #endif diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2782cfca22650..ddef1dc80cf7d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1482,6 +1482,8 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs spin_unlock(&state->state_lock); } nfs4_put_open_state(state); + clear_bit(NFS4CLNT_RECLAIM_NOGRACE, + &state->flags); spin_lock(&sp->so_lock); goto restart; } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 282b393695106..93d355c8b4676 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -77,8 +77,8 @@ EXPORT_SYMBOL_GPL(nfs_pgheader_init); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) { spin_lock(&hdr->lock); - if (pos < hdr->io_start + hdr->good_bytes) { - set_bit(NFS_IOHDR_ERROR, &hdr->flags); + if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags) + || pos < hdr->io_start + hdr->good_bytes) { clear_bit(NFS_IOHDR_EOF, &hdr->flags); hdr->good_bytes = pos - hdr->io_start; hdr->error = error; @@ -508,7 +508,7 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, * for it without upsetting the slab allocator. 
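* (for example, on a 64-bit machine with 4 KiB pages a single page holds 512 page pointers, so at most 512 pages, i.e. 2 MiB, of data can be coalesced; each array slot is a pointer, hence the sizeof(struct page *) below rather than sizeof(struct page))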
*/ if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * - sizeof(struct page) > PAGE_SIZE) + sizeof(struct page *) > PAGE_SIZE) return 0; return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); @@ -1110,8 +1110,11 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) nfs_list_remove_request(req); if (__nfs_pageio_add_request(desc, req)) continue; - if (desc->pg_error < 0) + if (desc->pg_error < 0) { + list_splice_tail(&head, &mirror->pg_list); + mirror->pg_recoalesce = 1; return 0; + } break; } } while (mirror->pg_recoalesce); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 230606243be6a..d47c188682b16 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1821,6 +1821,7 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) /* Resend all requests through the MDS */ nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, hdr->completion_ops); + set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags); return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); @@ -1865,6 +1866,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); + hdr->release(hdr); } static enum pnfs_try_status @@ -1979,6 +1981,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); + hdr->release(hdr); } /* diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index f37e25b6311c8..1705c78ee2d8a 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -359,26 +359,31 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) return false; } +/* + * Checks if 'dsaddrs1' contains a subset of 'dsaddrs2'. If it does, + * declare a match. + */ static bool _same_data_server_addrs_locked(const struct list_head *dsaddrs1, const struct list_head *dsaddrs2) { struct nfs4_pnfs_ds_addr *da1, *da2; - - /* step through both lists, comparing as we go */ - for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), - da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); - da1 != NULL && da2 != NULL; - da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), - da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { - if (!same_sockaddr((struct sockaddr *)&da1->da_addr, - (struct sockaddr *)&da2->da_addr)) - return false; + struct sockaddr *sa1, *sa2; + bool match = false; + + list_for_each_entry(da1, dsaddrs1, da_node) { + sa1 = (struct sockaddr *)&da1->da_addr; + match = false; + list_for_each_entry(da2, dsaddrs2, da_node) { + sa2 = (struct sockaddr *)&da2->da_addr; + match = same_sockaddr(sa1, sa2); + if (match) + break; + } + if (!match) + break; } - if (da1 == NULL && da2 == NULL) - return true; - - return false; + return match; } /* diff --git a/fs/nfs/read.c b/fs/nfs/read.c index ae0ff7a11b403..01b8cc8e8cfc4 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -72,6 +72,9 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) { struct nfs_pgio_mirror *mirror; + if (pgio->pg_ops && pgio->pg_ops->pg_cleanup) + pgio->pg_ops->pg_cleanup(pgio); + pgio->pg_ops = &nfs_pgio_rw_ops; /* read path should never have more than one mirror */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dfc19f1575a19..d9851a6a28138 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1203,7 +1203,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct ino return 1; if (!flctx || (list_empty_careful(&flctx->flc_flock) && list_empty_careful(&flctx->flc_posix))) - return 0; + return 1; /* Check 
to see if there are whole file write locks */ ret = 0; @@ -1289,6 +1289,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, static void nfs_redirty_request(struct nfs_page *req) { nfs_mark_request_dirty(req); + set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); nfs_unlock_request(req); nfs_end_page_writeback(req); nfs_release_request(req); @@ -1330,6 +1331,9 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { struct nfs_pgio_mirror *mirror; + if (pgio->pg_ops && pgio->pg_ops->pg_cleanup) + pgio->pg_ops->pg_cleanup(pgio); + pgio->pg_ops = &nfs_pgio_rw_ops; nfs_pageio_stop_mirroring(pgio); @@ -1382,24 +1386,27 @@ static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr, { struct nfs_pgio_args *argp = &hdr->args; struct nfs_pgio_res *resp = &hdr->res; + u64 size = argp->offset + resp->count; if (!(fattr->valid & NFS_ATTR_FATTR_SIZE)) + fattr->size = size; + if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) { + fattr->valid &= ~NFS_ATTR_FATTR_SIZE; return; - if (argp->offset + resp->count != fattr->size) - return; - if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) + } + if (size != fattr->size) return; /* Set attribute barrier */ nfs_fattr_set_barrier(fattr); + /* ...and update size */ + fattr->valid |= NFS_ATTR_FATTR_SIZE; } void nfs_writeback_update_inode(struct nfs_pgio_header *hdr) { - struct nfs_fattr *fattr = hdr->res.fattr; + struct nfs_fattr *fattr = &hdr->fattr; struct inode *inode = hdr->inode; - if (fattr == NULL) - return; spin_lock(&inode->i_lock); nfs_writeback_check_extend(hdr, fattr); nfs_post_op_update_inode_force_wcc_locked(inode, fattr); diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index cdefaa331a071..c29d9421bd5e1 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -56,14 +56,6 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, u32 device_generation = 0; int error; - /* - * We do not attempt to support I/O smaller than the fs block size, - * or not aligned to it. - */ - if (args->lg_minlength < block_size) { - dprintk("pnfsd: I/O too small\n"); - goto out_layoutunavailable; - } if (seg->offset & (block_size - 1)) { dprintk("pnfsd: I/O misaligned\n"); goto out_layoutunavailable; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 039f9c8a95e84..bb6c324f1f3df 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -765,25 +765,80 @@ void nfs4_unhash_stid(struct nfs4_stid *s) s->sc_type = 0; } -static void +/** + * nfs4_get_existing_delegation - Discover if this delegation already exists + * @clp: a pointer to the nfs4_client we're granting a delegation to + * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: 0 if an existing delegation was not found. + * + * On error: -EAGAIN if one was previously granted to this nfs4_client + * for this nfs4_file. + * + */ + +static int +nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) +{ + struct nfs4_delegation *searchdp = NULL; + struct nfs4_client *searchclp = NULL; + + lockdep_assert_held(&state_lock); + lockdep_assert_held(&fp->fi_lock); + + list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) { + searchclp = searchdp->dl_stid.sc_client; + if (clp == searchclp) { + return -EAGAIN; + } + } + return 0; +} + +/** + * hash_delegation_locked - Add a delegation to the appropriate lists + * @dp: a pointer to the nfs4_delegation we are adding. 
+ * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: 0 if the delegation was successfully hashed. + * + * On error: -EAGAIN if one was previously granted to this + * nfs4_client for this nfs4_file. Delegation is not hashed. + * + */ + +static int hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { + int status; + struct nfs4_client *clp = dp->dl_stid.sc_client; + lockdep_assert_held(&state_lock); lockdep_assert_held(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + if (status) + return status; + ++fp->fi_delegees; atomic_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); + list_add(&dp->dl_perclnt, &clp->cl_delegations); + return 0; } -static void +static bool unhash_delegation_locked(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_stid.sc_file; lockdep_assert_held(&state_lock); + if (list_empty(&dp->dl_perfile)) + return false; + dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; /* Ensure that deleg break won't try to requeue it */ ++dp->dl_time; @@ -792,16 +847,21 @@ unhash_delegation_locked(struct nfs4_delegation *dp) list_del_init(&dp->dl_recall_lru); list_del_init(&dp->dl_perfile); spin_unlock(&fp->fi_lock); + return true; } static void destroy_delegation(struct nfs4_delegation *dp) { + bool unhashed; + spin_lock(&state_lock); - unhash_delegation_locked(dp); + unhashed = unhash_delegation_locked(dp); spin_unlock(&state_lock); - put_clnt_odstate(dp->dl_clnt_odstate); - nfs4_put_deleg_lease(dp->dl_stid.sc_file); - nfs4_put_stid(&dp->dl_stid); + if (unhashed) { + put_clnt_odstate(dp->dl_clnt_odstate); + nfs4_put_deleg_lease(dp->dl_stid.sc_file); + nfs4_put_stid(&dp->dl_stid); + } } static void revoke_delegation(struct nfs4_delegation *dp) @@ -1004,16 +1064,20 @@ static void nfs4_put_stateowner(struct nfs4_stateowner *sop) sop->so_ops->so_free(sop); } -static void unhash_ol_stateid(struct nfs4_ol_stateid *stp) +static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_file *fp = stp->st_stid.sc_file; lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock); + if (list_empty(&stp->st_perfile)) + return false; + spin_lock(&fp->fi_lock); - list_del(&stp->st_perfile); + list_del_init(&stp->st_perfile); spin_unlock(&fp->fi_lock); list_del(&stp->st_perstateowner); + return true; } static void nfs4_free_ol_stateid(struct nfs4_stid *stid) @@ -1063,25 +1127,27 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp, list_add(&stp->st_locks, reaplist); } -static void unhash_lock_stateid(struct nfs4_ol_stateid *stp) +static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); list_del_init(&stp->st_locks); - unhash_ol_stateid(stp); nfs4_unhash_stid(&stp->st_stid); + return unhash_ol_stateid(stp); } static void release_lock_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); + bool unhashed; spin_lock(&oo->oo_owner.so_client->cl_lock); - unhash_lock_stateid(stp); + unhashed = unhash_lock_stateid(stp); spin_unlock(&oo->oo_owner.so_client->cl_lock); - nfs4_put_stid(&stp->st_stid); + if (unhashed) + nfs4_put_stid(&stp->st_stid); } static void unhash_lockowner_locked(struct nfs4_lockowner *lo) @@ -1129,7 +1195,7 @@ static void release_lockowner(struct nfs4_lockowner 
*lo) while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); - unhash_lock_stateid(stp); + WARN_ON(!unhash_lock_stateid(stp)); put_ol_stateid_locked(stp, &reaplist); } spin_unlock(&clp->cl_lock); @@ -1142,21 +1208,26 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp, { struct nfs4_ol_stateid *stp; + lockdep_assert_held(&open_stp->st_stid.sc_client->cl_lock); + while (!list_empty(&open_stp->st_locks)) { stp = list_entry(open_stp->st_locks.next, struct nfs4_ol_stateid, st_locks); - unhash_lock_stateid(stp); + WARN_ON(!unhash_lock_stateid(stp)); put_ol_stateid_locked(stp, reaplist); } } -static void unhash_open_stateid(struct nfs4_ol_stateid *stp, +static bool unhash_open_stateid(struct nfs4_ol_stateid *stp, struct list_head *reaplist) { + bool unhashed; + lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); - unhash_ol_stateid(stp); + unhashed = unhash_ol_stateid(stp); release_open_stateid_locks(stp, reaplist); + return unhashed; } static void release_open_stateid(struct nfs4_ol_stateid *stp) @@ -1164,8 +1235,8 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp) LIST_HEAD(reaplist); spin_lock(&stp->st_stid.sc_client->cl_lock); - unhash_open_stateid(stp, &reaplist); - put_ol_stateid_locked(stp, &reaplist); + if (unhash_open_stateid(stp, &reaplist)) + put_ol_stateid_locked(stp, &reaplist); spin_unlock(&stp->st_stid.sc_client->cl_lock); free_ol_stateid_reaplist(&reaplist); } @@ -1210,8 +1281,8 @@ static void release_openowner(struct nfs4_openowner *oo) while (!list_empty(&oo->oo_owner.so_stateids)) { stp = list_first_entry(&oo->oo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); - unhash_open_stateid(stp, &reaplist); - put_ol_stateid_locked(stp, &reaplist); + if (unhash_open_stateid(stp, &reaplist)) + put_ol_stateid_locked(stp, &reaplist); } spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); @@ -1714,7 +1785,7 @@ __destroy_client(struct nfs4_client *clp) spin_lock(&state_lock); while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); - unhash_delegation_locked(dp); + WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); @@ -3332,6 +3403,7 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; + init_rwsem(&stp->st_rwsem); spin_lock(&oo->oo_owner.so_client->cl_lock); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -3921,6 +3993,18 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) return fl; } +/** + * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer + * @dp: a pointer to the nfs4_delegation we're adding. + * + * Return: + * On success: Return code will be 0 on success. + * + * On error: -EAGAIN if there was an existing delegation. + * nonzero if there is an error in other cases. 
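+ * (-EAGAIN should never be seen when fi_deleg_file has just been created; the WARN_ON_ONCE() below fires if it ever is.)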
+ * + */ + static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_stid.sc_file; @@ -3952,16 +4036,19 @@ static int nfs4_setlease(struct nfs4_delegation *dp) goto out_unlock; /* Race breaker */ if (fp->fi_deleg_file) { - status = 0; - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); + status = hash_delegation_locked(dp, fp); goto out_unlock; } fp->fi_deleg_file = filp; - fp->fi_delegees = 1; - hash_delegation_locked(dp, fp); + fp->fi_delegees = 0; + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); + if (status) { + /* Should never happen, this is a new fi_deleg_file */ + WARN_ON_ONCE(1); + goto out_fput; + } return 0; out_unlock: spin_unlock(&fp->fi_lock); @@ -3981,6 +4068,15 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, if (fp->fi_had_conflict) return ERR_PTR(-EAGAIN); + spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); + + if (status) + return ERR_PTR(status); + dp = alloc_init_deleg(clp, fh, odstate); if (!dp) return ERR_PTR(-ENOMEM); @@ -3999,9 +4095,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, status = -EAGAIN; goto out_unlock; } - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); - status = 0; + status = hash_delegation_locked(dp, fp); out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); @@ -4162,15 +4256,20 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ + down_read(&stp->st_rwsem); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); - if (status) + if (status) { + up_read(&stp->st_rwsem); goto out; + } } else { stp = open->op_stp; open->op_stp = NULL; init_open_stateid(stp, fp, open); + down_read(&stp->st_rwsem); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { + up_read(&stp->st_rwsem); release_open_stateid(stp); goto out; } @@ -4182,6 +4281,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf } update_stateid(&stp->st_stid.sc_stateid); memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + up_read(&stp->st_rwsem); if (nfsd4_has_session(&resp->cstate)) { if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { @@ -4346,7 +4446,7 @@ nfs4_laundromat(struct nfsd_net *nn) new_timeo = min(new_timeo, t); break; } - unhash_delegation_locked(dp); + WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); @@ -4397,9 +4497,9 @@ laundromat_main(struct work_struct *laundry) queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } -static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) +static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) { - if (!fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle)) + if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) return nfserr_bad_stateid; return nfs_ok; } @@ -4574,20 +4674,48 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, return nfs_ok; } +static struct file * +nfs4_find_file(struct nfs4_stid *s, int flags) +{ + switch (s->sc_type) { + case NFS4_DELEG_STID: + if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file)) + return NULL; + return get_file(s->sc_file->fi_deleg_file); + case NFS4_OPEN_STID: + case NFS4_LOCK_STID: + if (flags & RD_STATE) + return find_readable_file(s->sc_file); + else + return 
find_writeable_file(s->sc_file); + break; + } + + return NULL; +} + +static __be32 +nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags) +{ + __be32 status; + + status = nfsd4_check_openowner_confirmed(ols); + if (status) + return status; + return nfs4_check_openmode(ols, flags); +} + /* -* Checks for stateid operations -*/ + * Checks for stateid operations + */ __be32 nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, stateid_t *stateid, int flags, struct file **filpp) { - struct nfs4_stid *s; - struct nfs4_ol_stateid *stp = NULL; - struct nfs4_delegation *dp = NULL; - struct svc_fh *current_fh = &cstate->current_fh; - struct inode *ino = d_inode(current_fh->fh_dentry); + struct svc_fh *fhp = &cstate->current_fh; + struct inode *ino = d_inode(fhp->fh_dentry); struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct file *file = NULL; + struct nfs4_stid *s; __be32 status; if (filpp) @@ -4597,60 +4725,39 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, return nfserr_grace; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) - return check_special_stateids(net, current_fh, stateid, flags); + return check_special_stateids(net, fhp, stateid, flags); status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, nn); if (status) return status; - status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); + status = check_stateid_generation(stateid, &s->sc_stateid, + nfsd4_has_session(cstate)); if (status) goto out; + switch (s->sc_type) { case NFS4_DELEG_STID: - dp = delegstateid(s); - status = nfs4_check_delegmode(dp, flags); - if (status) - goto out; - if (filpp) { - file = dp->dl_stid.sc_file->fi_deleg_file; - if (!file) { - WARN_ON_ONCE(1); - status = nfserr_serverfault; - goto out; - } - get_file(file); - } + status = nfs4_check_delegmode(delegstateid(s), flags); break; case NFS4_OPEN_STID: case NFS4_LOCK_STID: - stp = openlockstateid(s); - status = nfs4_check_fh(current_fh, stp); - if (status) - goto out; - status = nfsd4_check_openowner_confirmed(stp); - if (status) - goto out; - status = nfs4_check_openmode(stp, flags); - if (status) - goto out; - if (filpp) { - struct nfs4_file *fp = stp->st_stid.sc_file; - - if (flags & RD_STATE) - file = find_readable_file(fp); - else - file = find_writeable_file(fp); - } + status = nfs4_check_olstateid(fhp, openlockstateid(s), flags); break; default: status = nfserr_bad_stateid; + break; + } + if (status) goto out; + status = nfs4_check_fh(fhp, s); + + if (!status && filpp) { + *filpp = nfs4_find_file(s, flags); + if (!*filpp) + status = nfserr_serverfault; } - status = nfs_ok; - if (file) - *filpp = file; out: nfs4_put_stid(s); return status; @@ -4707,7 +4814,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (check_for_locks(stp->st_stid.sc_file, lockowner(stp->st_stateowner))) break; - unhash_lock_stateid(stp); + WARN_ON(!unhash_lock_stateid(stp)); spin_unlock(&cl->cl_lock); nfs4_put_stid(s); ret = nfs_ok; @@ -4751,10 +4858,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ * revoked delegations are kept only for free_stateid. 
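* (on success, the checks below now return with stp->st_rwsem held for write; the seqid-mutating callers release it once the stateid has been updated, while the failure path releases it before returning)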
*/ return nfserr_bad_stateid; + down_write(&stp->st_rwsem); status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); - if (status) - return status; - return nfs4_check_fh(current_fh, stp); + if (status == nfs_ok) + status = nfs4_check_fh(current_fh, &stp->st_stid); + if (status != nfs_ok) + up_write(&stp->st_rwsem); + return status; } /* @@ -4801,6 +4911,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; } @@ -4831,11 +4942,14 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; - if (oo->oo_flags & NFS4_OO_CONFIRMED) + if (oo->oo_flags & NFS4_OO_CONFIRMED) { + up_write(&stp->st_rwsem); goto put_stateid; + } oo->oo_flags |= NFS4_OO_CONFIRMED; update_stateid(&stp->st_stid.sc_stateid); memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + up_write(&stp->st_rwsem); dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); @@ -4914,6 +5028,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); status = nfs_ok; put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); @@ -4923,20 +5038,23 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) { struct nfs4_client *clp = s->st_stid.sc_client; + bool unhashed; LIST_HEAD(reaplist); s->st_stid.sc_type = NFS4_CLOSED_STID; spin_lock(&clp->cl_lock); - unhash_open_stateid(s, &reaplist); + unhashed = unhash_open_stateid(s, &reaplist); if (clp->cl_minorversion) { - put_ol_stateid_locked(s, &reaplist); + if (unhashed) + put_ol_stateid_locked(s, &reaplist); spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); } else { spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); - move_to_close_lru(s, clp->net); + if (unhashed) + move_to_close_lru(s, clp->net); } } @@ -4964,6 +5082,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; update_stateid(&stp->st_stid.sc_stateid); memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + up_write(&stp->st_rwsem); nfsd4_close_open_stateid(stp); @@ -5194,6 +5313,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; + init_rwsem(&stp->st_rwsem); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -5362,6 +5482,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &open_stp, nn); if (status) goto out; + up_write(&open_stp->st_rwsem); open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, @@ -5369,6 +5490,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new); + if (status == nfs_ok) + down_write(&lock_stp->st_rwsem); } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, @@ -5474,6 +5597,8 @@ 
nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; + up_write(&lock_stp->st_rwsem); + /* * If this is a new, never-before-used stateid, and we are * returning an error, then just go ahead and release it. @@ -5644,6 +5769,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fput: fput(filp); put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); @@ -5975,7 +6101,7 @@ nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst, static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, struct list_head *collect, - void (*func)(struct nfs4_ol_stateid *)) + bool (*func)(struct nfs4_ol_stateid *)) { struct nfs4_openowner *oop; struct nfs4_ol_stateid *stp, *st_next; @@ -5989,9 +6115,9 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, list_for_each_entry_safe(lst, lst_next, &stp->st_locks, st_locks) { if (func) { - func(lst); - nfsd_inject_add_lock_to_list(lst, - collect); + if (func(lst)) + nfsd_inject_add_lock_to_list(lst, + collect); } ++count; /* @@ -6261,7 +6387,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, continue; atomic_inc(&clp->cl_refcount); - unhash_delegation_locked(dp); + WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, victims); } ++count; @@ -6591,7 +6717,7 @@ nfs4_state_shutdown_net(struct net *net) spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - unhash_delegation_locked(dp); + WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 158badf945df1..3dd1b616b92b3 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2139,9 +2139,31 @@ nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp, return nfsd4_encode_user(xdr, rqstp, ace->who_uid); } +static inline __be32 +nfsd4_encode_layout_type(struct xdr_stream *xdr, enum pnfs_layouttype layout_type) +{ + __be32 *p; + + if (layout_type) { + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(1); + *p++ = cpu_to_be32(layout_type); + } else { + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(0); + } + + return 0; +} + #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ FATTR4_WORD0_RDATTR_ERROR) #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID +#define WORD2_ABSENT_FS_ATTRS 0 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL static inline __be32 @@ -2170,7 +2192,7 @@ nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, { return 0; } #endif -static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) +static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *bmval2, u32 *rdattr_err) { /* As per referral draft: */ if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS || @@ -2183,6 +2205,7 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) } *bmval0 &= WORD0_ABSENT_FS_ATTRS; *bmval1 &= WORD1_ABSENT_FS_ATTRS; + *bmval2 &= WORD2_ABSENT_FS_ATTRS; return 0; } @@ -2246,8 +2269,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, BUG_ON(bmval2 & ~nfsd_suppattrs2(minorversion)); if (exp->ex_fslocs.migrated) { - BUG_ON(bmval[2]); - status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err); + status = 
fattr_handle_absent_fs(&bmval0, &bmval1, &bmval2, &rdattr_err); if (status) goto out; } @@ -2290,8 +2312,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, } #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) || - bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { + if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) || + bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { err = security_inode_getsecctx(d_inode(dentry), &context, &contextlen); contextsupport = (err == 0); @@ -2691,20 +2713,16 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_encode_hyper(p, stat.ino); } #ifdef CONFIG_NFSD_PNFS - if ((bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) || - (bmval2 & FATTR4_WORD2_LAYOUT_TYPES)) { - if (exp->ex_layout_type) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(1); - *p++ = cpu_to_be32(exp->ex_layout_type); - } else { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(0); - } + if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) { + status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type); + if (status) + goto out; + } + + if (bmval2 & FATTR4_WORD2_LAYOUT_TYPES) { + status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type); + if (status) + goto out; } if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) { diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index dbc4f85a50082..67685b6cfef30 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -533,15 +533,16 @@ struct nfs4_file { * Better suggestions welcome. */ struct nfs4_ol_stateid { - struct nfs4_stid st_stid; /* must be first field */ - struct list_head st_perfile; - struct list_head st_perstateowner; - struct list_head st_locks; - struct nfs4_stateowner * st_stateowner; - struct nfs4_clnt_odstate * st_clnt_odstate; - unsigned char st_access_bmap; - unsigned char st_deny_bmap; - struct nfs4_ol_stateid * st_openstp; + struct nfs4_stid st_stid; + struct list_head st_perfile; + struct list_head st_perstateowner; + struct list_head st_locks; + struct nfs4_stateowner *st_stateowner; + struct nfs4_clnt_odstate *st_clnt_odstate; + unsigned char st_access_bmap; + unsigned char st_deny_bmap; + struct nfs4_ol_stateid *st_openstp; + struct rw_semaphore st_rwsem; }; static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 92e48c70f0f05..39ddcaf0918f1 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -412,16 +412,36 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags) { struct fsnotify_mark *lmark, *mark; + LIST_HEAD(to_free); + /* + * We have to be really careful here. Any time we drop mark_mutex, e.g. + * fsnotify_clear_marks_by_inode() can come in and free marks, including + * ones already on our to_free list, so we have to hold mark_mutex whenever + * we access that list. And freeing a mark requires us to drop mark_mutex, + * so we can reliably free only the first mark in the list. That's why we + * first move the marks to free onto the to_free list in one go and then + * free them from to_free one by one. 
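+ * Concretely, each pass of the loop below re-takes mark_mutex, grabs a reference on the first mark on to_free, destroys that mark, drops the mutex, and only then drops the reference.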
+ */ mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { - if (mark->flags & flags) { - fsnotify_get_mark(mark); - fsnotify_destroy_mark_locked(mark, group); - fsnotify_put_mark(mark); - } + if (mark->flags & flags) + list_move(&mark->g_list, &to_free); } mutex_unlock(&group->mark_mutex); + + while (1) { + mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); + if (list_empty(&to_free)) { + mutex_unlock(&group->mark_mutex); + break; + } + mark = list_first_entry(&to_free, struct fsnotify_mark, g_list); + fsnotify_get_mark(mark); + fsnotify_destroy_mark_locked(mark, group); + mutex_unlock(&group->mark_mutex); + fsnotify_put_mark(mark); + } } /* diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index f906a250da6ad..9ea70127074d1 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -686,7 +686,7 @@ static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb, if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { u64 s = i_size_read(inode); - sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) + + sector_t sector = ((u64)p_cpos << (osb->s_clustersize_bits - 9)) + (do_div(s, osb->s_clustersize) >> 9); ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector, @@ -911,7 +911,7 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN)); ret = blkdev_issue_zeroout(osb->sb->s_bdev, - p_cpos << (osb->s_clustersize_bits - 9), + (u64)p_cpos << (osb->s_clustersize_bits - 9), zero_len_head >> 9, GFP_NOFS, false); if (ret < 0) mlog_errno(ret); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index fdf4b41d0609a..523e485a11b84 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -1439,6 +1439,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data, int found, ret; int set_maybe; int dispatch_assert = 0; + int dispatched = 0; if (!dlm_grab(dlm)) return DLM_MASTER_RESP_NO; @@ -1658,15 +1659,18 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data, mlog(ML_ERROR, "failed to dispatch assert master work\n"); response = DLM_MASTER_RESP_ERROR; dlm_lockres_put(res); - } else + } else { + dispatched = 1; __dlm_lockres_grab_inflight_worker(dlm, res); + } spin_unlock(&res->spinlock); } else { if (res) dlm_lockres_put(res); } - dlm_put(dlm); + if (!dispatched) + dlm_put(dlm); return response; } @@ -2090,7 +2094,6 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, /* queue up work for dlm_assert_master_worker */ - dlm_grab(dlm); /* get an extra ref for the work item */ dlm_init_work_item(dlm, item, dlm_assert_master_worker, NULL); item->u.am.lockres = res; /* already have a ref */ /* can optionally ignore node numbers higher than this node */ @@ -2515,6 +2518,11 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, spin_lock(&dlm->master_lock); ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, namelen, target, dlm->node_num); + /* get an extra reference on the mle. + * otherwise the assert_master from the new + * master will destroy this. + */ + dlm_get_mle_inuse(mle); spin_unlock(&dlm->master_lock); spin_unlock(&dlm->spinlock); @@ -2550,6 +2558,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, if (mle_added) { dlm_mle_detach_hb_events(dlm, mle); dlm_put_mle(mle); + dlm_put_mle_inuse(mle); } else if (mle) { kmem_cache_free(dlm_mle_cache, mle); mle = NULL; @@ -2567,17 +2576,6 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, * ensure that all assert_master work is flushed. 
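* (the extra inuse reference on the mle that used to be taken here is now taken earlier, under dlm->master_lock, when the migration mle is added, so the assert_master from the new master cannot free the mle in the meantime)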
*/ flush_workqueue(dlm->dlm_worker); - /* get an extra reference on the mle. - * otherwise the assert_master from the new - * master will destroy this. - * also, make sure that all callers of dlm_get_mle - * take both dlm->spinlock and dlm->master_lock */ - spin_lock(&dlm->spinlock); - spin_lock(&dlm->master_lock); - dlm_get_mle_inuse(mle); - spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); - /* notify new node and send all lock state */ /* call send_one_lockres with migration flag. * this serves as notice to the target node that a @@ -3306,6 +3304,15 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) mle->new_master != dead_node) continue; + if (mle->new_master == dead_node && mle->inuse) { + mlog(ML_NOTICE, "%s: target %u died during " + "migration from %u, the MLE is " + "still in use, ignore it!\n", + dlm->name, dead_node, + mle->master); + continue; + } + /* If we have reached this point, this mle needs to be * removed from the list and freed. */ dlm_clean_migration_mle(dlm, mle); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index ce12e0b1a31f1..f25ff5d3a2f99 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1694,6 +1694,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, unsigned int hash; int master = DLM_LOCK_RES_OWNER_UNKNOWN; u32 flags = DLM_ASSERT_MASTER_REQUERY; + int dispatched = 0; if (!dlm_grab(dlm)) { /* since the domain has gone away on this @@ -1719,8 +1720,10 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, dlm_put(dlm); /* sender will take care of this and retry */ return ret; - } else + } else { + dispatched = 1; __dlm_lockres_grab_inflight_worker(dlm, res); + } spin_unlock(&res->spinlock); } else { /* put.. in case we are not the master */ @@ -1730,7 +1733,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, } spin_unlock(&dlm->spinlock); - dlm_put(dlm); + if (!dispatched) + dlm_put(dlm); return master; } @@ -2356,6 +2360,8 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) break; } } + dlm_lockres_clear_refmap_bit(dlm, res, + dead_node); spin_unlock(&res->spinlock); continue; } diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 8b23aa2f52dda..3623ab6fa97f2 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1390,6 +1390,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, unsigned int gen; int noqueue_attempted = 0; int dlm_locked = 0; + int kick_dc = 0; if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) { mlog_errno(-EINVAL); @@ -1524,7 +1525,12 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, unlock: lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); + /* ocfs2_unblock_lock requeues on seeing OCFS2_LOCK_UPCONVERT_FINISHING */ + kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED); + spin_unlock_irqrestore(&lockres->l_lock, flags); + if (kick_dc) + ocfs2_wake_downconvert_thread(osb); out: /* * This is helping work around a lock inversion between the page lock @@ -4025,9 +4031,13 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) osb->dc_work_sequence = osb->dc_wake_sequence; processed = osb->blocked_lock_count; - while (processed) { - BUG_ON(list_empty(&osb->blocked_lock_list)); - + /* + * blocked lock processing in this loop might call iput which can + * remove items off osb->blocked_lock_list. Downconvert up to 
Downconvert up to + * 'processed' number of locks, but stop short if we had some + * removed in ocfs2_mark_lockres_freeing when downconverting. + */ + while (processed && !list_empty(&osb->blocked_lock_list)) { lockres = list_entry(osb->blocked_lock_list.next, struct ocfs2_lock_res, l_blocked_list); list_del_init(&lockres->l_blocked_list); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 176fe6afd94ec..4d5e0a573f4f2 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -365,6 +365,8 @@ static int ocfs2_mknod(struct inode *dir, mlog_errno(status); goto leave; } + /* update inode->i_mode after mask with "umask". */ + inode->i_mode = mode; handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, S_ISDIR(mode), diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 403c5660b3064..a482e312c7b28 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1550,8 +1550,8 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",localflocks,"); if (osb->osb_cluster_stack[0]) - seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, - osb->osb_cluster_stack); + seq_show_option_n(s, "cluster_stack", osb->osb_cluster_stack, + OCFS2_STACK_LABEL_LEN); if (opts & OCFS2_MOUNT_USRQUOTA) seq_printf(s, ",usrquota"); if (opts & OCFS2_MOUNT_GRPQUOTA) diff --git a/fs/open.c b/fs/open.c index 98e5a52dc68c9..f9d2bf9350996 100644 --- a/fs/open.c +++ b/fs/open.c @@ -678,18 +678,18 @@ int open_check_o_direct(struct file *f) } static int do_dentry_open(struct file *f, + struct inode *inode, int (*open)(struct inode *, struct file *), const struct cred *cred) { static const struct file_operations empty_fops = {}; - struct inode *inode; int error; f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; path_get(&f->f_path); - inode = f->f_inode = f->f_path.dentry->d_inode; + f->f_inode = inode; f->f_mapping = inode->i_mapping; if (unlikely(f->f_flags & O_PATH)) { @@ -793,7 +793,8 @@ int finish_open(struct file *file, struct dentry *dentry, BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ file->f_path.dentry = dentry; - error = do_dentry_open(file, open, current_cred()); + error = do_dentry_open(file, d_backing_inode(dentry), open, + current_cred()); if (!error) *opened |= FILE_OPENED; @@ -822,6 +823,28 @@ int finish_no_open(struct file *file, struct dentry *dentry) } EXPORT_SYMBOL(finish_no_open); +/** + * vfs_open - open the file at the given path + * @path: path to open + * @file: newly allocated file with f_flag initialized + * @cred: credentials to use + */ +int vfs_open(const struct path *path, struct file *file, + const struct cred *cred) +{ + struct dentry *dentry = path->dentry; + struct inode *inode = dentry->d_inode; + + file->f_path = *path; + if (dentry->d_flags & DCACHE_OP_SELECT_INODE) { + inode = dentry->d_op->d_select_inode(dentry, file->f_flags); + if (IS_ERR(inode)) + return PTR_ERR(inode); + } + + return do_dentry_open(file, inode, NULL, cred); +} + struct file *dentry_open(const struct path *path, int flags, const struct cred *cred) { @@ -853,26 +876,6 @@ struct file *dentry_open(const struct path *path, int flags, } EXPORT_SYMBOL(dentry_open); -/** - * vfs_open - open the file at the given path - * @path: path to open - * @filp: newly allocated file with f_flag initialized - * @cred: credentials to use - */ -int vfs_open(const struct path *path, struct file *filp, - const struct cred *cred) -{ - struct inode *inode = path->dentry->d_inode; - - if (inode->i_op->dentry_open) - return 
inode->i_op->dentry_open(path->dentry, filp, cred); - else { - filp->f_path = *path; - return do_dentry_open(filp, NULL, cred); - } -} -EXPORT_SYMBOL(vfs_open); - static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) { int lookup_flags = 0; diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 84d693d374284..758012bfd5f03 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -22,9 +22,9 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) { - ssize_t list_size, size; - char *buf, *name, *value; - int error; + ssize_t list_size, size, value_size = 0; + char *buf, *name, *value = NULL; + int uninitialized_var(error); if (!old->d_inode->i_op->getxattr || !new->d_inode->i_op->getxattr) @@ -41,29 +41,40 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) if (!buf) return -ENOMEM; - error = -ENOMEM; - value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); - if (!value) - goto out; - list_size = vfs_listxattr(old, buf, list_size); if (list_size <= 0) { error = list_size; - goto out_free_value; + goto out; } for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { - size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); - if (size <= 0) { +retry: + size = vfs_getxattr(old, name, value, value_size); + if (size == -ERANGE) + size = vfs_getxattr(old, name, NULL, 0); + + if (size < 0) { error = size; - goto out_free_value; + break; + } + + if (size > value_size) { + void *new; + + new = krealloc(value, size, GFP_KERNEL); + if (!new) { + error = -ENOMEM; + break; + } + value = new; + value_size = size; + goto retry; } + error = vfs_setxattr(new, name, value, size, 0); if (error) - goto out_free_value; + break; } - -out_free_value: kfree(value); out: kfree(buf); @@ -81,11 +92,11 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) if (len == 0) return 0; - old_file = ovl_path_open(old, O_RDONLY); + old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY); if (IS_ERR(old_file)) return PTR_ERR(old_file); - new_file = ovl_path_open(new, O_WRONLY); + new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY); if (IS_ERR(new_file)) { error = PTR_ERR(new_file); goto out_fput; @@ -267,7 +278,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, out_cleanup: ovl_cleanup(wdir, newdentry); - goto out; + goto out2; } /* diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 04f1248846877..a1b069e5e363e 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -45,6 +45,19 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) int err; struct dentry *upperdentry; + /* + * Check for permissions before trying to copy-up. This is redundant + * since it will be rechecked later by ->setattr() on upper dentry. But + * without this, copy-up can be triggered by just about anybody. + * + * We don't initialize inode->size, which just means that + * inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not + * check for a swapfile (which this won't be anyway). 
+ */ + err = inode_change_ok(dentry->d_inode, attr); + if (err) + return err; + err = ovl_want_write(dentry); if (err) goto out; @@ -336,37 +349,33 @@ static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, return true; } -static int ovl_dentry_open(struct dentry *dentry, struct file *file, - const struct cred *cred) +struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags) { int err; struct path realpath; enum ovl_path_type type; - bool want_write = false; + + if (d_is_dir(dentry)) + return d_backing_inode(dentry); type = ovl_path_real(dentry, &realpath); - if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) { - want_write = true; + if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) { err = ovl_want_write(dentry); if (err) - goto out; + return ERR_PTR(err); - if (file->f_flags & O_TRUNC) + if (file_flags & O_TRUNC) err = ovl_copy_up_last(dentry, NULL, true); else err = ovl_copy_up(dentry); + ovl_drop_write(dentry); if (err) - goto out_drop_write; + return ERR_PTR(err); ovl_path_upper(dentry, &realpath); } - err = vfs_open(&realpath, file, cred); -out_drop_write: - if (want_write) - ovl_drop_write(dentry); -out: - return err; + return d_backing_inode(realpath.dentry); } static const struct inode_operations ovl_file_inode_operations = { @@ -377,7 +386,6 @@ static const struct inode_operations ovl_file_inode_operations = { .getxattr = ovl_getxattr, .listxattr = ovl_listxattr, .removexattr = ovl_removexattr, - .dentry_open = ovl_dentry_open, }; static const struct inode_operations ovl_symlink_inode_operations = { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 17ac5afc9ffbc..ea5a40b06e3ad 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -173,6 +173,7 @@ ssize_t ovl_getxattr(struct dentry *dentry, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); int ovl_removexattr(struct dentry *dentry, const char *name); +struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, struct ovl_entry *oe); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 907870e81a72e..adcb1398c4812 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -23,6 +23,7 @@ struct ovl_cache_entry { u64 ino; struct list_head l_node; struct rb_node node; + struct ovl_cache_entry *next_maybe_whiteout; bool is_whiteout; char name[]; }; @@ -39,7 +40,7 @@ struct ovl_readdir_data { struct rb_root root; struct list_head *list; struct list_head middle; - struct dentry *dir; + struct ovl_cache_entry *first_maybe_whiteout; int count; int err; }; @@ -79,7 +80,7 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, return NULL; } -static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, +static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, const char *name, int len, u64 ino, unsigned int d_type) { @@ -98,29 +99,8 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, p->is_whiteout = false; if (d_type == DT_CHR) { - struct dentry *dentry; - const struct cred *old_cred; - struct cred *override_cred; - - override_cred = prepare_creds(); - if (!override_cred) { - kfree(p); - return NULL; - } - - /* - * CAP_DAC_OVERRIDE for lookup - */ - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - old_cred = override_creds(override_cred); - - dentry = lookup_one_len(name, dir, len); - if 
(!IS_ERR(dentry)) { - p->is_whiteout = ovl_is_whiteout(dentry); - dput(dentry); - } - revert_creds(old_cred); - put_cred(override_cred); + p->next_maybe_whiteout = rdd->first_maybe_whiteout; + rdd->first_maybe_whiteout = p; } return p; } @@ -148,7 +128,7 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, return 0; } - p = ovl_cache_entry_new(rdd->dir, name, len, ino, d_type); + p = ovl_cache_entry_new(rdd, name, len, ino, d_type); if (p == NULL) return -ENOMEM; @@ -169,7 +149,7 @@ static int ovl_fill_lower(struct ovl_readdir_data *rdd, if (p) { list_move_tail(&p->l_node, &rdd->middle); } else { - p = ovl_cache_entry_new(rdd->dir, name, namelen, ino, d_type); + p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type); if (p == NULL) rdd->err = -ENOMEM; else @@ -219,6 +199,43 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name, return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type); } +static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) +{ + int err; + struct ovl_cache_entry *p; + struct dentry *dentry; + const struct cred *old_cred; + struct cred *override_cred; + + override_cred = prepare_creds(); + if (!override_cred) + return -ENOMEM; + + /* + * CAP_DAC_OVERRIDE for lookup + */ + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); + old_cred = override_creds(override_cred); + + err = mutex_lock_killable(&dir->d_inode->i_mutex); + if (!err) { + while (rdd->first_maybe_whiteout) { + p = rdd->first_maybe_whiteout; + rdd->first_maybe_whiteout = p->next_maybe_whiteout; + dentry = lookup_one_len(p->name, dir, p->len); + if (!IS_ERR(dentry)) { + p->is_whiteout = ovl_is_whiteout(dentry); + dput(dentry); + } + } + mutex_unlock(&dir->d_inode->i_mutex); + } + revert_creds(old_cred); + put_cred(override_cred); + + return err; +} + static inline int ovl_dir_read(struct path *realpath, struct ovl_readdir_data *rdd) { @@ -229,7 +246,7 @@ static inline int ovl_dir_read(struct path *realpath, if (IS_ERR(realfile)) return PTR_ERR(realfile); - rdd->dir = realpath->dentry; + rdd->first_maybe_whiteout = NULL; rdd->ctx.pos = 0; do { rdd->count = 0; @@ -238,6 +255,10 @@ static inline int ovl_dir_read(struct path *realpath, if (err >= 0) err = rdd->err; } while (!err && rdd->count); + + if (!err && rdd->first_maybe_whiteout) + err = ovl_check_whiteouts(realpath->dentry, rdd); + fput(realfile); return err; @@ -550,7 +571,8 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) (int) PTR_ERR(dentry)); continue; } - ovl_cleanup(upper->d_inode, dentry); + if (dentry->d_inode) + ovl_cleanup(upper->d_inode, dentry); dput(dentry); } mutex_unlock(&upper->d_inode->i_mutex); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index bf8537c7f4552..bd6d5c1e667d2 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -275,6 +276,7 @@ static void ovl_dentry_release(struct dentry *dentry) static const struct dentry_operations ovl_dentry_operations = { .d_release = ovl_dentry_release, + .d_select_inode = ovl_d_select_inode, }; static struct ovl_entry *ovl_alloc_entry(unsigned int numlower) @@ -473,6 +475,7 @@ static void ovl_put_super(struct super_block *sb) mntput(ufs->upper_mnt); for (i = 0; i < ufs->numlower; i++) mntput(ufs->lower_mnt[i]); + kfree(ufs->lower_mnt); kfree(ufs->config.lowerdir); kfree(ufs->config.upperdir); @@ -517,10 +520,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) struct super_block *sb = 
dentry->d_sb; struct ovl_fs *ufs = sb->s_fs_info; - seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir); + seq_show_option(m, "lowerdir", ufs->config.lowerdir); if (ufs->config.upperdir) { - seq_printf(m, ",upperdir=%s", ufs->config.upperdir); - seq_printf(m, ",workdir=%s", ufs->config.workdir); + seq_show_option(m, "upperdir", ufs->config.upperdir); + seq_show_option(m, "workdir", ufs->config.workdir); } return 0; } @@ -845,6 +848,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) } sb->s_stack_depth = 0; + sb->s_maxbytes = MAX_LFS_FILESIZE; if (ufs->config.upperdir) { if (!ufs->config.workdir) { pr_err("overlayfs: missing 'workdir'\n"); @@ -980,9 +984,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) oe->lowerstack[i].dentry = stack[i].dentry; oe->lowerstack[i].mnt = ufs->lower_mnt[i]; } + kfree(stack); root_dentry->d_fsdata = oe; + ovl_copyattr(ovl_dentry_real(root_dentry)->d_inode, + root_dentry->d_inode); + sb->s_magic = OVERLAYFS_SUPER_MAGIC; sb->s_op = &ovl_super_operations; sb->s_root = root_dentry; diff --git a/fs/pnode.h b/fs/pnode.h index 7114ce6e6b9ef..0fcdbe7ca6480 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -20,8 +20,6 @@ #define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED) #define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED) #define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED) -#define IS_MNT_LOCKED_AND_LAZY(m) \ - (((m)->mnt.mnt_flags & (MNT_LOCKED|MNT_SYNC_UMOUNT)) == MNT_LOCKED) #define CL_EXPIRE 0x01 #define CL_SLAVE 0x02 diff --git a/fs/proc/array.c b/fs/proc/array.c index fd02a9ebfc30e..70f9c4cba31f6 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -364,7 +364,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task, int whole) { - unsigned long vsize, eip, esp, wchan = ~0UL; + unsigned long vsize, eip, esp, wchan = 0; int priority, nice; int tty_pgrp = -1, tty_nr = 0; sigset_t sigign, sigcatch; @@ -496,7 +496,19 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL); seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL); seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL); - seq_put_decimal_ull(m, ' ', wchan); + + /* + * We used to output the absolute kernel address, but that's an + * information leak - so instead we show a 0/1 flag here, to signal + * to user-space whether there's a wchan field in /proc/PID/wchan. + * + * This works with older implementations of procps as well. 
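Because the field keeps its slot in /proc/PID/stat and stays numeric, old parsers are unaffected. A minimal standalone reader of field 35 might look like the following sketch (it assumes the standard stat layout and is not taken from procps):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char buf[4096];
	FILE *f = fopen("/proc/self/stat", "r");

	if (!f)
		return 1;
	if (!fgets(buf, sizeof(buf), f)) {
		fclose(f);
		return 1;
	}
	fclose(f);

	/* comm (field 2) may contain spaces, so scan from the last ')' */
	char *p = strrchr(buf, ')');
	/* skip 33 separators to land on field 35, wchan */
	for (int i = 0; i < 33 && p; i++)
		p = strchr(p + 1, ' ');
	if (!p)
		return 1;

	unsigned long wchan = strtoul(p + 1, NULL, 10);
	printf("blocked in kernel: %s\n", wchan ? "yes" : "no");
	return 0;
}

Under the new scheme the value parsed is simply 0 or 1 rather than a kernel address, so the waiting-or-not distinction survives while the address no longer leaks.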
+ */ + if (wchan) + seq_puts(m, " 1"); + else + seq_puts(m, " 0"); + seq_put_decimal_ull(m, ' ', 0); seq_put_decimal_ull(m, ' ', 0); seq_put_decimal_ll(m, ' ', task->exit_signal); diff --git a/fs/proc/base.c b/fs/proc/base.c index 093ca14f57015..fcdeb1eb39211 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -238,13 +238,10 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, wchan = get_wchan(task); - if (lookup_symbol_name(wchan, symname) < 0) { - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - return 0; - seq_printf(m, "%lu", wchan); - } else { + if (wchan && ptrace_may_access(task, PTRACE_MODE_READ) && !lookup_symbol_name(wchan, symname)) seq_printf(m, "%s", symname); - } + else + seq_putc(m, '0'); return 0; } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index df6327a2b8650..e5dee5c3188eb 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -373,6 +373,10 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, WARN(1, "create '/proc/%s' by hand\n", qstr.name); return NULL; } + if (is_empty_pde(*parent)) { + WARN(1, "attempt to add to permanently empty directory"); + return NULL; + } ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL); if (!ent) @@ -455,6 +459,25 @@ struct proc_dir_entry *proc_mkdir(const char *name, } EXPORT_SYMBOL(proc_mkdir); +struct proc_dir_entry *proc_create_mount_point(const char *name) +{ + umode_t mode = S_IFDIR | S_IRUGO | S_IXUGO; + struct proc_dir_entry *ent, *parent = NULL; + + ent = __proc_create(&parent, name, mode, 2); + if (ent) { + ent->data = NULL; + ent->proc_fops = NULL; + ent->proc_iops = NULL; + if (proc_register(parent, ent) < 0) { + kfree(ent); + parent->nlink--; + ent = NULL; + } + } + return ent; +} + struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct file_operations *proc_fops, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 8272aaba1bb06..e3eb5524639fb 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -423,6 +423,10 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; PROC_I(inode)->pde = de; + if (is_empty_pde(de)) { + make_empty_dir_inode(inode); + return inode; + } if (de->mode) { inode->i_mode = de->mode; inode->i_uid = de->uid; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c835b94c0cd3a..aa2781095bd15 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -191,6 +191,12 @@ static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) } extern void pde_put(struct proc_dir_entry *); +static inline bool is_empty_pde(const struct proc_dir_entry *pde) +{ + return S_ISDIR(pde->mode) && !pde->proc_iops; +} +struct proc_dir_entry *proc_create_mount_point(const char *name); + /* * inode.c */ diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index fea2561d773bb..fdda62e6115e1 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -19,6 +19,28 @@ static const struct inode_operations proc_sys_inode_operations; static const struct file_operations proc_sys_dir_file_operations; static const struct inode_operations proc_sys_dir_operations; +/* Support for permanently empty directories */ + +struct ctl_table sysctl_mount_point[] = { + { } +}; + +static bool is_empty_dir(struct ctl_table_header *head) +{ + return head->ctl_table[0].child == sysctl_mount_point; +} + +static void set_empty_dir(struct ctl_dir *dir) +{ + dir->header.ctl_table[0].child = 
sysctl_mount_point; +} + +static void clear_empty_dir(struct ctl_dir *dir) + +{ + dir->header.ctl_table[0].child = NULL; +} + void proc_sys_poll_notify(struct ctl_table_poll *poll) { if (!poll) @@ -187,6 +209,17 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) struct ctl_table *entry; int err; + /* Is this a permanently empty directory? */ + if (is_empty_dir(&dir->header)) + return -EROFS; + + /* Am I creating a permanently empty directory? */ + if (header->ctl_table == sysctl_mount_point) { + if (!RB_EMPTY_ROOT(&dir->root)) + return -EINVAL; + set_empty_dir(dir); + } + dir->header.nreg++; header->parent = dir; err = insert_links(header); @@ -202,6 +235,8 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) erase_header(header); put_links(header); fail_links: + if (header->ctl_table == sysctl_mount_point) + clear_empty_dir(dir); header->parent = NULL; drop_sysctl_table(&dir->header); return err; @@ -419,6 +454,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, inode->i_mode |= S_IFDIR; inode->i_op = &proc_sys_dir_operations; inode->i_fop = &proc_sys_dir_file_operations; + if (is_empty_dir(head)) + make_empty_dir_inode(inode); } out: return inode; diff --git a/fs/proc/root.c b/fs/proc/root.c index b7fa4bfe896a2..68feb0f70e635 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -112,9 +112,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, ns = task_active_pid_ns(current); options = data; - if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) - return ERR_PTR(-EPERM); - /* Does the mounter have privilege over the pid namespace? */ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); @@ -159,7 +156,7 @@ static struct file_system_type proc_fs_type = { .name = "proc", .mount = proc_mount, .kill_sb = proc_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT, }; void __init proc_root_init(void) @@ -182,10 +179,10 @@ void __init proc_root_init(void) #endif proc_mkdir("fs", NULL); proc_mkdir("driver", NULL); - proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ + proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) /* just give it a mountpoint */ - proc_mkdir("openprom", NULL); + proc_create_mount_point("openprom"); #endif proc_tty_init(); proc_mkdir("bus", NULL); diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index dc43b5f29305e..3adcc4669faca 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -461,22 +461,18 @@ static struct file_system_type pstore_fs_type = { .kill_sb = pstore_kill_sb, }; -static struct kobject *pstore_kobj; - static int __init init_pstore_fs(void) { - int err = 0; + int err; /* Create a convenient mount point for people to access pstore */ - pstore_kobj = kobject_create_and_add("pstore", fs_kobj); - if (!pstore_kobj) { - err = -ENOMEM; + err = sysfs_create_mount_point(fs_kobj, "pstore"); + if (err) goto out; - } err = register_filesystem(&pstore_fs_type); if (err < 0) - kobject_put(pstore_kobj); + sysfs_remove_mount_point(fs_kobj, "pstore"); out: return err; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 0111ad0466ed4..cf6fa25f884b4 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -714,18 +714,20 @@ static int reiserfs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",acl"); if (REISERFS_SB(s)->s_jdev) - seq_printf(seq, 
",jdev=%s", REISERFS_SB(s)->s_jdev); + seq_show_option(seq, "jdev", REISERFS_SB(s)->s_jdev); if (journal->j_max_commit_age != journal->j_default_max_commit_age) seq_printf(seq, ",commit=%d", journal->j_max_commit_age); #ifdef CONFIG_QUOTA if (REISERFS_SB(s)->s_qf_names[USRQUOTA]) - seq_printf(seq, ",usrjquota=%s", REISERFS_SB(s)->s_qf_names[USRQUOTA]); + seq_show_option(seq, "usrjquota", + REISERFS_SB(s)->s_qf_names[USRQUOTA]); else if (opts & (1 << REISERFS_USRQUOTA)) seq_puts(seq, ",usrquota"); if (REISERFS_SB(s)->s_qf_names[GRPQUOTA]) - seq_printf(seq, ",grpjquota=%s", REISERFS_SB(s)->s_qf_names[GRPQUOTA]); + seq_show_option(seq, "grpjquota", + REISERFS_SB(s)->s_qf_names[GRPQUOTA]); else if (opts & (1 << REISERFS_GRPQUOTA)) seq_puts(seq, ",grpquota"); if (REISERFS_SB(s)->s_jquota_fmt) { diff --git a/fs/signalfd.c b/fs/signalfd.c index 7e412ad748363..270221fcef42c 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -121,8 +121,9 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, * Other callers might not initialize the si_lsb field, * so check explicitly for the right codes here. */ - if (kinfo->si_code == BUS_MCEERR_AR || - kinfo->si_code == BUS_MCEERR_AO) + if (kinfo->si_signo == SIGBUS && + (kinfo->si_code == BUS_MCEERR_AR || + kinfo->si_code == BUS_MCEERR_AO)) err |= __put_user((short) kinfo->si_addr_lsb, &uinfo->ssi_addr_lsb); #endif diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 0b45ff42f3741..94374e4350259 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -121,3 +121,37 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, return kernfs_rename_ns(kn, new_parent, kn->name, new_ns); } + +/** + * sysfs_create_mount_point - create an always empty directory + * @parent_kobj: kobject that will contain this always empty directory + * @name: The name of the always empty directory to add + */ +int sysfs_create_mount_point(struct kobject *parent_kobj, const char *name) +{ + struct kernfs_node *kn, *parent = parent_kobj->sd; + + kn = kernfs_create_empty_dir(parent, name); + if (IS_ERR(kn)) { + if (PTR_ERR(kn) == -EEXIST) + sysfs_warn_dup(parent, name); + return PTR_ERR(kn); + } + + return 0; +} +EXPORT_SYMBOL_GPL(sysfs_create_mount_point); + +/** + * sysfs_remove_mount_point - remove an always empty directory. 
+ * @parent_kobj: kobject that will contain this always empty directory + * @name: The name of the always empty directory to remove + * + */ +void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name) +{ + struct kernfs_node *parent = parent_kobj->sd; + + kernfs_remove_by_name_ns(parent, name, NULL); +} +EXPORT_SYMBOL_GPL(sysfs_remove_mount_point); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 8a49486bf30c9..1c6ac6fcee9fb 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -31,9 +31,6 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, bool new_sb; if (!(flags & MS_KERNMOUNT)) { - if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) - return ERR_PTR(-EPERM); - if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) return ERR_PTR(-EPERM); } @@ -58,7 +55,7 @@ static struct file_system_type sysfs_fs_type = { .name = "sysfs", .mount = sysfs_mount, .kill_sb = sysfs_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT, }; int __init sysfs_init(void) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index d92bdf3b079a7..a43df11a163f1 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -631,14 +631,12 @@ bool tracefs_initialized(void) return tracefs_registered; } -static struct kobject *trace_kobj; - static int __init tracefs_init(void) { int retval; - trace_kobj = kobject_create_and_add("tracing", kernel_kobj); - if (!trace_kobj) + retval = sysfs_create_mount_point(kernel_kobj, "tracing"); + if (retval) return -EINVAL; retval = register_filesystem(&trace_fs_type); diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 96f3448b6eb40..fd65b3f1923cc 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -652,11 +652,8 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode, { int err; - mutex_lock(&inode->i_mutex); err = security_inode_init_security(inode, dentry, qstr, &init_xattrs, 0); - mutex_unlock(&inode->i_mutex); - if (err) { struct ubifs_info *c = dentry->i_sb->s_fs_info; ubifs_err(c, "cannot initialize security for inode %lu, error %d", diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 6afac3d561ac8..78a40ef0c4636 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -2052,14 +2052,29 @@ void udf_write_aext(struct inode *inode, struct extent_position *epos, epos->offset += adsize; } +/* + * Only 1 indirect extent in a row really makes sense but allow upto 16 in case + * someone does some weird stuff. 
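The counter added in the hunk below is the usual defence against looping or runaway chains in on-disk metadata: count the hops and bail out past a sane bound. A stripped-down sketch of the pattern, with the struct and all names invented for illustration:

#include <stdio.h>

struct ext {
	int is_indirect;
	struct ext *next;
};

#define MAX_INDIR 16	/* mirrors UDF_MAX_INDIR_EXTS */

static int walk(struct ext *e)
{
	unsigned int indirections = 0;

	while (e && e->is_indirect) {
		if (++indirections > MAX_INDIR) {
			fprintf(stderr, "too many indirect extents\n");
			return -1;	/* corrupt or malicious chain */
		}
		e = e->next;
	}
	return 0;
}

int main(void)
{
	struct ext tail = { 0, NULL };
	struct ext head = { 1, &tail };

	return walk(&head) ? 1 : 0;
}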
+ */ +#define UDF_MAX_INDIR_EXTS 16 + int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, struct kernel_lb_addr *eloc, uint32_t *elen, int inc) { int8_t etype; + unsigned int indirections = 0; while ((etype = udf_current_aext(inode, epos, eloc, elen, inc)) == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) { int block; + + if (++indirections > UDF_MAX_INDIR_EXTS) { + udf_err(inode->i_sb, + "too many indirect extents in inode %lu\n", + inode->i_ino); + return -1; + } + epos->block = *eloc; epos->offset = sizeof(struct allocExtDesc); brelse(epos->bh); diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index b84fee372734b..2eafe2c4d2397 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -133,11 +133,15 @@ int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) if (c < 0x80U) utf_o->u_name[utf_o->u_len++] = (uint8_t)c; else if (c < 0x800U) { + if (utf_o->u_len > (UDF_NAME_LEN - 4)) + break; utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xc0 | (c >> 6)); utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f)); } else { + if (utf_o->u_len > (UDF_NAME_LEN - 5)) + break; utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xe0 | (c >> 12)); utf_o->u_name[utf_o->u_len++] = @@ -178,17 +182,22 @@ int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length) { unsigned c, i, max_val, utf_char; - int utf_cnt, u_len; + int utf_cnt, u_len, u_ch; memset(ocu, 0, sizeof(dstring) * length); ocu[0] = 8; max_val = 0xffU; + u_ch = 1; try_again: u_len = 0U; utf_char = 0U; utf_cnt = 0U; for (i = 0U; i < utf->u_len; i++) { + /* Name didn't fit? */ + if (u_len + 1 + u_ch >= length) + return 0; + c = (uint8_t)utf->u_name[i]; /* Complete a multi-byte UTF-8 character */ @@ -230,6 +239,7 @@ static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length) if (max_val == 0xffU) { max_val = 0xffffU; ocu[0] = (uint8_t)0x10U; + u_ch = 2; goto try_again; } goto error_out; @@ -282,7 +292,7 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, c = (c << 8) | ocu[i++]; len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len], - UDF_NAME_LEN - utf_o->u_len); + UDF_NAME_LEN - 2 - utf_o->u_len); /* Valid character? */ if (len >= 0) utf_o->u_len += len; @@ -300,15 +310,19 @@ static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, int len; unsigned i, max_val; uint16_t uni_char; - int u_len; + int u_len, u_ch; memset(ocu, 0, sizeof(dstring) * length); ocu[0] = 8; max_val = 0xffU; + u_ch = 1; try_again: u_len = 0U; for (i = 0U; i < uni->u_len; i++) { + /* Name didn't fit? 
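Both conversion directions gain the same guard here: never start emitting a character whose encoding cannot fully fit in what remains of the buffer. The same idea in a standalone UTF-8 encoder (a sketch only; it covers just the 1-3 byte encodings the UDF code deals with):

#include <stdio.h>
#include <stdint.h>

/* Refuse to start a character that cannot fully fit. Returns the
 * new position, or -1 when the name didn't fit. */
static int put_utf8(uint8_t *out, int len, int pos, uint32_t c)
{
	int need = c < 0x80 ? 1 : (c < 0x800 ? 2 : 3);

	if (pos < 0 || pos + need > len)
		return -1;
	if (need == 1) {
		out[pos] = (uint8_t)c;
	} else if (need == 2) {
		out[pos] = 0xc0 | (c >> 6);
		out[pos + 1] = 0x80 | (c & 0x3f);
	} else {
		out[pos] = 0xe0 | (c >> 12);
		out[pos + 1] = 0x80 | ((c >> 6) & 0x3f);
		out[pos + 2] = 0x80 | (c & 0x3f);
	}
	return pos + need;
}

int main(void)
{
	uint8_t name[4];
	int pos = 0;

	pos = put_utf8(name, sizeof(name), pos, 'a');	/* 1 byte, fits */
	pos = put_utf8(name, sizeof(name), pos, 0xe9);	/* 2 bytes, fits */
	if (put_utf8(name, sizeof(name), pos, 0x20ac) < 0) /* 3 bytes: no room */
		puts("name didn't fit, truncated");
	return 0;
}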
*/ + if (u_len + 1 + u_ch >= length) + return 0; len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char); if (!len) continue; @@ -321,6 +335,7 @@ static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, if (uni_char > max_val) { max_val = 0xffffU; ocu[0] = (uint8_t)0x10U; + u_ch = 2; goto try_again; } diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 2c1036080d527..a7106eda50241 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -51,8 +51,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) if (ufs_fragnum(fragment) + count > uspi->s_fpg) ufs_error (sb, "ufs_free_fragments", "internal error"); - - lock_ufs(sb); + + mutex_lock(&UFS_SB(sb)->s_lock); cgno = ufs_dtog(uspi, fragment); bit = ufs_dtogd(uspi, fragment); @@ -115,13 +115,13 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) if (sb->s_flags & MS_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); - - unlock_ufs(sb); + + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT\n"); return; failed: - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT (FAILED)\n"); return; } @@ -151,7 +151,7 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count) goto failed; } - lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); do_more: overflow = 0; @@ -211,12 +211,12 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count) } ufs_mark_sb_dirty(sb); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT\n"); return; failed_unlock: - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); failed: UFSD("EXIT (FAILED)\n"); return; @@ -357,7 +357,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, usb1 = ubh_get_usb_first(uspi); *err = -ENOSPC; - lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); tmp = ufs_data_ptr_to_cpu(sb, p); if (count + ufs_fragnum(fragment) > uspi->s_fpb) { @@ -378,19 +378,19 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, "fragment %llu, tmp %llu\n", (unsigned long long)fragment, (unsigned long long)tmp); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return INVBLOCK; } if (fragment < UFS_I(inode)->i_lastfrag) { UFSD("EXIT (ALREADY ALLOCATED)\n"); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return 0; } } else { if (tmp) { UFSD("EXIT (ALREADY ALLOCATED)\n"); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return 0; } } @@ -399,7 +399,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, * There is not enough space for user on the device */ if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) { - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT (FAILED)\n"); return 0; } @@ -424,7 +424,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); } - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT, result %llu\n", (unsigned long long)result); return result; } @@ -439,7 +439,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, fragment + count); ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT, result %llu\n", (unsigned long long)result); return result; } @@ -477,7 +477,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, *err = 0; UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); - unlock_ufs(sb); + 
mutex_unlock(&UFS_SB(sb)->s_lock); if (newcount < request) ufs_free_fragments (inode, result + newcount, request - newcount); ufs_free_fragments (inode, tmp, oldcount); @@ -485,7 +485,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, return result; } - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT (FAILED)\n"); return 0; } diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 7caa016528883..fd0203ce1f7fd 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -69,11 +69,11 @@ void ufs_free_inode (struct inode * inode) ino = inode->i_ino; - lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); if (!((ino > 1) && (ino < (uspi->s_ncg * uspi->s_ipg )))) { ufs_warning(sb, "ufs_free_inode", "reserved inode or nonexistent inode %u\n", ino); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return; } @@ -81,7 +81,7 @@ void ufs_free_inode (struct inode * inode) bit = ufs_inotocgoff (ino); ucpi = ufs_load_cylinder (sb, cg); if (!ucpi) { - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return; } ucg = ubh_get_ucg(UCPI_UBH(ucpi)); @@ -115,7 +115,7 @@ void ufs_free_inode (struct inode * inode) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT\n"); } @@ -193,7 +193,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) sbi = UFS_SB(sb); uspi = sbi->s_uspi; - lock_ufs(sb); + mutex_lock(&sbi->s_lock); /* * Try to place the inode in its parent directory @@ -331,21 +331,21 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) sync_dirty_buffer(bh); brelse(bh); } - unlock_ufs(sb); + mutex_unlock(&sbi->s_lock); UFSD("allocating inode %lu\n", inode->i_ino); UFSD("EXIT\n"); return inode; fail_remove_inode: - unlock_ufs(sb); + mutex_unlock(&sbi->s_lock); clear_nlink(inode); unlock_new_inode(inode); iput(inode); UFSD("EXIT (FAILED): err %d\n", err); return ERR_PTR(err); failed: - unlock_ufs(sb); + mutex_unlock(&sbi->s_lock); make_bad_inode(inode); iput (inode); UFSD("EXIT (FAILED): err %d\n", err); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index be7d42c7d9382..2d93ab07da8a6 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -902,6 +902,9 @@ void ufs_evict_inode(struct inode * inode) invalidate_inode_buffers(inode); clear_inode(inode); - if (want_delete) + if (want_delete) { + lock_ufs(inode->i_sb); ufs_free_inode(inode); + unlock_ufs(inode->i_sb); + } } diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index e491a93a7e9af..60ee32249b726 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -128,12 +128,12 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out_notlocked; + lock_ufs(dir->i_sb); inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); err = PTR_ERR(inode); if (IS_ERR(inode)) - goto out_notlocked; + goto out; - lock_ufs(dir->i_sb); if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { /* slow symlink */ inode->i_op = &ufs_symlink_inode_operations; @@ -174,7 +174,12 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, inode_inc_link_count(inode); ihold(inode); - error = ufs_add_nondir(dentry, inode); + error = ufs_add_link(dentry, inode); + if (error) { + inode_dec_link_count(inode); + iput(inode); + } else + d_instantiate(dentry, inode); unlock_ufs(dir->i_sb); return error; } @@ -184,9 +189,13 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) struct inode * inode; int err; + lock_ufs(dir->i_sb); + inode_inc_link_count(dir); + inode = ufs_new_inode(dir, S_IFDIR|mode); + err = 
PTR_ERR(inode); if (IS_ERR(inode)) - return PTR_ERR(inode); + goto out_dir; inode->i_op = &ufs_dir_inode_operations; inode->i_fop = &ufs_dir_operations; @@ -194,9 +203,6 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) inode_inc_link_count(inode); - lock_ufs(dir->i_sb); - inode_inc_link_count(dir); - err = ufs_make_empty(inode, dir); if (err) goto out_fail; @@ -206,6 +212,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) goto out_fail; unlock_ufs(dir->i_sb); + unlock_new_inode(inode); d_instantiate(dentry, inode); out: return err; @@ -215,6 +222,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) inode_dec_link_count(inode); unlock_new_inode(inode); iput (inode); +out_dir: inode_dec_link_count(dir); unlock_ufs(dir->i_sb); goto out; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index b3bc3e7ae79db..dc33f94163404 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -694,6 +694,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait) unsigned flags; lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); UFSD("ENTER\n"); @@ -711,6 +712,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait) ufs_put_cstotal(sb); UFSD("EXIT\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; @@ -799,6 +801,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY)); mutex_init(&sbi->mutex); + mutex_init(&sbi->s_lock); spin_lock_init(&sbi->work_lock); INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); /* @@ -1277,6 +1280,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) sync_filesystem(sb); lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); uspi = UFS_SB(sb)->s_uspi; flags = UFS_SB(sb)->s_flags; usb1 = ubh_get_usb_first(uspi); @@ -1290,6 +1294,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) new_mount_opt = 0; ufs_set_opt (new_mount_opt, ONERROR_LOCK); if (!ufs_parse_options (data, &new_mount_opt)) { + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } @@ -1297,12 +1302,14 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) new_mount_opt |= ufstype; } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { pr_err("ufstype can't be changed during remount\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { UFS_SB(sb)->s_mount_opt = new_mount_opt; + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; } @@ -1326,6 +1333,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) */ #ifndef CONFIG_UFS_FS_WRITE pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; #else @@ -1335,11 +1343,13 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufstype != UFS_MOUNT_UFSTYPE_SUNx86 && ufstype != UFS_MOUNT_UFSTYPE_UFS2) { pr_err("this ufstype is read-only supported\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } if (!ufs_read_cylinder_structures(sb)) { pr_err("failed during remounting\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EPERM; } @@ -1347,6 +1357,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) #endif } UFS_SB(sb)->s_mount_opt = new_mount_opt; + mutex_unlock(&UFS_SB(sb)->s_lock); 
unlock_ufs(sb); return 0; } diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 2a07396d5f9eb..cf6368d42d4ab 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -30,6 +30,7 @@ struct ufs_sb_info { int work_queued; /* non-zero if the delayed work is queued */ struct delayed_work sync_work; /* FS sync delayed work */ spinlock_t work_lock; /* protects sync_work and work_queued */ + struct mutex s_lock; }; struct ufs_inode_info { diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 20de88d1bf862..dd714037c322d 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -159,11 +159,10 @@ xfs_attr3_rmt_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + int blksize = mp->m_attr_geo->blksize; char *ptr; int len; xfs_daddr_t bno; - int blksize = mp->m_attr_geo->blksize; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -175,16 +174,22 @@ xfs_attr3_rmt_write_verify( ASSERT(len >= blksize); while (len > 0) { + struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; + if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } - if (bip) { - struct xfs_attr3_rmt_hdr *rmt; - rmt = (struct xfs_attr3_rmt_hdr *)ptr; - rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); + /* + * Ensure we aren't writing bogus LSNs to disk. See + * xfs_attr3_rmt_hdr_set() for the explanation. + */ + if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); + return; } xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); @@ -221,6 +226,18 @@ xfs_attr3_rmt_hdr_set( rmt->rm_owner = cpu_to_be64(ino); rmt->rm_blkno = cpu_to_be64(bno); + /* + * Remote attribute blocks are written synchronously, so we don't + * have an LSN that we can stamp in them that makes any sense to log + * recovery. To ensure that log recovery handles overwrites of these + * blocks sanely (i.e. once they've been freed and reallocated as some + * other type of metadata) we need to ensure that the LSN has a value + * that tells log recovery to ignore the LSN and overwrite the buffer + * with whatever is in it's log. To do this, we use the magic + * NULLCOMMITLSN to indicate that the LSN is invalid. + */ + rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN); + return sizeof(struct xfs_attr3_rmt_hdr); } @@ -434,14 +451,21 @@ xfs_attr_rmtval_set( /* * Allocate a single extent, up to the size of the value. + * + * Note that we have to consider this a data allocation as we + * write the remote attribute without logging the contents. + * Hence we must ensure that we aren't using blocks that are on + * the busy list so that we don't overwrite blocks which have + * recently been freed but their transactions are not yet + * committed to disk. If we overwrite the contents of a busy + * extent and then crash then the block may not contain the + * correct metadata after log recovery occurs. 
*/ xfs_bmap_init(args->flist, args->firstblock); nmap = 1; error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, - blkcnt, - XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - args->firstblock, args->total, &map, &nmap, - args->flist); + blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock, + args->total, &map, &nmap, args->flist); if (!error) { error = xfs_bmap_finish(&args->trans, args->flist, &committed); diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 74bcbabfa5232..b14bbd6bb05fa 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -680,8 +680,15 @@ typedef struct xfs_attr_leaf_name_remote { typedef struct xfs_attr_leafblock { xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */ xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */ - xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */ - xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */ + /* + * The rest of the block contains the following structures after the + * leaf entries, growing from the bottom up. The variables are never + * referenced and definining them can actually make gcc optimize away + * accesses to the 'entries' array above index 0 so don't do that. + * + * xfs_attr_leaf_name_local_t namelist; + * xfs_attr_leaf_name_remote_t valuelist; + */ } xfs_attr_leafblock_t; /* diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index de1ea16f57485..534bbf283d6bb 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -252,7 +252,8 @@ xfs_dir3_data_reada_verify( return; case cpu_to_be32(XFS_DIR2_DATA_MAGIC): case cpu_to_be32(XFS_DIR3_DATA_MAGIC): - xfs_dir3_data_verify(bp); + bp->b_ops = &xfs_dir3_data_buf_ops; + bp->b_ops->verify_read(bp); return; default: xfs_buf_ioerror(bp, -EFSCORRUPTED); diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 41b80d3d38772..06bb4218b3625 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -2132,6 +2132,7 @@ xfs_dir2_node_replace( int error; /* error return value */ int i; /* btree level */ xfs_ino_t inum; /* new inode number */ + int ftype; /* new file type */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_entry_t *lep; /* leaf entry being changed */ int rval; /* internal return value */ @@ -2145,7 +2146,14 @@ xfs_dir2_node_replace( state = xfs_da_state_alloc(); state->args = args; state->mp = args->dp->i_mount; + + /* + * We have to save new inode number and ftype since + * xfs_da3_node_lookup_int() is going to overwrite them + */ inum = args->inumber; + ftype = args->filetype; + /* * Lookup the entry to change in the btree. */ @@ -2183,7 +2191,7 @@ xfs_dir2_node_replace( * Fill in the new inode number and log the entry. 
*/ dep->inumber = cpu_to_be64(inum); - args->dp->d_ops->data_put_ftype(dep, args->filetype); + args->dp->d_ops->data_put_ftype(dep, ftype); xfs_dir2_data_log_entry(args, state->extrablk.bp, dep); rval = 0; } diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 6fbf2d853a54e..48aff071591d2 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -54,7 +54,7 @@ xfs_dqcheck( xfs_dqid_t id, uint type, /* used only when IO_dorepair is true */ uint flags, - char *str) + const char *str) { xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; int errs = 0; @@ -207,7 +207,8 @@ xfs_dquot_buf_verify_crc( STATIC bool xfs_dquot_buf_verify( struct xfs_mount *mp, - struct xfs_buf *bp) + struct xfs_buf *bp, + int warn) { struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; xfs_dqid_t id = 0; @@ -240,8 +241,7 @@ xfs_dquot_buf_verify( if (i == 0) id = be32_to_cpu(ddq->d_id); - error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, - "xfs_dquot_buf_verify"); + error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__); if (error) return false; } @@ -256,13 +256,32 @@ xfs_dquot_buf_read_verify( if (!xfs_dquot_buf_verify_crc(mp, bp)) xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dquot_buf_verify(mp, bp)) + else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) xfs_verifier_error(bp); } +/* + * readahead errors are silent and simply leave the buffer as !done so a real + * read will then be run with the xfs_dquot_buf_ops verifier. See + * xfs_inode_buf_verify() for why we use EIO and ~XBF_DONE here rather than + * reporting the failure. + */ +static void +xfs_dquot_buf_readahead_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (!xfs_dquot_buf_verify_crc(mp, bp) || + !xfs_dquot_buf_verify(mp, bp, 0)) { + xfs_buf_ioerror(bp, -EIO); + bp->b_flags &= ~XBF_DONE; + } +} + /* * we don't calculate the CRC here as that is done when the dquot is flushed to * the buffer after the update is done. This ensures that the dquot in the @@ -274,7 +293,7 @@ xfs_dquot_buf_write_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; - if (!xfs_dquot_buf_verify(mp, bp)) { + if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) { xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; @@ -286,3 +305,7 @@ const struct xfs_buf_ops xfs_dquot_buf_ops = { .verify_write = xfs_dquot_buf_write_verify, }; +const struct xfs_buf_ops xfs_dquot_buf_ra_ops = { + .verify_read = xfs_dquot_buf_readahead_verify, + .verify_write = xfs_dquot_buf_write_verify, +}; diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 002b6b3a19885..7da6d0b2c2edb 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -63,11 +63,14 @@ xfs_inobp_check( * has not had the inode cores stamped into it. Hence for readahead, the buffer * may be potentially invalid. * - * If the readahead buffer is invalid, we don't want to mark it with an error, - * but we do want to clear the DONE status of the buffer so that a followup read - * will re-read it from disk. This will ensure that we don't get an unnecessary - * warnings during log recovery and we don't get unnecssary panics on debug - * kernels. + * If the readahead buffer is invalid, we need to mark it with an error and + * clear the DONE status of the buffer so that a followup read will re-read it + * from disk. 
We don't report the error otherwise to avoid warnings during log + * recovery and we don't get unnecssary panics on debug kernels. We use EIO here + * because all we want to do is say readahead failed; there is no-one to report + * the error to, so this will distinguish it from a non-ra verifier failure. + * Changes to this readahead error behavour also need to be reflected in + * xfs_dquot_buf_readahead_verify(). */ static void xfs_inode_buf_verify( @@ -95,6 +98,7 @@ xfs_inode_buf_verify( XFS_RANDOM_ITOBP_INOTOBP))) { if (readahead) { bp->b_flags &= ~XBF_DONE; + xfs_buf_ioerror(bp, -EIO); return; } diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index 1b0a08379759d..f51078f1e92ad 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -153,7 +153,7 @@ typedef __uint16_t xfs_qwarncnt_t; #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, - xfs_dqid_t id, uint type, uint flags, char *str); + xfs_dqid_t id, uint type, uint flags, const char *str); extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 8dda4b321343b..a3472a38efd20 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -49,6 +49,7 @@ extern const struct xfs_buf_ops xfs_inobt_buf_ops; extern const struct xfs_buf_ops xfs_inode_buf_ops; extern const struct xfs_buf_ops xfs_inode_buf_ra_ops; extern const struct xfs_buf_ops xfs_dquot_buf_ops; +extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops; extern const struct xfs_buf_ops xfs_sb_buf_ops; extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops; extern const struct xfs_buf_ops xfs_symlink_buf_ops; diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 3fbf167cfb4cd..73e75a87af502 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -435,8 +435,14 @@ xfs_attr_inactive( */ xfs_trans_ijoin(trans, dp, 0); - /* invalidate and truncate the attribute fork extents */ - if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { + /* + * Invalidate and truncate the attribute fork extents. Make sure the + * fork actually has attributes as otherwise the invalidation has no + * blocks to read and returns an error. In this case, just do the fork + * removal below. + */ + if (xfs_inode_hasattr(dp) && + dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { error = xfs_attr3_root_inactive(&trans, dp); if (error) goto out_cancel; diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 1790b00bea7a7..7dd64bf98c566 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -605,6 +605,13 @@ xfs_buf_get_map( } } + /* + * Clear b_error if this is a lookup from a caller that doesn't expect + * valid data to be found in the buffer. + */ + if (!(flags & XBF_READ)) + xfs_buf_ioerror(bp, 0); + XFS_STATS_INC(xb_get); trace_xfs_buf_get(bp, flags, _RET_IP_); return bp; @@ -1522,6 +1529,16 @@ xfs_wait_buftarg( LIST_HEAD(dispose); int loop = 0; + /* + * We need to flush the buffer workqueue to ensure that all IO + * completion processing is 100% done. Just waiting on buffer locks is + * not sufficient for async IO as the reference count held over IO is + * not released until after the buffer lock is dropped. Hence we need to + * ensure here that all reference counts have been dropped before we + * start walking the LRU list. 
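The teardown ordering this comment describes — let all outstanding work, and the references it holds, finish before walking structures for disposal — can be modelled loosely with threads; in this sketch pthread_join() plays the role of drain_workqueue():

#include <pthread.h>
#include <stdio.h>

#define NR_WORKERS 4

static void *io_completion(void *arg)
{
	(void)arg;	/* would drop the buffer reference taken for IO */
	return NULL;
}

int main(void)
{
	pthread_t w[NR_WORKERS];

	for (int i = 0; i < NR_WORKERS; i++)
		pthread_create(&w[i], NULL, io_completion, NULL);

	/* analogue of drain_workqueue(): every queued completion runs
	 * and drops its reference before anything is torn down */
	for (int i = 0; i < NR_WORKERS; i++)
		pthread_join(w[i], NULL);

	puts("all references dropped; safe to walk the LRU");
	return 0;
}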
+ */ + drain_workqueue(btp->bt_mount->m_buf_workqueue); + /* loop until there is nothing left on the lru list. */ while (list_lru_count(&btp->bt_lru)) { list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 4f5784f85a5b2..1114afdd5a6bb 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1887,9 +1887,14 @@ xlog_recover_get_buf_lsn( uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid; break; case XFS_ATTR3_RMT_MAGIC: - lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn); - uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid; - break; + /* + * Remote attr blocks are written synchronously, rather than + * being logged. That means they do not contain a valid LSN + * (i.e. transactionally ordered) in them, and hence any time we + * see a buffer to replay over the top of a remote attribute + * block we should simply do so. + */ + goto recover_immediately; case XFS_SB_MAGIC: lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); uuid = &((struct xfs_dsb *)blk)->sb_uuid; @@ -3149,6 +3154,7 @@ xlog_recover_dquot_ra_pass2( struct xfs_disk_dquot *recddq; struct xfs_dq_logformat *dq_f; uint type; + int len; if (mp->m_qflags == 0) @@ -3169,8 +3175,12 @@ xlog_recover_dquot_ra_pass2( ASSERT(dq_f); ASSERT(dq_f->qlf_len == 1); - xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, - XFS_FSB_TO_BB(mp, dq_f->qlf_len), NULL); + len = XFS_FSB_TO_BB(mp, dq_f->qlf_len); + if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0)) + return; + + xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len, + &xfs_dquot_buf_ra_ops); } STATIC void diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 858e1e62bbaa3..65a45372fb1f3 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -504,9 +504,9 @@ xfs_showargs( seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10); if (mp->m_logname) - seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname); + seq_show_option(m, MNTOPT_LOGDEV, mp->m_logname); if (mp->m_rtname) - seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname); + seq_show_option(m, MNTOPT_RTDEV, mp->m_rtname); if (mp->m_dalign > 0) seq_printf(m, "," MNTOPT_SUNIT "=%d", diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 3df411eadb867..40c076523cfa7 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -104,7 +104,7 @@ xfs_readlink_bmap( cur_chunk += sizeof(struct xfs_dsymlink_hdr); } - memcpy(link + offset, bp->b_addr, byte_cnt); + memcpy(link + offset, cur_chunk, byte_cnt); pathlen -= byte_cnt; offset += byte_cnt; diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 08ef57bc8d63f..f5ed1f17f0616 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -195,9 +195,18 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_do_not_use_xsdt, FALSE); * address. Although ACPICA adheres to the ACPI specification which * requires the use of the corresponding 64-bit address if it is non-zero, * some machines have been found to have a corrupted non-zero 64-bit - * address. Default is TRUE, favor the 32-bit addresses. + * address. Default is FALSE, do not favor the 32-bit addresses. */ -ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_fadt_addresses, TRUE); +ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_fadt_addresses, FALSE); + +/* + * Optionally use 32-bit FACS table addresses. + * It is reported that some platforms fail to resume from system suspending + * if 64-bit FACS table address is selected: + * https://bugzilla.kernel.org/show_bug.cgi?id=74021 + * Default is TRUE, favor the 32-bit addresses. 
+ */ +ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_facs_addresses, TRUE); /* * Optionally truncate I/O addresses to 16 bits. Provides compatibility diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 1c3002e1db20c..181427ef3549c 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -572,6 +572,7 @@ typedef u64 acpi_integer; #define ACPI_NO_ACPI_ENABLE 0x10 #define ACPI_NO_DEVICE_INIT 0x20 #define ACPI_NO_OBJECT_INIT 0x40 +#define ACPI_NO_FACS_INIT 0x80 /* * Initialization state diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index 0419485891f2a..0f1c6f315cdc5 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -75,7 +75,7 @@ typedef u64 __nocast cputime64_t; */ static inline cputime_t timespec_to_cputime(const struct timespec *val) { - u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec; + u64 ret = (u64)val->tv_sec * NSEC_PER_SEC + val->tv_nsec; return (__force cputime_t) ret; } static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) @@ -91,7 +91,8 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) */ static inline cputime_t timeval_to_cputime(const struct timeval *val) { - u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC; + u64 ret = (u64)val->tv_sec * NSEC_PER_SEC + + val->tv_usec * NSEC_PER_USEC; return (__force cputime_t) ret; } static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val) diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index eb6f9e6c30756..b6a53e8e526a5 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -71,9 +71,10 @@ static __always_inline bool __preempt_count_dec_and_test(void) /* * Returns true when we need to resched and can (barring IRQ state). 
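The signature change below makes the expected preempt count explicit: a caller that legitimately holds one level of non-preemptible context (such as cond_resched_lock() passing the lock's offset) can still be offered a reschedule, while a plain callsite keeps requiring a count of zero. A toy userspace model of the comparison, with every name local to the sketch:

#include <stdbool.h>
#include <stdio.h>

static int preempt_count;	/* nesting level of non-preemptible sections */
static bool need_resched;

static bool should_resched(int preempt_offset)
{
	return preempt_count == preempt_offset && need_resched;
}

int main(void)
{
	need_resched = true;
	preempt_count = 1;			/* e.g. one spinlock held */

	printf("%d\n", should_resched(0));	/* 0: plain callsite, must not */
	printf("%d\n", should_resched(1));	/* 1: caller expects one level */
	return 0;
}

With the old !preempt_count() test, the lock-holding caller could never be offered a reschedule at all.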
*/ -static __always_inline bool should_resched(void) +static __always_inline bool should_resched(int preempt_offset) { - return unlikely(!preempt_count() && tif_need_resched()); + return unlikely(preempt_count() == preempt_offset && + tif_need_resched()); } #ifdef CONFIG_PREEMPT diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 98abda9ed3aa8..bbc59bdd6395f 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -199,6 +199,7 @@ struct crypto_ahash { unsigned int keylen); unsigned int reqsize; + bool has_setkey; struct crypto_tfm base; }; @@ -356,6 +357,11 @@ static inline void *ahash_request_ctx(struct ahash_request *req) int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen); +static inline bool crypto_ahash_has_setkey(struct crypto_ahash *tfm) +{ + return tfm->has_setkey; +} + /** * crypto_ahash_finup() - update and finalize message digest * @req: reference to the ahash_request handle that holds all information diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 018afb264ac26..a2bfd7843f18f 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -30,6 +30,9 @@ struct alg_sock { struct sock *parent; + unsigned int refcnt; + unsigned int nokey_refcnt; + const struct af_alg_type *type; void *private; }; @@ -50,9 +53,11 @@ struct af_alg_type { void (*release)(void *private); int (*setkey)(void *private, const u8 *key, unsigned int keylen); int (*accept)(void *private, struct sock *sk); + int (*accept_nokey)(void *private, struct sock *sk); int (*setauthsize)(void *private, unsigned int authsize); struct proto_ops *ops; + struct proto_ops *ops_nokey; struct module *owner; char name[14]; }; @@ -67,6 +72,7 @@ int af_alg_register_type(const struct af_alg_type *type); int af_alg_unregister_type(const struct af_alg_type *type); int af_alg_release(struct socket *sock); +void af_alg_release_parent(struct sock *sk); int af_alg_accept(struct sock *sk, struct socket *newsock); int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len); @@ -83,11 +89,6 @@ static inline struct alg_sock *alg_sk(struct sock *sk) return (struct alg_sock *)sk; } -static inline void af_alg_release_parent(struct sock *sk) -{ - sock_put(alg_sk(sk)->parent); -} - static inline void af_alg_init_completion(struct af_alg_completion *completion) { init_completion(&completion->completion); diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index c157103492b0d..3f13b910f8d2e 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -77,26 +77,26 @@ int __must_check drm_atomic_async_commit(struct drm_atomic_state *state); #define for_each_connector_in_state(state, connector, connector_state, __i) \ for ((__i) = 0; \ - (connector) = (state)->connectors[__i], \ - (connector_state) = (state)->connector_states[__i], \ - (__i) < (state)->num_connector; \ + (__i) < (state)->num_connector && \ + ((connector) = (state)->connectors[__i], \ + (connector_state) = (state)->connector_states[__i], 1); \ (__i)++) \ if (connector) #define for_each_crtc_in_state(state, crtc, crtc_state, __i) \ for ((__i) = 0; \ - (crtc) = (state)->crtcs[__i], \ - (crtc_state) = (state)->crtc_states[__i], \ - (__i) < (state)->dev->mode_config.num_crtc; \ + (__i) < (state)->dev->mode_config.num_crtc && \ + ((crtc) = (state)->crtcs[__i], \ + (crtc_state) = (state)->crtc_states[__i], 1); \ (__i)++) \ if (crtc_state) -#define for_each_plane_in_state(state, plane, plane_state, __i) \ - for ((__i) = 0; \ - (plane) = (state)->planes[__i], \ - 
(plane_state) = (state)->plane_states[__i], \ - (__i) < (state)->dev->mode_config.num_total_plane; \ - (__i)++) \ +#define for_each_plane_in_state(state, plane, plane_state, __i) \ + for ((__i) = 0; \ + (__i) < (state)->dev->mode_config.num_total_plane && \ + ((plane) = (state)->planes[__i], \ + (plane_state) = (state)->plane_states[__i], 1); \ + (__i)++) \ if (plane_state) #endif /* DRM_ATOMIC_H_ */ diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index 7bfb063029d83..461a0558bca4d 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -35,4 +35,13 @@ void drm_clflush_pages(struct page *pages[], unsigned long num_pages); +static inline bool drm_arch_can_wc_memory(void) +{ +#if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE) + return false; +#else + return true; +#endif +} + #endif diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index a2507817be419..c7f01d1aa5622 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ -253,6 +253,7 @@ struct drm_dp_remote_dpcd_write { u8 *bytes; }; +#define DP_REMOTE_I2C_READ_MAX_TRANSACTIONS 4 struct drm_dp_remote_i2c_read { u8 num_transactions; u8 port_number; @@ -262,7 +263,7 @@ struct drm_dp_remote_i2c_read { u8 *bytes; u8 no_stop_bit; u8 i2c_transaction_delay; - } transactions[4]; + } transactions[DP_REMOTE_I2C_READ_MAX_TRANSACTIONS]; u8 read_i2c_device_id; u8 num_bytes_read; }; @@ -448,9 +449,7 @@ struct drm_dp_mst_topology_mgr { the mstb tx_slots and txmsg->state once they are queued */ struct mutex qlock; struct list_head tx_msg_downq; - struct list_head tx_msg_upq; bool tx_down_in_progress; - bool tx_up_in_progress; /* payload info + lock for it */ struct mutex payload_lock; @@ -463,6 +462,10 @@ struct drm_dp_mst_topology_mgr { struct work_struct work; struct work_struct tx_work; + + struct list_head destroy_connector_list; + struct mutex destroy_connector_lock; + struct work_struct destroy_connector_work; }; int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr, struct device *dev, struct drm_dp_aux *aux, int max_dpcd_transaction_bytes, int max_payloads, int conn_base_id); diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index d639049a613df..553210c02ee0f 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -73,18 +73,28 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B) #define DRM_FIXED_ONE (1ULL << DRM_FIXED_POINT) #define DRM_FIXED_DECIMAL_MASK (DRM_FIXED_ONE - 1) #define DRM_FIXED_DIGITS_MASK (~DRM_FIXED_DECIMAL_MASK) +#define DRM_FIXED_EPSILON 1LL +#define DRM_FIXED_ALMOST_ONE (DRM_FIXED_ONE - DRM_FIXED_EPSILON) static inline s64 drm_int2fixp(int a) { return ((s64)a) << DRM_FIXED_POINT; } -static inline int drm_fixp2int(int64_t a) +static inline int drm_fixp2int(s64 a) { return ((s64)a) >> DRM_FIXED_POINT; } -static inline unsigned drm_fixp_msbset(int64_t a) +static inline int drm_fixp2int_ceil(s64 a) +{ + if (a > 0) + return drm_fixp2int(a + DRM_FIXED_ALMOST_ONE); + else + return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE); +} + +static inline unsigned drm_fixp_msbset(s64 a) { unsigned shift, sign = (a >> 63) & 1; @@ -136,6 +146,45 @@ static inline s64 drm_fixp_div(s64 a, s64 b) return result; } +static inline s64 drm_fixp_from_fraction(s64 a, s64 b) +{ + s64 res; + bool a_neg = a < 0; + bool b_neg = b < 0; + u64 a_abs = a_neg ? -a : a; + u64 b_abs = b_neg ? 
-b : b; + u64 rem; + + /* determine integer part */ + u64 res_abs = div64_u64_rem(a_abs, b_abs, &rem); + + /* determine fractional part */ + { + u32 i = DRM_FIXED_POINT; + + do { + rem <<= 1; + res_abs <<= 1; + if (rem >= b_abs) { + res_abs |= 1; + rem -= b_abs; + } + } while (--i != 0); + } + + /* round up LSB */ + { + u64 summand = (rem << 1) >= b_abs; + + res_abs += summand; + } + + res = (s64) res_abs; + if (a_neg ^ b_neg) + res = -res; + return res; +} + static inline s64 drm_fixp_exp(s64 x) { s64 tolerance = div64_s64(DRM_FIXED_ONE, 1000000); diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 45c39a37f9249..8bc073d297db2 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -172,6 +172,7 @@ {0x1002, 0x6610, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6611, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6613, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6617, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6620, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6621, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6623, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e4da5e35e29cd..808c43afa8ac9 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -198,7 +198,7 @@ struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); - +void acpi_penalize_sci_irq(int irq, int trigger, int polarity); void acpi_pci_irq_disable (struct pci_dev *dev); extern int ec_read(u8 addr, u8 *val); @@ -440,6 +440,7 @@ extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, #define ACPI_OST_SC_INSERT_NOT_SUPPORTED 0x82 extern void acpi_early_init(void); +extern void acpi_subsystem_init(void); extern int acpi_nvs_register(__u64 start, __u64 size); @@ -494,6 +495,7 @@ static inline const char *acpi_dev_name(struct acpi_device *adev) } static inline void acpi_early_init(void) { } +static inline void acpi_subsystem_init(void) { } static inline int early_acpi_boot_init(void) { diff --git a/include/linux/ata.h b/include/linux/ata.h index b666b773e1118..2e5fb1c312512 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -45,6 +45,7 @@ enum { ATA_SECT_SIZE = 512, ATA_MAX_SECTORS_128 = 128, ATA_MAX_SECTORS = 256, + ATA_MAX_SECTORS_1024 = 1024, ATA_MAX_SECTORS_LBA48 = 65535,/* TODO: 65536? 
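drm_fixp_from_fraction() above computes a signed 32.32 result by long division: div64_u64_rem() yields the integer part, a 32-step shift-and-subtract loop produces the fractional bits, and the final remainder rounds the LSB. A standalone sketch of the same algorithm, with fixp_from_fraction() as a local stand-in rather than the kernel function:

#include <stdio.h>
#include <stdint.h>

#define FIXED_POINT 32

static int64_t fixp_from_fraction(int64_t a, int64_t b)
{
	int neg = (a < 0) ^ (b < 0);
	uint64_t a_abs = a < 0 ? -a : a;
	uint64_t b_abs = b < 0 ? -b : b;
	uint64_t res = a_abs / b_abs;		/* integer part */
	uint64_t rem = a_abs % b_abs;
	int i;

	for (i = 0; i < FIXED_POINT; i++) {	/* fractional bits */
		rem <<= 1;
		res <<= 1;
		if (rem >= b_abs) {
			res |= 1;
			rem -= b_abs;
		}
	}
	res += (rem << 1) >= b_abs;		/* round the LSB */
	return neg ? -(int64_t)res : (int64_t)res;
}

int main(void)
{
	/* 1/3 in 32.32 is 0x0000000055555555 */
	printf("%016llx\n", (unsigned long long)fixp_from_fraction(1, 3));
	return 0;
}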
*/ ATA_MAX_SECTORS_TAPE = 65535, @@ -384,8 +385,6 @@ enum { SATA_SSP = 0x06, /* Software Settings Preservation */ SATA_DEVSLP = 0x09, /* Device Sleep */ - SETFEATURE_SENSE_DATA = 0xC3, /* Sense Data Reporting feature */ - /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, ATA_SET_MAX_PASSWD = 0x01, @@ -488,8 +487,8 @@ enum ata_tf_protocols { }; enum ata_ioctls { - ATA_IOC_GET_IO32 = 0x309, - ATA_IOC_SET_IO32 = 0x324, + ATA_IOC_GET_IO32 = 0x309, /* HDIO_GET_32BIT */ + ATA_IOC_SET_IO32 = 0x324, /* HDIO_SET_32BIT */ }; /* core structures */ @@ -529,8 +528,6 @@ struct ata_bmdma_prd { #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20) #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4)) #define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)) -#define ata_id_has_ncq_autosense(id) \ - ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7)) static inline bool ata_id_has_hipm(const u16 *id) { @@ -709,20 +706,6 @@ static inline bool ata_id_has_read_log_dma_ext(const u16 *id) return id[ATA_ID_COMMAND_SET_3] & (1 << 3); } -static inline bool ata_id_has_sense_reporting(const u16 *id) -{ - if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) - return false; - return id[ATA_ID_COMMAND_SET_3] & (1 << 6); -} - -static inline bool ata_id_sense_reporting_enabled(const u16 *id) -{ - if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) - return false; - return id[ATA_ID_COMMAND_SET_4] & (1 << 6); -} - /** * ata_id_major_version - get ATA level of drive * @id: Identify data diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 73b45225a7ca1..e6797ded700ec 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -317,6 +317,13 @@ sb_getblk(struct super_block *sb, sector_t block) return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } + +static inline struct buffer_head * +sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) +{ + return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp); +} + static inline struct buffer_head * sb_find_get_block(struct super_block *sb, sector_t block) { diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index b6a52a4b457aa..51bb6532785c3 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -27,10 +27,12 @@ /** * struct can_skb_priv - private additional data inside CAN sk_buffs * @ifindex: ifindex of the first interface the CAN frame appeared on + * @skbcnt: atomic counter to have an unique id together with skb pointer * @cf: align to the following CAN frame at skb->data */ struct can_skb_priv { int ifindex; + int skbcnt; struct can_frame cf[0]; }; diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e15499422fdcc..e91c6f15f6e81 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -224,6 +224,7 @@ struct ceph_connection { struct ceph_entity_addr actual_peer_addr; /* message out temps */ + struct ceph_msg_header out_hdr; struct ceph_msg *out_msg; /* sending message (== tail of out_sent) */ bool out_msg_done; @@ -233,7 +234,6 @@ struct ceph_connection { int out_kvec_left; /* kvec's left in out_kvec */ int out_skip; /* skip this many bytes */ int out_kvec_bytes; /* total bytes left */ - bool out_kvec_is_msg; /* kvec refers to out_msg */ int out_more; /* there is more data after the kvecs */ __le64 out_temp_ack; /* for writing an ack */ diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h new file mode 100644 index 0000000000000..8d9c7e7a64323 --- /dev/null +++ 
b/include/linux/cgroup-defs.h @@ -0,0 +1,470 @@ +/* + * linux/cgroup-defs.h - basic definitions for cgroup + * + * This file provides basic type and interface. Include this file directly + * only if necessary to avoid cyclic dependencies. + */ +#ifndef _LINUX_CGROUP_DEFS_H +#define _LINUX_CGROUP_DEFS_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_CGROUPS + +struct cgroup; +struct cgroup_root; +struct cgroup_subsys; +struct cgroup_taskset; +struct kernfs_node; +struct kernfs_ops; +struct kernfs_open_file; + +#define MAX_CGROUP_TYPE_NAMELEN 32 +#define MAX_CGROUP_ROOT_NAMELEN 64 +#define MAX_CFTYPE_NAME 64 + +/* define the enumeration of all cgroup subsystems */ +#define SUBSYS(_x) _x ## _cgrp_id, +enum cgroup_subsys_id { +#include + CGROUP_SUBSYS_COUNT, +}; +#undef SUBSYS + +/* bits in struct cgroup_subsys_state flags field */ +enum { + CSS_NO_REF = (1 << 0), /* no reference counting for this css */ + CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ + CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ +}; + +/* bits in struct cgroup flags field */ +enum { + /* Control Group requires release notifications to userspace */ + CGRP_NOTIFY_ON_RELEASE, + /* + * Clone the parent's configuration when creating a new child + * cpuset cgroup. For historical reasons, this option can be + * specified at mount time and thus is implemented here. + */ + CGRP_CPUSET_CLONE_CHILDREN, +}; + +/* cgroup_root->flags */ +enum { + CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), /* __DEVEL__sane_behavior specified */ + CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ + CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ +}; + +/* cftype->flags */ +enum { + CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ + CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ + CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ + + /* internal flags, do not use outside cgroup core proper */ + __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ + __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ +}; + +/* + * Per-subsystem/per-cgroup state maintained by the system. This is the + * fundamental structural building block that controllers deal with. + * + * Fields marked with "PI:" are public and immutable and may be accessed + * directly without synchronization. + */ +struct cgroup_subsys_state { + /* PI: the cgroup that this css is attached to */ + struct cgroup *cgroup; + + /* PI: the cgroup subsystem that this css is attached to */ + struct cgroup_subsys *ss; + + /* reference count - access via css_[try]get() and css_put() */ + struct percpu_ref refcnt; + + /* PI: the parent css */ + struct cgroup_subsys_state *parent; + + /* siblings list anchored at the parent's ->children */ + struct list_head sibling; + struct list_head children; + + /* + * PI: Subsys-unique ID. 0 is unused and root is always 1. The + * matching css can be looked up using css_from_id(). + */ + int id; + + unsigned int flags; + + /* + * Monotonically increasing unique serial number which defines a + * uniform order among all csses. It's guaranteed that all + * ->children lists are in the ascending order of ->serial_nr and + * used to allow interrupting and resuming iterations. + */ + u64 serial_nr; + + /* + * Incremented by online self and children. Used to guarantee that + * parents are not offlined before their children. 
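online_cnt counts the css itself plus each online child, which is what keeps a parent from being offlined while a child is still online. A rough sketch of the protocol, assuming simplified helpers (the real bookkeeping lives in kernel/cgroup.c and folds the parent walk into the offline path):

/*
 * Simplified sketch, not the in-tree implementation; assumes the
 * caller serializes online/offline as cgroup core does.
 */
static void sketch_css_online(struct cgroup_subsys_state *css)
{
	atomic_inc(&css->online_cnt);		/* self */
	if (css->parent)
		atomic_inc(&css->parent->online_cnt);
}

static void sketch_css_offline(struct cgroup_subsys_state *css)
{
	/* a parent's count only reaches zero after all children's did */
	if (atomic_dec_and_test(&css->online_cnt) && css->parent)
		atomic_dec(&css->parent->online_cnt);
}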
+ */ + atomic_t online_cnt; + + /* percpu_ref killing and RCU release */ + struct rcu_head rcu_head; + struct work_struct destroy_work; +}; + +/* + * A css_set is a structure holding pointers to a set of + * cgroup_subsys_state objects. This saves space in the task struct + * object and speeds up fork()/exit(), since a single inc/dec and a + * list_add()/del() can bump the reference count on the entire cgroup + * set for a task. + */ +struct css_set { + /* Reference count */ + atomic_t refcount; + + /* + * List running through all cgroup groups in the same hash + * slot. Protected by css_set_lock + */ + struct hlist_node hlist; + + /* + * Lists running through all tasks using this cgroup group. + * mg_tasks lists tasks which belong to this cset but are in the + * process of being migrated out or in. Protected by + * css_set_rwsem, but, during migration, once tasks are moved to + * mg_tasks, it can be read safely while holding cgroup_mutex. + */ + struct list_head tasks; + struct list_head mg_tasks; + + /* + * List of cgrp_cset_links pointing at cgroups referenced from this + * css_set. Protected by css_set_lock. + */ + struct list_head cgrp_links; + + /* the default cgroup associated with this css_set */ + struct cgroup *dfl_cgrp; + + /* + * Set of subsystem states, one for each subsystem. This array is + * immutable after creation apart from the init_css_set during + * subsystem registration (at boot time). + */ + struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + + /* + * List of csets participating in the on-going migration either as + * source or destination. Protected by cgroup_mutex. + */ + struct list_head mg_preload_node; + struct list_head mg_node; + + /* + * If this cset is acting as the source of migration the following + * two fields are set. mg_src_cgrp is the source cgroup of the + * on-going migration and mg_dst_cset is the destination cset the + * target tasks on this cset should be migrated to. Protected by + * cgroup_mutex. + */ + struct cgroup *mg_src_cgrp; + struct css_set *mg_dst_cset; + + /* + * On the default hierarhcy, ->subsys[ssid] may point to a css + * attached to an ancestor instead of the cgroup this css_set is + * associated with. The following node is anchored at + * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to + * iterate through all css's attached to a given cgroup. + */ + struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; + + /* For RCU-protected deletion */ + struct rcu_head rcu_head; +}; + +struct cgroup { + /* self css with NULL ->ss, points back to this cgroup */ + struct cgroup_subsys_state self; + + unsigned long flags; /* "unsigned long" so bitops work */ + + /* + * idr allocated in-hierarchy ID. + * + * ID 0 is not used, the ID of the root cgroup is always 1, and a + * new cgroup will be assigned with a smallest available ID. + * + * Allocating/Removing ID must be protected by cgroup_mutex. + */ + int id; + + /* + * If this cgroup contains any tasks, it contributes one to + * populated_cnt. All children with non-zero popuplated_cnt of + * their own contribute one. The count is zero iff there's no task + * in this cgroup or its subtree. + */ + int populated_cnt; + + struct kernfs_node *kn; /* cgroup kernfs entry */ + struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ + + /* + * The bitmask of subsystems enabled on the child cgroups. + * ->subtree_control is the one configured through + * "cgroup.subtree_control" while ->child_subsys_mask is the + * effective one which may have more subsystems enabled. 
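->child_subsys_mask is ->subtree_control expanded by each enabled subsystem's ->depends_on, iterated to a fixed point because dependencies can chain. An illustrative recomputation, with cgroup_subsys[] standing in for the core's internal table of registered subsystems:

/* Sketch only; the in-tree recalculation lives in kernel/cgroup.c. */
static unsigned int sketch_child_subsys_mask(unsigned int subtree_control)
{
	unsigned int cur = subtree_control;

	for (;;) {
		unsigned int next = subtree_control;
		int ssid;

		for (ssid = 0; ssid < CGROUP_SUBSYS_COUNT; ssid++)
			if (cur & (1 << ssid))
				next |= cgroup_subsys[ssid]->depends_on;

		if (next == cur)	/* dependencies stopped chaining */
			return cur;
		cur = next;
	}
}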
+ * Controller knobs are made available iff it's enabled in + * ->subtree_control. + */ + unsigned int subtree_control; + unsigned int child_subsys_mask; + + /* Private pointers for each registered subsystem */ + struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; + + struct cgroup_root *root; + + /* + * List of cgrp_cset_links pointing at css_sets with tasks in this + * cgroup. Protected by css_set_lock. + */ + struct list_head cset_links; + + /* + * On the default hierarchy, a css_set for a cgroup with some + * susbsys disabled will point to css's which are associated with + * the closest ancestor which has the subsys enabled. The + * following lists all css_sets which point to this cgroup's css + * for the given subsystem. + */ + struct list_head e_csets[CGROUP_SUBSYS_COUNT]; + + /* + * list of pidlists, up to two for each namespace (one for procs, one + * for tasks); created on demand. + */ + struct list_head pidlists; + struct mutex pidlist_mutex; + + /* used to wait for offlining of csses */ + wait_queue_head_t offline_waitq; + + /* used to schedule release agent */ + struct work_struct release_agent_work; +}; + +/* + * A cgroup_root represents the root of a cgroup hierarchy, and may be + * associated with a kernfs_root to form an active hierarchy. This is + * internal to cgroup core. Don't access directly from controllers. + */ +struct cgroup_root { + struct kernfs_root *kf_root; + + /* The bitmask of subsystems attached to this hierarchy */ + unsigned int subsys_mask; + + /* Unique id for this hierarchy. */ + int hierarchy_id; + + /* The root cgroup. Root is destroyed on its release. */ + struct cgroup cgrp; + + /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ + atomic_t nr_cgrps; + + /* A list running through the active hierarchies */ + struct list_head root_list; + + /* Hierarchy-specific flags */ + unsigned int flags; + + /* IDs for cgroups in this hierarchy */ + struct idr cgroup_idr; + + /* The path to use for release notifications. */ + char release_agent_path[PATH_MAX]; + + /* The name for this hierarchy - may be empty */ + char name[MAX_CGROUP_ROOT_NAMELEN]; +}; + +/* + * struct cftype: handler definitions for cgroup control files + * + * When reading/writing to a file: + * - the cgroup to use is file->f_path.dentry->d_parent->d_fsdata + * - the 'cftype' of the file is file->f_path.dentry->d_fsdata + */ +struct cftype { + /* + * By convention, the name should begin with the name of the + * subsystem, followed by a period. Zero length string indicates + * end of cftype array. + */ + char name[MAX_CFTYPE_NAME]; + int private; + /* + * If not 0, file mode is set to this value, otherwise it will + * be figured out automatically + */ + umode_t mode; + + /* + * The maximum length of string, excluding trailing nul, that can + * be passed to write. If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed. + */ + size_t max_write_len; + + /* CFTYPE_* flags */ + unsigned int flags; + + /* + * Fields used for internal bookkeeping. Initialized automatically + * during registration. + */ + struct cgroup_subsys *ss; /* NULL for cgroup core files */ + struct list_head node; /* anchored at ss->cfts */ + struct kernfs_ops *kf_ops; + + /* + * read_u64() is a shortcut for the common case of returning a + * single integer. 
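A controller normally fills a cftype array with these shortcuts rather than implementing seq_show() by hand. A hypothetical example ("demo" is not an in-tree subsystem, and to_demo() is an assumed container_of() wrapper):

static u64 demo_limit_read(struct cgroup_subsys_state *css,
			   struct cftype *cft)
{
	return to_demo(css)->limit;
}

static int demo_limit_write(struct cgroup_subsys_state *css,
			    struct cftype *cft, u64 val)
{
	to_demo(css)->limit = val;
	return 0;
}

static struct cftype demo_files[] = {
	{
		.name		= "limit",
		.read_u64	= demo_limit_read,
		.write_u64	= demo_limit_write,
	},
	{ }	/* zero-length name terminates the array */
};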
Use it in place of read() + */ + u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft); + /* + * read_s64() is a signed version of read_u64() + */ + s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); + + /* generic seq_file read interface */ + int (*seq_show)(struct seq_file *sf, void *v); + + /* optional ops, implement all or none */ + void *(*seq_start)(struct seq_file *sf, loff_t *ppos); + void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); + void (*seq_stop)(struct seq_file *sf, void *v); + + /* + * write_u64() is a shortcut for the common case of accepting + * a single integer (as parsed by simple_strtoull) from + * userspace. Use in place of write(); return 0 or error. + */ + int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft, + u64 val); + /* + * write_s64() is a signed version of write_u64() + */ + int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, + s64 val); + + /* + * write() is the generic write callback which maps directly to + * kernfs write operation and overrides all other operations. + * Maximum write size is determined by ->max_write_len. Use + * of_css/cft() to access the associated css and cft. + */ + ssize_t (*write)(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key lockdep_key; +#endif +}; + +/* + * Control Group subsystem type. + * See Documentation/cgroups/cgroups.txt for details + */ +struct cgroup_subsys { + struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css); + int (*css_online)(struct cgroup_subsys_state *css); + void (*css_offline)(struct cgroup_subsys_state *css); + void (*css_released)(struct cgroup_subsys_state *css); + void (*css_free)(struct cgroup_subsys_state *css); + void (*css_reset)(struct cgroup_subsys_state *css); + void (*css_e_css_changed)(struct cgroup_subsys_state *css); + + int (*can_attach)(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset); + void (*cancel_attach)(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset); + void (*attach)(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset); + void (*fork)(struct task_struct *task); + void (*exit)(struct cgroup_subsys_state *css, + struct cgroup_subsys_state *old_css, + struct task_struct *task); + void (*bind)(struct cgroup_subsys_state *root_css); + + int disabled; + int early_init; + + /* + * If %false, this subsystem is properly hierarchical - + * configuration, resource accounting and restriction on a parent + * cgroup cover those of its children. If %true, hierarchy support + * is broken in some ways - some subsystems ignore hierarchy + * completely while others are only implemented half-way. + * + * It's now disallowed to create nested cgroups if the subsystem is + * broken and cgroup core will emit a warning message on such + * cases. Eventually, all subsystems will be made properly + * hierarchical and this will go away. + */ + bool broken_hierarchy; + bool warned_broken_hierarchy; + + /* the following two fields are initialized automtically during boot */ + int id; + const char *name; + + /* link to parent, protected by cgroup_lock() */ + struct cgroup_root *root; + + /* idr for css->id */ + struct idr css_idr; + + /* + * List of cftypes. Each entry is the first entry of an array + * terminated by zero length name. + */ + struct list_head cfts; + + /* + * Base cftypes which are automatically registered. The two can + * point to the same array. 
+ */ + struct cftype *dfl_cftypes; /* for the default hierarchy */ + struct cftype *legacy_cftypes; /* for the legacy hierarchies */ + + /* + * A subsystem may depend on other subsystems. When such subsystem + * is enabled on a cgroup, the depended-upon subsystems are enabled + * together if available. Subsystems enabled due to dependency are + * not visible to userland until explicitly enabled. The following + * specifies the mask of subsystems that this one depends on. + */ + unsigned int depends_on; +}; + +#endif /* CONFIG_CGROUPS */ +#endif /* _LINUX_CGROUP_DEFS_H */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b9cb94c3102a4..96a2ecd5aa696 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -11,23 +11,16 @@ #include #include #include -#include #include #include #include -#include -#include #include -#include #include #include -#include -#ifdef CONFIG_CGROUPS +#include -struct cgroup_root; -struct cgroup_subsys; -struct cgroup; +#ifdef CONFIG_CGROUPS extern int cgroup_init_early(void); extern int cgroup_init(void); @@ -40,66 +33,6 @@ extern int cgroupstats_build(struct cgroupstats *stats, extern int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); -/* define the enumeration of all cgroup subsystems */ -#define SUBSYS(_x) _x ## _cgrp_id, -enum cgroup_subsys_id { -#include - CGROUP_SUBSYS_COUNT, -}; -#undef SUBSYS - -/* - * Per-subsystem/per-cgroup state maintained by the system. This is the - * fundamental structural building block that controllers deal with. - * - * Fields marked with "PI:" are public and immutable and may be accessed - * directly without synchronization. - */ -struct cgroup_subsys_state { - /* PI: the cgroup that this css is attached to */ - struct cgroup *cgroup; - - /* PI: the cgroup subsystem that this css is attached to */ - struct cgroup_subsys *ss; - - /* reference count - access via css_[try]get() and css_put() */ - struct percpu_ref refcnt; - - /* PI: the parent css */ - struct cgroup_subsys_state *parent; - - /* siblings list anchored at the parent's ->children */ - struct list_head sibling; - struct list_head children; - - /* - * PI: Subsys-unique ID. 0 is unused and root is always 1. The - * matching css can be looked up using css_from_id(). - */ - int id; - - unsigned int flags; - - /* - * Monotonically increasing unique serial number which defines a - * uniform order among all csses. It's guaranteed that all - * ->children lists are in the ascending order of ->serial_nr and - * used to allow interrupting and resuming iterations. - */ - u64 serial_nr; - - /* percpu_ref killing and RCU release */ - struct rcu_head rcu_head; - struct work_struct destroy_work; -}; - -/* bits in struct cgroup_subsys_state flags field */ -enum { - CSS_NO_REF = (1 << 0), /* no reference counting for this css */ - CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ - CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ -}; - /** * css_get - obtain a reference on the specified css * @css: target css @@ -185,307 +118,6 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) percpu_ref_put_many(&css->refcnt, n); } -/* bits in struct cgroup flags field */ -enum { - /* Control Group requires release notifications to userspace */ - CGRP_NOTIFY_ON_RELEASE, - /* - * Clone the parent's configuration when creating a new child - * cpuset cgroup. 
For historical reasons, this option can be - * specified at mount time and thus is implemented here. - */ - CGRP_CPUSET_CLONE_CHILDREN, -}; - -struct cgroup { - /* self css with NULL ->ss, points back to this cgroup */ - struct cgroup_subsys_state self; - - unsigned long flags; /* "unsigned long" so bitops work */ - - /* - * idr allocated in-hierarchy ID. - * - * ID 0 is not used, the ID of the root cgroup is always 1, and a - * new cgroup will be assigned with a smallest available ID. - * - * Allocating/Removing ID must be protected by cgroup_mutex. - */ - int id; - - /* - * If this cgroup contains any tasks, it contributes one to - * populated_cnt. All children with non-zero popuplated_cnt of - * their own contribute one. The count is zero iff there's no task - * in this cgroup or its subtree. - */ - int populated_cnt; - - struct kernfs_node *kn; /* cgroup kernfs entry */ - struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ - - /* - * The bitmask of subsystems enabled on the child cgroups. - * ->subtree_control is the one configured through - * "cgroup.subtree_control" while ->child_subsys_mask is the - * effective one which may have more subsystems enabled. - * Controller knobs are made available iff it's enabled in - * ->subtree_control. - */ - unsigned int subtree_control; - unsigned int child_subsys_mask; - - /* Private pointers for each registered subsystem */ - struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; - - struct cgroup_root *root; - - /* - * List of cgrp_cset_links pointing at css_sets with tasks in this - * cgroup. Protected by css_set_lock. - */ - struct list_head cset_links; - - /* - * On the default hierarchy, a css_set for a cgroup with some - * susbsys disabled will point to css's which are associated with - * the closest ancestor which has the subsys enabled. The - * following lists all css_sets which point to this cgroup's css - * for the given subsystem. - */ - struct list_head e_csets[CGROUP_SUBSYS_COUNT]; - - /* - * list of pidlists, up to two for each namespace (one for procs, one - * for tasks); created on demand. - */ - struct list_head pidlists; - struct mutex pidlist_mutex; - - /* used to wait for offlining of csses */ - wait_queue_head_t offline_waitq; - - /* used to schedule release agent */ - struct work_struct release_agent_work; -}; - -#define MAX_CGROUP_ROOT_NAMELEN 64 - -/* cgroup_root->flags */ -enum { - CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), /* __DEVEL__sane_behavior specified */ - CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ - CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ -}; - -/* - * A cgroup_root represents the root of a cgroup hierarchy, and may be - * associated with a kernfs_root to form an active hierarchy. This is - * internal to cgroup core. Don't access directly from controllers. - */ -struct cgroup_root { - struct kernfs_root *kf_root; - - /* The bitmask of subsystems attached to this hierarchy */ - unsigned int subsys_mask; - - /* Unique id for this hierarchy. */ - int hierarchy_id; - - /* The root cgroup. Root is destroyed on its release. */ - struct cgroup cgrp; - - /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ - atomic_t nr_cgrps; - - /* A list running through the active hierarchies */ - struct list_head root_list; - - /* Hierarchy-specific flags */ - unsigned int flags; - - /* IDs for cgroups in this hierarchy */ - struct idr cgroup_idr; - - /* The path to use for release notifications. 
*/ - char release_agent_path[PATH_MAX]; - - /* The name for this hierarchy - may be empty */ - char name[MAX_CGROUP_ROOT_NAMELEN]; -}; - -/* - * A css_set is a structure holding pointers to a set of - * cgroup_subsys_state objects. This saves space in the task struct - * object and speeds up fork()/exit(), since a single inc/dec and a - * list_add()/del() can bump the reference count on the entire cgroup - * set for a task. - */ - -struct css_set { - - /* Reference count */ - atomic_t refcount; - - /* - * List running through all cgroup groups in the same hash - * slot. Protected by css_set_lock - */ - struct hlist_node hlist; - - /* - * Lists running through all tasks using this cgroup group. - * mg_tasks lists tasks which belong to this cset but are in the - * process of being migrated out or in. Protected by - * css_set_rwsem, but, during migration, once tasks are moved to - * mg_tasks, it can be read safely while holding cgroup_mutex. - */ - struct list_head tasks; - struct list_head mg_tasks; - - /* - * List of cgrp_cset_links pointing at cgroups referenced from this - * css_set. Protected by css_set_lock. - */ - struct list_head cgrp_links; - - /* the default cgroup associated with this css_set */ - struct cgroup *dfl_cgrp; - - /* - * Set of subsystem states, one for each subsystem. This array is - * immutable after creation apart from the init_css_set during - * subsystem registration (at boot time). - */ - struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; - - /* - * List of csets participating in the on-going migration either as - * source or destination. Protected by cgroup_mutex. - */ - struct list_head mg_preload_node; - struct list_head mg_node; - - /* - * If this cset is acting as the source of migration the following - * two fields are set. mg_src_cgrp is the source cgroup of the - * on-going migration and mg_dst_cset is the destination cset the - * target tasks on this cset should be migrated to. Protected by - * cgroup_mutex. - */ - struct cgroup *mg_src_cgrp; - struct css_set *mg_dst_cset; - - /* - * On the default hierarhcy, ->subsys[ssid] may point to a css - * attached to an ancestor instead of the cgroup this css_set is - * associated with. The following node is anchored at - * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to - * iterate through all css's attached to a given cgroup. - */ - struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; - - /* For RCU-protected deletion */ - struct rcu_head rcu_head; -}; - -/* - * struct cftype: handler definitions for cgroup control files - * - * When reading/writing to a file: - * - the cgroup to use is file->f_path.dentry->d_parent->d_fsdata - * - the 'cftype' of the file is file->f_path.dentry->d_fsdata - */ - -/* cftype->flags */ -enum { - CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ - CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ - CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ - - /* internal flags, do not use outside cgroup core proper */ - __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ - __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ -}; - -#define MAX_CFTYPE_NAME 64 - -struct cftype { - /* - * By convention, the name should begin with the name of the - * subsystem, followed by a period. Zero length string indicates - * end of cftype array. 
- */ - char name[MAX_CFTYPE_NAME]; - int private; - /* - * If not 0, file mode is set to this value, otherwise it will - * be figured out automatically - */ - umode_t mode; - - /* - * The maximum length of string, excluding trailing nul, that can - * be passed to write. If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed. - */ - size_t max_write_len; - - /* CFTYPE_* flags */ - unsigned int flags; - - /* - * Fields used for internal bookkeeping. Initialized automatically - * during registration. - */ - struct cgroup_subsys *ss; /* NULL for cgroup core files */ - struct list_head node; /* anchored at ss->cfts */ - struct kernfs_ops *kf_ops; - - /* - * read_u64() is a shortcut for the common case of returning a - * single integer. Use it in place of read() - */ - u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft); - /* - * read_s64() is a signed version of read_u64() - */ - s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); - - /* generic seq_file read interface */ - int (*seq_show)(struct seq_file *sf, void *v); - - /* optional ops, implement all or none */ - void *(*seq_start)(struct seq_file *sf, loff_t *ppos); - void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); - void (*seq_stop)(struct seq_file *sf, void *v); - - /* - * write_u64() is a shortcut for the common case of accepting - * a single integer (as parsed by simple_strtoull) from - * userspace. Use in place of write(); return 0 or error. - */ - int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft, - u64 val); - /* - * write_s64() is a signed version of write_u64() - */ - int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, - s64 val); - - /* - * write() is the generic write callback which maps directly to - * kernfs write operation and overrides all other operations. - * Maximum write size is determined by ->max_write_len. Use - * of_css/cft() to access the associated css and cft. - */ - ssize_t (*write)(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off); - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lock_class_key lockdep_key; -#endif -}; - extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; @@ -612,11 +244,6 @@ int cgroup_rm_cftypes(struct cftype *cfts); bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); -/* - * Control Group taskset, used to pass around set of tasks to cgroup_subsys - * methods. - */ -struct cgroup_taskset; struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); @@ -629,84 +256,6 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); for ((task) = cgroup_taskset_first((tset)); (task); \ (task) = cgroup_taskset_next((tset))) -/* - * Control Group subsystem type. 
- * See Documentation/cgroups/cgroups.txt for details - */ - -struct cgroup_subsys { - struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css); - int (*css_online)(struct cgroup_subsys_state *css); - void (*css_offline)(struct cgroup_subsys_state *css); - void (*css_released)(struct cgroup_subsys_state *css); - void (*css_free)(struct cgroup_subsys_state *css); - void (*css_reset)(struct cgroup_subsys_state *css); - void (*css_e_css_changed)(struct cgroup_subsys_state *css); - - int (*can_attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*cancel_attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*fork)(struct task_struct *task); - void (*exit)(struct cgroup_subsys_state *css, - struct cgroup_subsys_state *old_css, - struct task_struct *task); - void (*bind)(struct cgroup_subsys_state *root_css); - - int disabled; - int early_init; - - /* - * If %false, this subsystem is properly hierarchical - - * configuration, resource accounting and restriction on a parent - * cgroup cover those of its children. If %true, hierarchy support - * is broken in some ways - some subsystems ignore hierarchy - * completely while others are only implemented half-way. - * - * It's now disallowed to create nested cgroups if the subsystem is - * broken and cgroup core will emit a warning message on such - * cases. Eventually, all subsystems will be made properly - * hierarchical and this will go away. - */ - bool broken_hierarchy; - bool warned_broken_hierarchy; - - /* the following two fields are initialized automtically during boot */ - int id; -#define MAX_CGROUP_TYPE_NAMELEN 32 - const char *name; - - /* link to parent, protected by cgroup_lock() */ - struct cgroup_root *root; - - /* idr for css->id */ - struct idr css_idr; - - /* - * List of cftypes. Each entry is the first entry of an array - * terminated by zero length name. - */ - struct list_head cfts; - - /* - * Base cftypes which are automatically registered. The two can - * point to the same array. - */ - struct cftype *dfl_cftypes; /* for the default hierarchy */ - struct cftype *legacy_cftypes; /* for the legacy hierarchies */ - - /* - * A subsystem may depend on other subsystems. When such subsystem - * is enabled on a cgroup, the depended-upon subsystems are enabled - * together if available. Subsystems enabled due to dependency are - * not visible to userland until explicitly enabled. The following - * specifies the mask of subsystems that this one depends on. - */ - unsigned int depends_on; -}; - #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include #undef SUBSYS diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index 0c9a2f2c2802f..d4c71132d07f0 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -13,10 +13,12 @@ /* Intel ECC compiler doesn't support gcc specific asm stmts. * It uses intrinsics to do the equivalent things. */ +#undef barrier #undef barrier_data #undef RELOC_HIDE #undef OPTIMIZER_HIDE_VAR +#define barrier() __memory_barrier() #define barrier_data(ptr) barrier() #define RELOC_HIDE(ptr, off) \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 867722591be2c..99728072e536c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -142,7 +142,7 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); */ #define if(cond, ...) 
__trace_if( (cond , ## __VA_ARGS__) ) #define __trace_if(cond) \ - if (__builtin_constant_p((cond)) ? !!(cond) : \ + if (__builtin_constant_p(!!(cond)) ? !!(cond) : \ ({ \ int ______r; \ static struct ftrace_branch_data \ diff --git a/include/linux/console.h b/include/linux/console.h index 9f50fb413c11c..901555a3886e9 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -149,6 +149,7 @@ extern int console_trylock(void); extern void console_unlock(void); extern void console_conditional_schedule(void); extern void console_unblank(void); +extern void console_flush_on_panic(void); extern struct tty_driver *console_device(int *); extern void console_stop(struct console *); extern void console_start(struct console *); diff --git a/include/linux/cper.h b/include/linux/cper.h index 76abba4b238ec..dcacb1a72e26d 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -340,7 +340,27 @@ struct cper_ia_proc_ctx { __u64 mm_reg_addr; }; -/* Memory Error Section */ +/* Old Memory Error Section UEFI 2.1, 2.2 */ +struct cper_sec_mem_err_old { + __u64 validation_bits; + __u64 error_status; + __u64 physical_addr; + __u64 physical_addr_mask; + __u16 node; + __u16 card; + __u16 module; + __u16 bank; + __u16 device; + __u16 row; + __u16 column; + __u16 bit_pos; + __u64 requestor_id; + __u64 responder_id; + __u64 target_id; + __u8 error_type; +}; + +/* Memory Error Section UEFI >= 2.3 */ struct cper_sec_mem_err { __u64 validation_bits; __u64 error_status; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index df334cbacc6d0..ca9df45217344 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -160,6 +160,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); + struct inode *(*d_select_inode)(struct dentry *, unsigned); } ____cacheline_aligned; /* @@ -225,6 +226,7 @@ struct dentry_operations { #define DCACHE_MAY_FREE 0x00800000 #define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ +#define DCACHE_OP_SELECT_INODE 0x02000000 /* Unioned entry: dcache op selects inode */ extern seqlock_t rename_lock; @@ -406,9 +408,7 @@ static inline bool d_mountpoint(const struct dentry *dentry) */ static inline unsigned __d_entry_type(const struct dentry *dentry) { - unsigned type = READ_ONCE(dentry->d_flags); - smp_rmb(); - return type & DCACHE_ENTRY_TYPE; + return dentry->d_flags & DCACHE_ENTRY_TYPE; } static inline bool d_is_miss(const struct dentry *dentry) diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 251a2090a5544..e0ee0b3000b2d 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -19,6 +19,8 @@ int devpts_new_index(struct inode *ptmx_inode); void devpts_kill_index(struct inode *ptmx_inode, int idx); +void devpts_add_ref(struct inode *ptmx_inode); +void devpts_del_ref(struct inode *ptmx_inode); /* mknod in devpts */ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, void *priv); @@ -32,6 +34,8 @@ void devpts_pty_kill(struct inode *inode); /* Dummy stubs in the no-pty case */ static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; } static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { } +static inline void devpts_add_ref(struct inode *ptmx_inode) { } +static inline void devpts_del_ref(struct inode *ptmx_inode) { } static inline struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, void *priv) { diff --git 
a/include/linux/filter.h b/include/linux/filter.h index fa11b3a367be5..1ce6e1049a3bb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -428,6 +428,25 @@ static inline void bpf_jit_free(struct bpf_prog *fp) #define BPF_ANC BIT(15) +static inline bool bpf_needs_clear_a(const struct sock_filter *first) +{ + switch (first->code) { + case BPF_RET | BPF_K: + case BPF_LD | BPF_W | BPF_LEN: + return false; + + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: + if (first->k == SKF_AD_OFF + SKF_AD_ALU_XOR_X) + return true; + return false; + + default: + return true; + } +} + static inline u16 bpf_anc_helper(const struct sock_filter *ftest) { BUG_ON(ftest->code & BPF_ANC); diff --git a/include/linux/fs.h b/include/linux/fs.h index 35ec87e490b1a..fdc369fa69e8d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1036,12 +1036,12 @@ extern void locks_remove_file(struct file *); extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); -extern int posix_lock_file_wait(struct file *, struct file_lock *); +extern int posix_lock_inode_wait(struct inode *, struct file_lock *); extern int posix_unblock_lock(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); -extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); +extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec *time); extern int generic_setlease(struct file *, long, struct file_lock **, void **priv); @@ -1127,7 +1127,8 @@ static inline int posix_lock_file(struct file *filp, struct file_lock *fl, return -ENOLCK; } -static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +static inline int posix_lock_inode_wait(struct inode *inode, + struct file_lock *fl) { return -ENOLCK; } @@ -1153,8 +1154,8 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) return 0; } -static inline int flock_lock_file_wait(struct file *filp, - struct file_lock *request) +static inline int flock_lock_inode_wait(struct inode *inode, + struct file_lock *request) { return -ENOLCK; } @@ -1192,6 +1193,20 @@ static inline void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files) {} #endif /* !CONFIG_FILE_LOCKING */ +static inline struct inode *file_inode(const struct file *f) +{ + return f->f_inode; +} + +static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return posix_lock_inode_wait(file_inode(filp), fl); +} + +static inline int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return flock_lock_inode_wait(file_inode(filp), fl); +} struct fasync_struct { spinlock_t fa_lock; @@ -1641,7 +1656,6 @@ struct inode_operations { int (*set_acl)(struct inode *, struct posix_acl *, int); /* WARNING: probably going away soon, do not use! 
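bpf_needs_clear_a() above lets a classic-BPF JIT skip zeroing the A register when the program's first instruction overwrites A before reading it. A sketch of the consumer side, where struct jit_ctx and emit_clear_a() stand in for arch-specific codegen:

static void sketch_build_prologue(struct jit_ctx *ctx,
				  const struct sock_filter *prog)
{
	/*
	 * Zero A only when the first instruction may observe it;
	 * e.g. a leading BPF_RET|BPF_K never reads A at all.
	 */
	if (bpf_needs_clear_a(&prog[0]))
		emit_clear_a(ctx);
}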
*/ - int (*dentry_open)(struct dentry *, struct file *, const struct cred *); } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, @@ -1897,6 +1911,7 @@ struct file_system_type { #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ +#define FS_USERNS_VISIBLE 32 /* FS must already be visible */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); @@ -1984,7 +1999,6 @@ extern int vfs_ustat(dev_t, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); extern bool our_mnt(struct vfsmount *mnt); -extern bool fs_fully_visible(struct file_system_type *); extern int current_umask(void); @@ -1992,11 +2006,6 @@ extern void ihold(struct inode * inode); extern void iput(struct inode *); extern int generic_update_time(struct inode *, struct timespec *, int); -static inline struct inode *file_inode(const struct file *f) -{ - return f->f_inode; -} - /* /sys/fs */ extern struct kobject *fs_kobj; @@ -2193,7 +2202,6 @@ extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int); -extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); @@ -2780,6 +2788,8 @@ extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned in extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; +extern void make_empty_dir_inode(struct inode *inode); +extern bool is_empty_dir_inode(struct inode *inode); struct tree_descr { char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1da602982cf93..6cd8c0ee4b6f8 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -116,6 +116,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * SAVE_REGS. If another ops with this flag set is already registered * for any of the functions that this ops will be registered for, then * this ops will fail to register or set_filter_ip. 
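An ftrace_ops opts in to set_ftrace_pid filtering with the new flag; the core stashes the original callback in ->saved_func so it can interpose the PID check. A hypothetical user (my_trace_func is illustrative, not an in-tree tracer):

static void my_trace_func(unsigned long ip, unsigned long parent_ip,
			  struct ftrace_ops *op, struct pt_regs *regs)
{
	/* reached only for tasks matching set_ftrace_pid */
}

static struct ftrace_ops my_ops = {
	.func	= my_trace_func,
	.flags	= FTRACE_OPS_FL_PID,
};

/* register_ftrace_function(&my_ops), then write a PID to set_ftrace_pid */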
+ * PID - Is affected by set_ftrace_pid (allows filtering on those pids) */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -132,6 +133,7 @@ enum { FTRACE_OPS_FL_MODIFYING = 1 << 11, FTRACE_OPS_FL_ALLOC_TRAMP = 1 << 12, FTRACE_OPS_FL_IPMODIFY = 1 << 13, + FTRACE_OPS_FL_PID = 1 << 14, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -159,6 +161,7 @@ struct ftrace_ops { struct ftrace_ops *next; unsigned long flags; void *private; + ftrace_func_t saved_func; int __percpu *disabled; #ifdef CONFIG_DYNAMIC_FTRACE int nr_trampolines; diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 3a7c9ffd5ab93..da042657dc31d 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -406,6 +406,21 @@ static inline int desc_to_gpio(const struct gpio_desc *desc) return -EINVAL; } +/* Child properties interface */ +struct fwnode_handle; + +static inline struct gpio_desc *fwnode_get_named_gpiod( + struct fwnode_handle *fwnode, const char *propname) +{ + return ERR_PTR(-ENOSYS); +} + +static inline struct gpio_desc *devm_get_gpiod_from_child( + struct device *dev, const char *con_id, struct fwnode_handle *child) +{ + return ERR_PTR(-ENOSYS); +} + #endif /* CONFIG_GPIOLIB */ /* diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 0042bf330b99f..c02b5ce6c5cdb 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -230,6 +230,7 @@ struct hid_sensor_common { struct platform_device *pdev; unsigned usage_id; atomic_t data_ready; + atomic_t user_requested_state; struct iio_trigger *trigger; struct hid_sensor_hub_attribute_info poll; struct hid_sensor_hub_attribute_info report_state; diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d86b753e9b301..5ed7771ad3863 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -641,6 +641,15 @@ int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, */ #define IIO_DEGREE_TO_RAD(deg) (((deg) * 314159ULL + 9000000ULL) / 18000000ULL) +/** + * IIO_RAD_TO_DEGREE() - Convert rad to degree + * @rad: A value in rad + * + * Returns the given value converted from rad to degree + */ +#define IIO_RAD_TO_DEGREE(rad) \ + (((rad) * 18000000ULL + 314159ULL / 2) / 314159ULL) + /** * IIO_G_TO_M_S_2() - Convert g to meter / second**2 * @g: A value in g @@ -649,4 +658,12 @@ int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, */ #define IIO_G_TO_M_S_2(g) ((g) * 980665ULL / 100000ULL) +/** + * IIO_M_S_2_TO_G() - Convert meter / second**2 to g + * @ms2: A value in meter / second**2 + * + * Returns the given value converted from meter / second**2 to g + */ +#define IIO_M_S_2_TO_G(ms2) (((ms2) * 100000ULL + 980665ULL / 2) / 980665ULL) + #endif /* _INDUSTRIAL_IO_H_ */ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e7e53795b0a07..61c03dc0238a9 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -29,6 +29,7 @@ struct ipv6_devconf { __s32 max_desync_factor; __s32 max_addresses; __s32 accept_ra_defrtr; + __s32 accept_ra_min_hop_limit; __s32 accept_ra_pinfo; #ifdef CONFIG_IPV6_ROUTER_PREF __s32 accept_ra_rtr_pref; @@ -224,7 +225,7 @@ struct ipv6_pinfo { struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist __rcu *ipv6_fl_list; - struct ipv6_txoptions *opt; + struct ipv6_txoptions __rcu *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; struct inet6_cork cork; diff --git a/include/linux/irq.h b/include/linux/irq.h index 62c6901cab550..3532dca843f46 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ 
-467,6 +467,7 @@ extern int irq_chip_set_affinity_parent(struct irq_data *data, const struct cpumask *dest, bool force); extern int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on); +extern int irq_chip_set_type_parent(struct irq_data *data, unsigned int type); #endif /* Handling of unhandled and spurious interrupts: */ diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 20e7f78041c81..c90c9b70e5685 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1007,6 +1007,7 @@ struct journal_s #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file * data write error in ordered * mode */ +#define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */ /* * Function declarations for the journaling transaction and buffer @@ -1035,15 +1036,16 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal); int jbd2_journal_next_log_block(journal_t *, unsigned long long *); int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, unsigned long *block); -void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); +int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); /* Commit management */ extern void jbd2_journal_commit_transaction(journal_t *); /* Checkpoint list management */ -void __jbd2_journal_clean_checkpoint_list(journal_t *journal); +void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy); int __jbd2_journal_remove_checkpoint(struct journal_head *); +void jbd2_journal_destroy_checkpoint(journal_t *journal); void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); @@ -1157,7 +1159,7 @@ extern int jbd2_journal_recover (journal_t *journal); extern int jbd2_journal_wipe (journal_t *, int); extern int jbd2_journal_skip_recovery (journal_t *); extern void jbd2_journal_update_sb_errno(journal_t *); -extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t, +extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t, unsigned long, int); extern void __jbd2_journal_abort_hard (journal_t *); extern void jbd2_journal_abort (journal_t *, int); diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 71ecdab1671b8..29d1896c3ba55 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -45,6 +45,7 @@ enum kernfs_node_flag { KERNFS_LOCKDEP = 0x0100, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, + KERNFS_EMPTY_DIR = 0x1000, }; /* @flags for kernfs_create_root() */ @@ -285,6 +286,8 @@ void kernfs_destroy_root(struct kernfs_root *root); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, void *priv, const void *ns); +struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, + const char *name); struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, loff_t size, diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index e705467ddb478..d0a1f99e24e3e 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -28,7 +28,8 @@ extern void kmemleak_init(void) __ref; extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) __ref; -extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) __ref; +extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, + gfp_t gfp) __ref; extern void kmemleak_free(const void *ptr) __ref; extern void 
kmemleak_free_part(const void *ptr, size_t size) __ref; extern void kmemleak_free_percpu(const void __percpu *ptr) __ref; @@ -71,7 +72,8 @@ static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, gfp_t gfp) { } -static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) +static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, + gfp_t gfp) { } static inline void kmemleak_free(const void *ptr) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ad45054309a0f..29a57a5b7cee0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -423,6 +423,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ idx++) +static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) +{ + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) + if (vcpu->vcpu_id == id) + return vcpu; + return NULL; +} + #define kvm_for_each_memslot(memslot, slots) \ for (memslot = &slots->memslots[0]; \ memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\ diff --git a/include/linux/libata.h b/include/linux/libata.h index 28aeae46f355f..11c2dd114732a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -431,6 +431,9 @@ enum { ATA_HORKAGE_NOLPM = (1 << 20), /* don't use LPM */ ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ + ATA_HORKAGE_NO_NCQ_LOG = (1 << 23), /* don't use NCQ for log read */ + ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ + ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ @@ -714,7 +717,7 @@ struct ata_device { union { u16 id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */ u32 gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */ - }; + } ____cacheline_aligned; /* DEVSLP Timing Variables from Identify Device Data Log */ u8 devslp_timing[ATA_LOG_DEVSLP_SIZE]; diff --git a/include/linux/mm.h b/include/linux/mm.h index 0755b9fd03a7d..b2085582d44e3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1001,6 +1001,34 @@ static inline int page_mapped(struct page *page) return atomic_read(&(page)->_mapcount) >= 0; } +/* + * Return true only if the page has been allocated with + * ALLOC_NO_WATERMARKS and the low watermark was not + * met implying that the system is under some pressure. + */ +static inline bool page_is_pfmemalloc(struct page *page) +{ + /* + * Page index cannot be this large so this must be + * a pfmemalloc page. + */ + return page->index == -1UL; +} + +/* + * Only to be called by the page allocator on a freshly allocated + * page. + */ +static inline void set_page_pfmemalloc(struct page *page) +{ + page->index = -1UL; +} + +static inline void clear_page_pfmemalloc(struct page *page) +{ + page->index = 0; +} + /* * Different kinds of faults, as returned by handle_mm_fault(). * Used to decide whether a process gets delivered SIGBUS or diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8d37e26a1007c..c0c6b33535fb1 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -63,15 +63,6 @@ struct page { union { pgoff_t index; /* Our offset within mapping. 
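With the dedicated bool gone from struct page (see the mm_types.h hunk below), the allocator overloads page->index: -1UL can never be a valid mapping offset, so it doubles as the pfmemalloc marker. A sketch of the allocator-side handshake, assuming a freshly allocated page:

static void sketch_prep_new_page(struct page *page, bool no_watermarks)
{
	if (no_watermarks)
		set_page_pfmemalloc(page);	/* page->index = -1UL */
	else
		clear_page_pfmemalloc(page);	/* page->index = 0 */
}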
*/ void *freelist; /* sl[aou]b first free object */ - bool pfmemalloc; /* If set by the page allocator, - * ALLOC_NO_WATERMARKS was set - * and the low watermark was not - * met implying that the system - * is under some pressure. The - * caller should try ensure - * this page is only used to - * free other pages. - */ }; union { diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index de722d4e9d61b..258daf914c6df 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -121,6 +121,7 @@ struct mmc_data { struct mmc_request *mrq; /* associated request */ unsigned int sg_len; /* size of scatter list */ + int sg_count; /* mapped sg entries */ struct scatterlist *sg; /* I/O scatter list */ s32 host_cookie; /* host private data */ }; diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 3d4ea7eb2b68b..12b75f3ba0a0c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -175,11 +175,6 @@ typedef enum { #define NAND_OWN_BUFFERS 0x00020000 /* Chip may not exist, so silence any errors in scan */ #define NAND_SCAN_SILENT_NODEV 0x00040000 -/* - * This option could be defined by controller drivers to protect against - * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers - */ -#define NAND_USE_BOUNCE_BUFFER 0x00080000 /* * Autodetect nand buswidth with readid/onfi. * This suppose the driver will configure the hardware in 8 bits mode @@ -187,6 +182,11 @@ typedef enum { * before calling nand_scan_tail. */ #define NAND_BUSWIDTH_AUTO 0x00080000 +/* + * This option could be defined by controller drivers to protect against + * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers + */ +#define NAND_USE_BOUNCE_BUFFER 0x00100000 /* Options set by nand scan */ /* Nand scan has allocated controller struct */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b95f914ce0838..150f43a9149cb 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -540,9 +540,7 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, static inline loff_t nfs_size_to_loff_t(__u64 size) { - if (size > (__u64) OFFSET_MAX - 1) - return OFFSET_MAX - 1; - return (loff_t) size; + return min_t(u64, size, OFFSET_MAX); } static inline ino_t diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 5e1273d4de140..eda4a72a9b25d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -220,7 +220,7 @@ struct nfs_server { #define NFS_CAP_SYMLINKS (1U << 2) #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) -#define NFS_CAP_CHANGE_ATTR (1U << 5) +/* #define NFS_CAP_CHANGE_ATTR (1U << 5) */ #define NFS_CAP_FILEID (1U << 6) #define NFS_CAP_MODE (1U << 7) #define NFS_CAP_NLINK (1U << 8) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 93ab6071bbe96..e9e9a8dcfb477 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1142,7 +1142,7 @@ struct nfs41_state_protection { struct nfs4_op_map allow; }; -#define NFS4_EXCHANGE_ID_LEN (48) +#define NFS4_EXCHANGE_ID_LEN (127) struct nfs41_exchange_id_args { struct nfs_client *client; nfs4_verifier *verifier; diff --git a/include/linux/of.h b/include/linux/of.h index b871ff9d81d72..8135d507d0895 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -673,7 +673,10 @@ static inline void of_property_clear_flag(struct property *p, unsigned long flag #if defined(CONFIG_OF) && defined(CONFIG_NUMA) extern int of_node_to_nid(struct device_node *np); #else -static inline int of_node_to_nid(struct 
device_node *device) { return 0; } +static inline int of_node_to_nid(struct device_node *device) +{ + return NUMA_NO_NODE; +} #endif static inline struct device_node *of_find_matching_node( diff --git a/include/linux/pci.h b/include/linux/pci.h index 353db8dc4c6e0..6e935e5eab565 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -180,6 +180,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_BUS_RESET = (__force pci_dev_flags_t) (1 << 6), /* Do not use PM reset even if device advertises NoSoftRst- */ PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7), + /* Get VPD from function 0 VPD */ + PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8), }; enum pci_irq_reroute_variant { @@ -577,9 +579,15 @@ int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 val); +#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT +typedef u64 pci_bus_addr_t; +#else +typedef u32 pci_bus_addr_t; +#endif + struct pci_bus_region { - dma_addr_t start; - dma_addr_t end; + pci_bus_addr_t start; + pci_bus_addr_t end; }; struct pci_dynids { @@ -1006,6 +1014,7 @@ int __must_check pci_assign_resource(struct pci_dev *dev, int i); int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); int pci_select_bars(struct pci_dev *dev, unsigned long flags); bool pci_device_is_present(struct pci_dev *pdev); +void pci_ignore_hotplug(struct pci_dev *dev); /* ROM control related routines */ int pci_enable_rom(struct pci_dev *pdev); @@ -1043,11 +1052,6 @@ bool pci_dev_run_wake(struct pci_dev *dev); bool pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); -static inline void pci_ignore_hotplug(struct pci_dev *dev) -{ - dev->ignore_hotplug = 1; -} - static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable) { @@ -1128,7 +1132,7 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus, int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr); -static inline dma_addr_t pci_bus_address(struct pci_dev *pdev, int bar) +static inline pci_bus_addr_t pci_bus_address(struct pci_dev *pdev, int bar) { struct pci_bus_region region; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 75a1dd8dc56ee..a80f1fd01ddb6 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -237,6 +237,7 @@ struct power_supply { /* private */ struct device dev; struct work_struct changed_work; + struct delayed_work deferred_register_work; spinlock_t changed_lock; bool changed; atomic_t use_cnt; diff --git a/include/linux/preempt.h b/include/linux/preempt.h index de83b4eb16428..8cd6725c5758c 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -20,7 +20,8 @@ #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void preempt_count_add(int val); extern void preempt_count_sub(int val); -#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); }) +#define preempt_count_dec_and_test() \ + ({ preempt_count_sub(1); should_resched(0); }) #else #define preempt_count_add(val) __preempt_count_add(val) #define preempt_count_sub(val) __preempt_count_sub(val) @@ -59,7 +60,7 @@ do { \ #define preempt_check_resched() \ do { \ - if (should_resched()) \ + if (should_resched(0)) \ __preempt_schedule(); \ } while (0) diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h index 
dbeec4d4a3bea..5cb25f17331a3 100644 --- a/include/linux/preempt_mask.h +++ b/include/linux/preempt_mask.h @@ -71,12 +71,20 @@ */ #define in_nmi() (preempt_count() & NMI_MASK) +/* + * The preempt_count offset after preempt_disable(); + */ #if defined(CONFIG_PREEMPT_COUNT) -# define PREEMPT_CHECK_OFFSET 1 +# define PREEMPT_DISABLE_OFFSET PREEMPT_OFFSET #else -# define PREEMPT_CHECK_OFFSET 0 +# define PREEMPT_DISABLE_OFFSET 0 #endif +/* + * The preempt_count offset after spin_lock() + */ +#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET + /* * The preempt_count offset needed for things like: * @@ -90,7 +98,7 @@ * * Work as expected. */ -#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_CHECK_OFFSET) +#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_LOCK_OFFSET) /* * Are we running in atomic context? WARNING: this macro cannot @@ -106,7 +114,7 @@ * (used by the scheduler, *after* releasing the kernel lock) */ #define in_atomic_preempt_off() \ - ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) + ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_DISABLE_OFFSET) #ifdef CONFIG_PREEMPT_COUNT # define preemptible() (preempt_count() == 0 && !irqs_disabled()) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 33170dbd9db40..5d5174b59802a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -369,13 +369,29 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start) void **radix_tree_next_chunk(struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned flags); +/** + * radix_tree_iter_retry - retry this chunk of the iteration + * @iter: iterator state + * + * If we iterate over a tree protected only by the RCU lock, a race + * against deletion or creation may result in seeing a slot for which + * radix_tree_deref_retry() returns true. If so, call this function + * and continue the iteration. 
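+ *
+ * Example (editor's sketch, not part of the original patch; it assumes
+ * the radix_tree_for_each_slot() helpers declared in this header):
+ *
+ *	radix_tree_for_each_slot(slot, root, &iter, 0) {
+ *		void *entry = radix_tree_deref_slot(slot);
+ *		if (radix_tree_deref_retry(entry)) {
+ *			slot = radix_tree_iter_retry(&iter);
+ *			continue;
+ *		}
+ *		... process entry ...
+ *	}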
+ */ +static inline __must_check +void **radix_tree_iter_retry(struct radix_tree_iter *iter) +{ + iter->next_index = iter->index; + return NULL; +} + /** * radix_tree_chunk_size - get current chunk size * * @iter: pointer to radix tree iterator * Returns: current chunk size */ -static __always_inline unsigned +static __always_inline long radix_tree_chunk_size(struct radix_tree_iter *iter) { return iter->next_index - iter->index; } @@ -409,9 +425,9 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) return slot + offset + 1; } } else { - unsigned size = radix_tree_chunk_size(iter) - 1; + long size = radix_tree_chunk_size(iter); - while (size--) { + while (--size > 0) { slot++; iter->index++; if (likely(*slot))
diff --git a/include/linux/rmap.h b/include/linux/rmap.h index c89c53a113a8d..6f48ddc4b2b55 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -105,20 +105,6 @@ static inline void put_anon_vma(struct anon_vma *anon_vma) __put_anon_vma(anon_vma); } -static inline void vma_lock_anon_vma(struct vm_area_struct *vma) -{ - struct anon_vma *anon_vma = vma->anon_vma; - if (anon_vma) - down_write(&anon_vma->root->rwsem); -} - -static inline void vma_unlock_anon_vma(struct vm_area_struct *vma) -{ - struct anon_vma *anon_vma = vma->anon_vma; - if (anon_vma) - up_write(&anon_vma->root->rwsem); -} - static inline void anon_vma_lock_write(struct anon_vma *anon_vma) { down_write(&anon_vma->root->rwsem);
diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a2e6122734f..9128b4e9f5418 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -802,6 +802,7 @@ struct user_struct { unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ #endif unsigned long locked_shm; /* How many pages of mlocked shm ? */ + unsigned long unix_inflight; /* How many files in flight in unix sockets */ #ifdef CONFIG_KEYS struct key *uid_keyring; /* UID specific keyring */ @@ -2834,12 +2835,6 @@ extern int _cond_resched(void); extern int __cond_resched_lock(spinlock_t *lock); -#ifdef CONFIG_PREEMPT_COUNT -#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET -#else -#define PREEMPT_LOCK_OFFSET 0 -#endif - #define cond_resched_lock(lock) ({ \ ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\ __cond_resched_lock(lock); \
diff --git a/include/linux/security.h b/include/linux/security.h index 18264ea9e3141..5d45b4fd91d2a 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2527,7 +2527,7 @@ static inline int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { - return cap_task_prctl(option, arg2, arg3, arg3, arg5); + return cap_task_prctl(option, arg2, arg3, arg4, arg5); } static inline void security_task_to_inode(struct task_struct *p, struct inode *inode)
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index afbb1fd77c772..7848473a5bc8b 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -148,6 +148,41 @@ static inline struct user_namespace *seq_user_ns(struct seq_file *seq) #endif } +/** + * seq_show_option - display mount options with appropriate escapes.
+ * @m: the seq_file handle + * @name: the mount option name + * @value: the mount option name's value, can be NULL + */ +static inline void seq_show_option(struct seq_file *m, const char *name, + const char *value) +{ + seq_putc(m, ','); + seq_escape(m, name, ",= \t\n\\"); + if (value) { + seq_putc(m, '='); + seq_escape(m, value, ", \t\n\\"); + } +} + +/** + * seq_show_option_n - display mount options with appropriate escapes + * where @value must be a specific length. + * @m: the seq_file handle + * @name: the mount option name + * @value: the mount option name's value, cannot be NULL + * @length: the length of @value to display + * + * This is a macro since this uses "length" to define the size of the + * stack buffer. + */ +#define seq_show_option_n(m, name, value, length) { \ + char val_buf[length + 1]; \ + strncpy(val_buf, value, length); \ + val_buf[length] = '\0'; \ + seq_show_option(m, name, val_buf); \ +} + #define SEQ_START_TOKEN ((void *)1) /* * Helpers for iteration over list_head-s in seq_files diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 5f68d0a391cee..c07e3a5360990 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -266,13 +266,13 @@ static inline void write_seqcount_end(seqcount_t *s) } /** - * write_seqcount_barrier - invalidate in-progress read-side seq operations + * write_seqcount_invalidate - invalidate in-progress read-side seq operations * @s: pointer to seqcount_t * - * After write_seqcount_barrier, no read-side seq operations will complete + * After write_seqcount_invalidate, no read-side seq operations will complete * successfully and see data older than this. */ -static inline void write_seqcount_barrier(seqcount_t *s) +static inline void write_seqcount_invalidate(seqcount_t *s) { smp_wmb(); s->sequence+=2; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b98941311009b..2d52518735a0e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -203,6 +203,7 @@ struct sk_buff; #else #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1) #endif +extern int sysctl_max_skb_frags; typedef struct skb_frag_struct skb_frag_t; @@ -1590,20 +1591,16 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; /* - * Propagate page->pfmemalloc to the skb if we can. The problem is - * that not all callers have unique ownership of the page. If - * pfmemalloc is set, we check the mapping as a mapping implies - * page->index is set (index and pfmemalloc share space). - * If it's a valid mapping, we cannot use page->pfmemalloc but we - * do not lose pfmemalloc information as the pages would not be - * allocated using __GFP_MEMALLOC. + * Propagate page pfmemalloc to the skb if we can. The problem is + * that not all callers have unique ownership of the page but rely + * on page_is_pfmemalloc doing the right thing(tm). 
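+ * (Editor's note: a pfmemalloc page comes from the emergency reserves,
+ * i.e. it was allocated with __GFP_MEMALLOC under memory pressure, and
+ * marking the skb lets the stack restrict it to SOCK_MEMALLOC sockets.)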
*/ frag->page.p = page; frag->page_offset = off; skb_frag_size_set(frag, size); page = compound_head(page); - if (page->pfmemalloc && !page->mapping) + if (page_is_pfmemalloc(page)) skb->pfmemalloc = true; } @@ -2250,7 +2247,7 @@ static inline struct page *dev_alloc_page(void) static inline void skb_propagate_pfmemalloc(struct page *page, struct sk_buff *skb) { - if (page && page->pfmemalloc) + if (page_is_pfmemalloc(page)) skb->pfmemalloc = true; } @@ -2592,6 +2589,9 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); + else if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start_offset(skb) < 0) + skb->ip_summed = CHECKSUM_NONE; } unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); @@ -3321,7 +3321,8 @@ struct skb_gso_cb { int encap_level; __u16 csum_start; }; -#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb) +#define SKB_SGO_CB_OFFSET 32 +#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_SGO_CB_OFFSET)) static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) { diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 7591788e9fbff..357e44c1a46b1 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -42,6 +42,7 @@ struct sock_xprt { /* * Connection of transports */ + unsigned long sock_state; struct delayed_work connect_worker; struct sockaddr_storage srcaddr; unsigned short srcport; @@ -76,6 +77,8 @@ struct sock_xprt { */ #define TCP_RPC_REPLY (1UL << 6) +#define XPRT_SOCK_CONNECTING 1U + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 76d1e38aabe1d..0c53fd51bf9b7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -524,7 +524,7 @@ asmlinkage long sys_chown(const char __user *filename, asmlinkage long sys_lchown(const char __user *filename, uid_t user, gid_t group); asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group); -#ifdef CONFIG_UID16 +#ifdef CONFIG_HAVE_UID16 asmlinkage long sys_chown16(const char __user *filename, old_uid_t user, old_gid_t group); asmlinkage long sys_lchown16(const char __user *filename, diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 795d5fea56977..fa7bc29925c92 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -188,6 +188,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); + +extern struct ctl_table sysctl_mount_point[]; + #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 99382c0df17eb..9f65758311a4e 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -210,6 +210,10 @@ int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, int __must_check sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, const void *new_ns); +int __must_check sysfs_create_mount_point(struct kobject *parent_kobj, + const char *name); +void sysfs_remove_mount_point(struct kobject *parent_kobj, + const char *name); int __must_check sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, @@ -298,6 +302,17 @@ static inline int sysfs_move_dir_ns(struct kobject *kobj, return 0; } +static 
inline int sysfs_create_mount_point(struct kobject *parent_kobj, + const char *name) +{ + return 0; +} + +static inline void sysfs_remove_mount_point(struct kobject *parent_kobj, + const char *name) +{ +} + static inline int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns)
diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 5eac316490eab..2e7d0f7a0ecca 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -40,6 +40,9 @@ /* No upper/lower limit requirement */ #define THERMAL_NO_LIMIT ((u32)~0) +/* use a value below 0K to indicate an invalid/uninitialized temperature */ +#define THERMAL_TEMP_INVALID -274000 + /* Unit conversion macros */ #define KELVIN_TO_CELSIUS(t) (long)(((long)t-2732 >= 0) ? \ ((long)t-2732+5)/10 : ((long)t-2732-5)/10) @@ -159,6 +162,7 @@ struct thermal_attr { * @forced_passive: If > 0, temperature at which to switch on all ACPI * processor cooling devices. Currently only used by the * step-wise governor. + * @need_update: if equal to 1, thermal_zone_device_update() needs to be invoked. * @ops: operations this &thermal_zone_device supports * @tzp: thermal zone parameters * @governor: pointer to the governor for this thermal zone @@ -185,6 +189,7 @@ struct thermal_zone_device { int emul_temperature; int passive; unsigned int forced_passive; + atomic_t need_update; struct thermal_zone_device_ops *ops; const struct thermal_zone_params *tzp; struct thermal_governor *governor;
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index a5f7f3ecafa3a..a6e1bca88cc63 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -14,8 +14,10 @@ * See the file COPYING for more details. */ +#include <linux/smp.h> #include <linux/errno.h> #include <linux/types.h> +#include <linux/cpumask.h> #include <linux/rcupdate.h> #include <linux/static_key.h> @@ -129,6 +131,9 @@ extern void syscall_unregfunc(void); void *it_func; \ void *__data; \ \ + if (!cpu_online(raw_smp_processor_id())) \ + return; \ + \ if (!(cond)) \ return; \ prercu; \
diff --git a/include/linux/tty.h b/include/linux/tty.h index d76631f615c22..9580c09afdbe6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -605,7 +605,7 @@ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); /* tty_audit.c */ #ifdef CONFIG_AUDIT -extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, +extern void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size, unsigned icanon); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); @@ -613,8 +613,8 @@ extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern void tty_audit_push(struct tty_struct *tty); extern int tty_audit_push_current(void); #else -static inline void tty_audit_add_data(struct tty_struct *tty, - unsigned char *data, size_t size, unsigned icanon) +static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, + size_t size, unsigned icanon) { } static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch)
diff --git a/include/linux/types.h b/include/linux/types.h index 59698be034908..69c44d981da30 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -35,7 +35,7 @@ typedef __kernel_gid16_t gid16_t; typedef unsigned long uintptr_t; -#ifdef CONFIG_UID16 +#ifdef CONFIG_HAVE_UID16 /* This is defined by include/asm-{arch}/posix_types.h */ typedef __kernel_old_uid_t old_uid_t; typedef __kernel_old_gid_t old_gid_t; @@ -139,12 +139,20 @@ typedef unsigned long blkcnt_t; */ #define pgoff_t unsigned long -/* A dma_addr_t can hold any valid DMA or bus address
for the platform */ +/* + * A dma_addr_t can hold any valid DMA address, i.e., any address returned + * by the DMA API. + * + * If the DMA API only uses 32-bit addresses, dma_addr_t need only be 32 + * bits wide. Bus addresses, e.g., PCI BARs, may be wider than 32 bits, + * but drivers do memory-mapped I/O to ioremapped kernel virtual addresses, + * so they don't care about the size of the actual bus addresses. + */ #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT typedef u64 dma_addr_t; #else typedef u32 dma_addr_t; -#endif /* dma_addr_t */ +#endif typedef unsigned __bitwise__ gfp_t; typedef unsigned __bitwise__ fmode_t;
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 7c9b484735c53..e7827ae2462c5 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -133,6 +133,7 @@ struct cdc_ncm_ctx { }; u8 cdc_ncm_select_altsetting(struct usb_interface *intf); +int cdc_ncm_change_mtu(struct net_device *net, int new_mtu); int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign);
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 9948c874e3f1e..1d0043dc34e42 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -47,4 +47,7 @@ /* device generates spurious wakeup, ignore remote wakeup capability */ #define USB_QUIRK_IGNORE_REMOTE_WAKEUP BIT(9) +/* device can't handle Link Power Management */ +#define USB_QUIRK_NO_LPM BIT(10) + #endif /* __LINUX_USB_QUIRKS_H */
diff --git a/include/net/act_api.h b/include/net/act_api.h index 3ee4c92afd1bd..931738bc5bba3 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -99,7 +99,6 @@ struct tc_action_ops { int tcf_hash_search(struct tc_action *a, u32 index); void tcf_hash_destroy(struct tc_action *a); -int tcf_hash_release(struct tc_action *a, int bind); u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo); int tcf_hash_check(u32 index, struct tc_action *a, int bind); int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, @@ -107,6 +106,13 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); void tcf_hash_insert(struct tc_action *a); +int __tcf_hash_release(struct tc_action *a, bool bind, bool strict); + +static inline int tcf_hash_release(struct tc_action *a, bool bind) +{ + return __tcf_hash_release(a, bind, false); +} + int tcf_register_action(struct tc_action_ops *a, unsigned int mask); int tcf_unregister_action(struct tc_action_ops *a); int tcf_action_destroy(struct list_head *actions, int bind);
diff --git a/include/net/af_unix.h b/include/net/af_unix.h index a175ba4a7adbc..7bb69c9c3c43d 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -6,8 +6,8 @@ #include <linux/mutex.h> #include <net/sock.h> -void unix_inflight(struct file *fp); -void unix_notinflight(struct file *fp); +void unix_inflight(struct user_struct *user, struct file *fp); +void unix_notinflight(struct user_struct *user, struct file *fp); void unix_gc(void); void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); @@ -63,8 +63,13 @@ struct unix_sock { #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; + wait_queue_t peer_wake; }; -#define unix_sk(__sk) ((struct unix_sock *)__sk) + +static inline struct unix_sock *unix_sk(struct sock *sk) +{ + return (struct unix_sock *)sk;
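+	/* Editor's note: the inline replaces the old unix_sk() #define
+	 * so the cast is type-checked; behaviour is unchanged. */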
+} #define peer_wait peer_wq.wait
diff --git a/include/net/dst.h b/include/net/dst.h index 0fb99a26e9737..182b812d45e1b 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -312,6 +312,39 @@ static inline void skb_dst_force(struct sk_buff *skb) } } +/** + * dst_hold_safe - Take a reference on a dst if possible + * @dst: pointer to dst entry + * + * This helper returns false if it could not safely + * take a reference on a dst. + */ +static inline bool dst_hold_safe(struct dst_entry *dst) +{ + if (dst->flags & DST_NOCACHE) + return atomic_inc_not_zero(&dst->__refcnt); + dst_hold(dst); + return true; +} + +/** + * skb_dst_force_safe - makes sure skb dst is refcounted + * @skb: buffer + * + * If dst is not yet refcounted and not destroyed, grab a ref on it. + */ +static inline void skb_dst_force_safe(struct sk_buff *skb) +{ + if (skb_dst_is_noref(skb)) { + struct dst_entry *dst = skb_dst(skb); + + if (!dst_hold_safe(dst)) + dst = NULL; + + skb->_skb_refdst = (unsigned long)dst; + } +} + /** * __skb_tunnel_rx - prepare skb for rx reinsert
diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 254b3b743369b..9ed2f5a92e668 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -45,7 +45,8 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, static inline void inet_ctl_sock_destroy(struct sock *sk) { - sk_release_kernel(sk); + if (sk) + sk_release_kernel(sk); } #endif
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 84b20835b736c..0dc0a51da38fa 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -111,11 +111,24 @@ static inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner) struct ipv6hdr; -static inline int IP6_ECN_set_ce(struct ipv6hdr *iph) +/* Note: + * IP_ECN_set_ce() has to tweak the IPv4 checksum when setting CE, + * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE. + * In the IPv6 case, no checksum compensates for the change in the IPv6 + * header, so we have to update skb->csum.
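+ *
+ * Illustration (editor's sketch of the arithmetic the code below uses):
+ *
+ *	from = *(__be32 *)iph;
+ *	to   = from | htonl(INET_ECN_CE << 20);
+ *	skb->csum = csum_add(csum_sub(skb->csum, from), to);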
+ */ +static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) { + __be32 from, to; + if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph))) return 0; - *(__be32*)iph |= htonl(INET_ECN_CE << 20); + + from = *(__be32 *)iph; + to = from | htonl(INET_ECN_CE << 20); + *(__be32 *)iph = to; + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(csum_sub(skb->csum, from), to); return 1; } @@ -142,7 +155,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) case cpu_to_be16(ETH_P_IPV6): if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= skb_tail_pointer(skb)) - return IP6_ECN_set_ce(ipv6_hdr(skb)); + return IP6_ECN_set_ce(skb, ipv6_hdr(skb)); break; } diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 360c4802288db..7682cb2ae2371 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -112,7 +112,19 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo); -void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo); +void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, + bool rearm); + +static inline void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) +{ + __inet_twsk_schedule(tw, timeo, false); +} + +static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) +{ + __inet_twsk_schedule(tw, timeo, true); +} + void inet_twsk_deschedule(struct inet_timewait_sock *tw); void inet_twsk_purge(struct inet_hashinfo *hashinfo, diff --git a/include/net/ip.h b/include/net/ip.h index d14af7edd197c..f41fc497b21b2 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -161,6 +161,7 @@ static inline __u8 get_rtconn_flags(struct ipcm_cookie* ipc, struct sock* sk) } /* datagram.c */ +int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); void ip4_datagram_release_cb(struct sock *sk); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 5e192068e6cb6..388dea4da0832 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -64,8 +64,16 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) void ip6_route_input(struct sk_buff *skb); -struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, - struct flowi6 *fl6); +struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, + struct flowi6 *fl6, int flags); + +static inline struct dst_entry *ip6_route_output(struct net *net, + const struct sock *sk, + struct flowi6 *fl6) +{ + return ip6_route_output_flags(net, sk, fl6, 0); +} + struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, int flags); diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index b8529aa1dae7a..b0f7445c0fdc7 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -83,11 +83,12 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, err = ip6_local_out_sk(sk, skb); if (net_xmit_eval(err) == 0) { - struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->tx_bytes += pkt_len; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); } else { stats->tx_errors++; stats->tx_aborted_errors++; diff --git a/include/net/ip_fib.h 
b/include/net/ip_fib.h index 54271ed0ed45b..13f1a97f6b2b8 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -59,6 +59,7 @@ struct fib_nh_exception { struct rtable __rcu *fnhe_rth_input; struct rtable __rcu *fnhe_rth_output; unsigned long fnhe_stamp; + struct rcu_head rcu; }; struct fnhe_hash_bucket {
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index d8214cb88bbcf..9c2897e56ee1a 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -207,12 +207,13 @@ static inline void iptunnel_xmit_stats(int err, struct pcpu_sw_netstats __percpu *stats) { if (err > 0) { - struct pcpu_sw_netstats *tstats = this_cpu_ptr(stats); + struct pcpu_sw_netstats *tstats = get_cpu_ptr(stats); u64_stats_update_begin(&tstats->syncp); tstats->tx_bytes += err; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); } else if (err < 0) { err_stats->tx_errors++; err_stats->tx_aborted_errors++;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h index eec8ad3c98432..df555ecd40026 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -205,6 +205,7 @@ extern rwlock_t ip6_ra_lock; */ struct ipv6_txoptions { + atomic_t refcnt; /* Length of this structure */ int tot_len; @@ -217,7 +218,7 @@ struct ipv6_txoptions { struct ipv6_opt_hdr *dst0opt; struct ipv6_rt_hdr *srcrt; /* Routing Header */ struct ipv6_opt_hdr *dst1opt; - + struct rcu_head rcu; /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */ }; @@ -250,6 +251,24 @@ struct ipv6_fl_socklist { struct rcu_head rcu; }; +static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) +{ + struct ipv6_txoptions *opt; + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt && !atomic_inc_not_zero(&opt->refcnt)) + opt = NULL; + rcu_read_unlock(); + return opt; +} + +static inline void txopt_put(struct ipv6_txoptions *opt) +{ + if (opt && atomic_dec_and_test(&opt->refcnt)) + kfree_rcu(opt, rcu); +} + struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ip6_flowlabel *fl, @@ -488,6 +507,7 @@ struct ip6_create_arg { u32 user; const struct in6_addr *src; const struct in6_addr *dst; + int iif; u8 ecn; };
diff --git a/include/net/mptcp.h b/include/net/mptcp.h index c156948f038f9..cd7b0b7acbded 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -191,7 +191,7 @@ struct mptcp_tcp_sock { u8 loc_id; u8 rem_id; -#define MPTCP_SCHED_SIZE 16 +#define MPTCP_SCHED_SIZE 64 u8 mptcp_sched[MPTCP_SCHED_SIZE] __aligned(8); struct sk_buff *shortcut_ofoqueue; /* Shortcut to the current modified @@ -210,6 +210,17 @@ struct mptcp_tcp_sock { /* HMAC of the third ack */ char sender_mac[20]; + + /* statistics for analysis */ + u64 bytes_snd; + u64 bytes_rcv; + + /* Delay measurement values */ + u32 delay_in; + u32 delay_out; + + /* counter for unique subflow ids */ + u8 sbf_id; }; struct mptcp_tw { @@ -260,6 +271,23 @@ struct mptcp_sched_ops { char name[MPTCP_SCHED_NAME_MAX]; struct module *owner; + void (*recover_skb)(struct sock *meta_sk, + struct sock *subsk, + struct sk_buff *skb, + bool reinject); + void (*update_stats)(struct sock *subsk, + const struct sk_buff *skb, + unsigned int len, + unsigned int type); +}; + +/* scheduler selection for a given destination address/port tuple */ +struct mptcp_sched_select { + struct list_head list; + __be32 dstip; + __be16 sport; + unsigned long till_time_s; + struct mptcp_sched_ops *sched_ops; +}; struct mptcp_cb { @@ -289,7 +317,7 @@ u8 cnt_subflows; u8
cnt_established; -#define MPTCP_SCHED_DATA_SIZE 8 +#define MPTCP_SCHED_DATA_SIZE 128 u8 mptcp_sched[MPTCP_SCHED_DATA_SIZE] __aligned(8); struct mptcp_sched_ops *sched_ops; @@ -348,6 +376,9 @@ struct mptcp_cb { int orig_sk_rcvbuf; int orig_sk_sndbuf; u32 orig_window_clamp; + + /* counter for unique subflow ids */ + u8 last_sbf_id; }; #define MPTCP_VERSION_0 0 @@ -909,9 +940,12 @@ extern struct mptcp_pm_ops mptcp_pm_default; int mptcp_register_scheduler(struct mptcp_sched_ops *sched); void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched); void mptcp_init_scheduler(struct mptcp_cb *mpcb); +struct mptcp_sched_ops *mptcp_sched_find(const char *name); void mptcp_cleanup_scheduler(struct mptcp_cb *mpcb); void mptcp_get_default_scheduler(char *name); int mptcp_set_default_scheduler(const char *name); +int mptcp_set_default_scheduler_for_tuple(const char *name, __be32 dstip, + __be16 sport, unsigned long till_jiffies); bool mptcp_is_available(struct sock *sk, const struct sk_buff *skb, bool zero_wnd_test); bool mptcp_is_def_unavailable(struct sock *sk); @@ -1013,7 +1047,7 @@ static inline bool mptcp_is_data_fin2(const struct sk_buff *skb, { return mptcp_is_data_fin(skb) || (tp->mpcb->infinite_mapping_rcv && - (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)); + (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)); } static inline u8 mptcp_get_64_bit(u64 data_seq, struct mptcp_cb *mpcb) @@ -1349,6 +1383,8 @@ void mptcp_tcp_set_rto(struct sock *sk); /* TCP and MPTCP flag-depending functions */ bool mptcp_prune_ofo_queue(struct sock *sk); +#define MPTCP_SCHED_MAX_DATA_LEN 1024 + #else /* CONFIG_MPTCP */ #define mptcp_debug(fmt, args...) \ do { \ diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index d81d584157e11..e8635854a55bd 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -24,6 +24,8 @@ struct nf_queue_entry { struct nf_queue_handler { int (*outfn)(struct nf_queue_entry *entry, unsigned int queuenum); + void (*nf_hook_drop)(struct net *net, + struct nf_hook_ops *ops); }; void nf_register_queue_handler(const struct nf_queue_handler *qh); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e6bcf55dcf200..fd0ca42b1d63d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -125,7 +125,7 @@ static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg) static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) { - return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1; + return type == NFT_DATA_VERDICT ? 
NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE; } unsigned int nft_parse_register(const struct nlattr *attr); diff --git a/include/net/netns/mptcp.h b/include/net/netns/mptcp.h index 6680f3bbcfc8d..12ad0c99d5802 100644 --- a/include/net/netns/mptcp.h +++ b/include/net/netns/mptcp.h @@ -44,6 +44,7 @@ struct netns_mptcp { #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_net_mptcp; + struct proc_dir_entry *proc_net_mptcp_rbs; #endif void *path_managers[MPTCP_PM_MAX]; diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 3573a81815ad9..8ba379f9e4678 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -31,6 +31,7 @@ struct netns_sctp { struct list_head addr_waitq; struct timer_list addr_wq_timer; struct list_head auto_asconf_splist; + /* Lock that protects both addr_waitq and auto_asconf_splist */ spinlock_t addr_wq_lock; /* Lock that protects the local_addr_list writers */ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 6d778efcfdfd6..080b657ef8fb2 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -61,6 +61,9 @@ struct Qdisc { */ #define TCQ_F_WARN_NONWC (1 << 16) #define TCQ_F_CPUSTATS 0x20 /* run using percpu statistics */ +#define TCQ_F_NOPARENT 0x40 /* root of its hierarchy : + * qdisc_tree_decrease_qlen() should stop. + */ u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; diff --git a/include/net/scm.h b/include/net/scm.h index 262532d111f51..59fa93c01d2a1 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -21,6 +21,7 @@ struct scm_creds { struct scm_fp_list { short count; short max; + struct user_struct *user; struct file *fp[SCM_MAX_FD]; }; diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2bb2fcf5b11f0..495c87e367b3f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -223,6 +223,10 @@ struct sctp_sock { atomic_t pd_mode; /* Receive to here while partial delivery is in effect. 
*/ struct sk_buff_head pd_lobby; + + /* These must be the last fields, as they will be skipped on copies, + * like on accept and peeloff operations + */ struct list_head auto_asconf_list; int do_auto_asconf; };
diff --git a/include/net/sock.h b/include/net/sock.h index 87d8d105e8dfc..c1d346a2ce644 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -386,6 +386,7 @@ struct sock { sk_no_check_rx : 1, sk_userlocks : 4, sk_protocol : 8, +#define SK_PROTOCOL_MAX U8_MAX sk_type : 16; kmemcheck_bitfield_end(flags); int sk_wmem_queued; @@ -723,6 +724,8 @@ enum sock_flags { SOCK_MPTCP, /* MPTCP set on this socket */ }; +#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) + static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) { nsk->sk_flags = osk->sk_flags; @@ -797,7 +800,7 @@ void sk_stream_write_space(struct sock *sk); static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) { /* dont let skb dst not refcounted, we are going to leave rcu lock */ - skb_dst_force(skb); + skb_dst_force_safe(skb); if (!sk->sk_backlog.tail) sk->sk_backlog.head = skb; @@ -827,6 +830,14 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s if (sk_rcvqueues_full(sk, limit)) return -ENOBUFS; + /* + * If the skb was allocated from pfmemalloc reserves, only + * allow SOCK_MEMALLOC sockets to use it as this socket is + * helping free memory + */ + if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) + return -ENOMEM; + __sk_add_backlog(sk, skb); sk->sk_backlog.len += skb->truesize; return 0;
diff --git a/include/net/tcp.h b/include/net/tcp.h index 65950cd33466e..38db0d81ddd17 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -117,7 +117,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */ #define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN - /* BSD style FIN_WAIT2 deadlock breaker. + /* BSD style FIN_WAIT2 deadlock breaker. * It used to be 3min, new value is 60sec, * to combine FIN-WAIT-2 timeout with * TIME-WAIT timer. @@ -142,8 +142,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes - * for local resources. - */ + * for local resources. + */ #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */ #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ @@ -315,7 +315,7 @@ extern int tcp_memory_pressure; static inline bool before(__u32 seq1, __u32 seq2) { - return (__s32)(seq1-seq2) < 0; + return (__s32)(seq1-seq2) < 0; } #define after(seq2, seq1) before(seq1, seq2) @@ -438,7 +438,7 @@ void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); void tcp_v6_hash(struct sock *sk); struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb); struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, + struct request_sock *req, struct dst_entry *dst); void tcp_v6_reqsk_destructor(struct request_sock *req); @@ -821,6 +821,16 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) #define TCPHDR_ECE 0x40 #define TCPHDR_CWR 0x80 +union tcp_skb_cb_rbs { + struct { + __u8 flags_to_unlink:1, + flags_to_free:1, + flags_not_in_queue:1, + user:5; + }; + __u8 b; +}; + /* This is what the send packet queuing engine uses to pass * TCP per-packet control information to the transmission code.
* We also store the host-order sequence numbers in here too. @@ -845,6 +855,7 @@ struct tcp_skb_cb { __u8 mptcp_flags; /* flags for the MPTCP layer */ __u8 dss_off; /* Number of 4-byte words until * seq-number */ + union tcp_skb_cb_rbs mptcp_rbs; #endif __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ @@ -1579,6 +1590,13 @@ static inline bool tcp_skb_is_last(const struct sock *sk, return skb_queue_is_last(&sk->sk_write_queue, skb); } +/* added for rbs */ +static inline bool tcp_skb_is_first(const struct sock *sk, + const struct sk_buff *skb) +{ + return skb_queue_is_first(&sk->sk_write_queue, skb); +} + static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb) { if (tcp_skb_is_last(sk, skb))
diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0082b5d33d7d3..7ef9272a405aa 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -78,7 +78,7 @@ struct vxlanhdr { }; /* VXLAN header flags. */ -#define VXLAN_HF_RCO BIT(24) +#define VXLAN_HF_RCO BIT(21) #define VXLAN_HF_VNI BIT(27) #define VXLAN_HF_GBP BIT(31)
diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h index 5a4bb5bb66b3b..1e1421b06565c 100644 --- a/include/scsi/scsi_eh.h +++ b/include/scsi/scsi_eh.h @@ -59,7 +59,6 @@ extern int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len, u64 * info_out); extern void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq); -extern void scsi_set_sense_information(u8 *buf, u64 info); extern int scsi_ioctl_reset(struct scsi_device *, int __user *);
diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h index 63deb8d9f82af..d298857cd8459 100644 --- a/include/soc/tegra/mc.h +++ b/include/soc/tegra/mc.h @@ -59,6 +59,7 @@ struct tegra_smmu_soc { bool supports_round_robin_arbitration; bool supports_request_limit; + unsigned int num_tlb_lines; unsigned int num_asids; const struct tegra_smmu_ops *ops;
diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index f6cbef78db620..3b91ad5d51158 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -167,6 +167,10 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count); int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count); +int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, + unsigned char *buffer, int count); +int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, + int count); /* main midi functions */
diff --git a/include/sound/soc.h b/include/sound/soc.h index f6226914acfee..8d948aa9c5c92 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -85,7 +85,7 @@ .access = SNDRV_CTL_ELEM_ACCESS_TLV_READ | \ SNDRV_CTL_ELEM_ACCESS_READWRITE, \ .tlv.p = (tlv_array),\ - .info = snd_soc_info_volsw, \ + .info = snd_soc_info_volsw_sx, \ .get = snd_soc_get_volsw_sx,\ .put = snd_soc_put_volsw_sx, \ .private_value = (unsigned long)&(struct soc_mixer_control) \ @@ -155,7 +155,7 @@ .access = SNDRV_CTL_ELEM_ACCESS_TLV_READ | \ SNDRV_CTL_ELEM_ACCESS_READWRITE, \ .tlv.p = (tlv_array), \ - .info = snd_soc_info_volsw, \ + .info = snd_soc_info_volsw_sx, \ .get = snd_soc_get_volsw_sx, \ .put = snd_soc_put_volsw_sx, \ .private_value = (unsigned long)&(struct soc_mixer_control) \ @@ -563,6 +563,8 @@ int snd_soc_put_enum_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); int snd_soc_info_volsw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo); +int
snd_soc_info_volsw_sx(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo); #define snd_soc_info_bool_ext snd_ctl_boolean_mono_info int snd_soc_get_volsw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); diff --git a/include/sound/wm8904.h b/include/sound/wm8904.h index 898be3a8db9ae..6d8f8fba33414 100644 --- a/include/sound/wm8904.h +++ b/include/sound/wm8904.h @@ -119,7 +119,7 @@ #define WM8904_MIC_REGS 2 #define WM8904_GPIO_REGS 4 #define WM8904_DRC_REGS 4 -#define WM8904_EQ_REGS 25 +#define WM8904_EQ_REGS 24 /** * DRC configurations are specified with a label and a set of register diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 54e7af301888f..7bd03f867fcac 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -606,6 +606,7 @@ struct iscsi_conn { int bitmap_id; int rx_thread_active; struct task_struct *rx_thread; + struct completion rx_login_comp; int tx_thread_active; struct task_struct *tx_thread; /* list_head for session connection list */ @@ -786,7 +787,6 @@ struct iscsi_np { enum iscsi_timer_flags_table np_login_timer_flags; u32 np_exports; enum np_flags_table np_flags; - unsigned char np_ip[IPV6_ADDRESS_SPACE]; u16 np_port; spinlock_t np_thread_lock; struct completion np_restart_comp; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 480e9f82dfea8..2b40a1fab2935 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -167,6 +167,7 @@ enum se_cmd_flags_table { SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC = 0x00020000, SCF_COMPARE_AND_WRITE = 0x00080000, SCF_COMPARE_AND_WRITE_POST = 0x00100000, + SCF_ACK_KREF = 0x00400000, }; /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */ @@ -522,7 +523,7 @@ struct se_cmd { sense_reason_t (*execute_cmd)(struct se_cmd *); sense_reason_t (*execute_rw)(struct se_cmd *, struct scatterlist *, u32, enum dma_data_direction); - sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool); + sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool, int *); unsigned char *t_task_cdb; unsigned char __t_task_cdb[TCM_MAX_COMMAND_SIZE]; @@ -537,6 +538,8 @@ struct se_cmd { #define CMD_T_DEV_ACTIVE (1 << 7) #define CMD_T_REQUEST_STOP (1 << 8) #define CMD_T_BUSY (1 << 9) +#define CMD_T_TAS (1 << 10) +#define CMD_T_FABRIC_STOP (1 << 11) spinlock_t t_state_lock; struct completion t_transport_stop_comp; diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 0f4dc3768587b..24c8d9d0d9463 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -155,8 +155,8 @@ bool transport_wait_for_tasks(struct se_cmd *); int transport_check_aborted_status(struct se_cmd *, int); int transport_send_check_condition_and_sense(struct se_cmd *, sense_reason_t, int); -int target_get_sess_cmd(struct se_session *, struct se_cmd *, bool); -int target_put_sess_cmd(struct se_session *, struct se_cmd *); +int target_get_sess_cmd(struct se_cmd *, bool); +int target_put_sess_cmd(struct se_cmd *); void target_sess_cmd_list_set_waiting(struct se_session *); void target_wait_for_sess_cmds(struct se_session *); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index fd1a02cb3c823..003dca9338039 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -529,18 +529,21 @@ TRACE_EVENT(svc_xprt_do_enqueue, TP_STRUCT__entry( __field(struct svc_xprt *, 
xprt) - __field(struct svc_rqst *, rqst) + __field_struct(struct sockaddr_storage, ss) + __field(int, pid) + __field(unsigned long, flags) ), TP_fast_assign( __entry->xprt = xprt; - __entry->rqst = rqst; + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss)); + __entry->pid = rqst ? rqst->rq_task->pid : 0; + __entry->flags = xprt ? xprt->xpt_flags : 0; ), TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt, - (struct sockaddr *)&__entry->xprt->xpt_remote, - __entry->rqst ? __entry->rqst->rq_task->pid : 0, - show_svc_xprt_flags(__entry->xprt->xpt_flags)) + (struct sockaddr *)&__entry->ss, + __entry->pid, show_svc_xprt_flags(__entry->flags)) ); TRACE_EVENT(svc_xprt_dequeue, @@ -589,16 +592,20 @@ TRACE_EVENT(svc_handle_xprt, TP_STRUCT__entry( __field(struct svc_xprt *, xprt) __field(int, len) + __field_struct(struct sockaddr_storage, ss) + __field(unsigned long, flags) ), TP_fast_assign( __entry->xprt = xprt; + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss)); __entry->len = len; + __entry->flags = xprt ? xprt->xpt_flags : 0; ), TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt, - (struct sockaddr *)&__entry->xprt->xpt_remote, __entry->len, - show_svc_xprt_flags(__entry->xprt->xpt_flags)) + (struct sockaddr *)&__entry->ss, + __entry->len, show_svc_xprt_flags(__entry->flags)) ); #endif /* _TRACE_SUNRPC_H */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 551b6737f5df6..a7e41fb6ed548 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1065,6 +1065,14 @@ struct drm_i915_reg_read { __u64 offset; __u64 val; /* Return value */ }; +/* Known registers: + * + * Render engine timestamp - 0x2358 + 64bit - gen7+ + * - Note this register returns an invalid value if using the default + * single instruction 8byte read, in order to workaround that use + * offset (0x2358 | 1) instead.
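+ *
+ * Editor's sketch of the intended use (drmIoctl() is the usual libdrm
+ * wrapper, assumed here; it is not declared by this header):
+ *
+ *	struct drm_i915_reg_read reg = { .offset = 0x2358 | 1 };
+ *	drmIoctl(fd, DRM_IOCTL_I915_REG_READ, &reg);
+ *	reg.val then holds the 64bit render engine timestamp.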
+ * + */ struct drm_i915_reset_stats { __u32 ctx_id; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a9ebdf5701e8d..cd1c2e3bc56f5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7,60 +7,59 @@ #ifndef _UAPI__LINUX_BPF_H__ #define _UAPI__LINUX_BPF_H__ -#include #include +#include /* Extended instruction set based on top of classic BPF */ /* instruction classes */ -#define BPF_ALU64 0x07 /* alu mode in double word width */ +#define BPF_ALU64 0x07 /* alu mode in double word width */ /* ld/ldx fields */ -#define BPF_DW 0x18 /* double word */ -#define BPF_XADD 0xc0 /* exclusive add */ +#define BPF_DW 0x18 /* double word */ +#define BPF_XADD 0xc0 /* exclusive add */ /* alu/jmp fields */ -#define BPF_MOV 0xb0 /* mov reg to reg */ -#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ +#define BPF_MOV 0xb0 /* mov reg to reg */ +#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ /* change endianness of a register */ -#define BPF_END 0xd0 /* flags for endianness conversion: */ -#define BPF_TO_LE 0x00 /* convert to little-endian */ -#define BPF_TO_BE 0x08 /* convert to big-endian */ -#define BPF_FROM_LE BPF_TO_LE -#define BPF_FROM_BE BPF_TO_BE - -#define BPF_JNE 0x50 /* jump != */ -#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ -#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ -#define BPF_CALL 0x80 /* function call */ -#define BPF_EXIT 0x90 /* function return */ +#define BPF_END 0xd0 /* flags for endianness conversion: */ +#define BPF_TO_LE 0x00 /* convert to little-endian */ +#define BPF_TO_BE 0x08 /* convert to big-endian */ +#define BPF_FROM_LE BPF_TO_LE +#define BPF_FROM_BE BPF_TO_BE + +#define BPF_JNE 0x50 /* jump != */ +#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ +#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_CALL 0x80 /* function call */ +#define BPF_EXIT 0x90 /* function return */ /* Register numbers */ -enum { - BPF_REG_0 = 0, - BPF_REG_1, - BPF_REG_2, - BPF_REG_3, - BPF_REG_4, - BPF_REG_5, - BPF_REG_6, - BPF_REG_7, - BPF_REG_8, - BPF_REG_9, - BPF_REG_10, - __MAX_BPF_REG, +enum { BPF_REG_0 = 0, + BPF_REG_1, + BPF_REG_2, + BPF_REG_3, + BPF_REG_4, + BPF_REG_5, + BPF_REG_6, + BPF_REG_7, + BPF_REG_8, + BPF_REG_9, + BPF_REG_10, + __MAX_BPF_REG, }; /* BPF has 10 general purpose 64-bit registers and stack frame. 
*/ -#define MAX_BPF_REG __MAX_BPF_REG +#define MAX_BPF_REG __MAX_BPF_REG struct bpf_insn { - __u8 code; /* opcode */ - __u8 dst_reg:4; /* dest register */ - __u8 src_reg:4; /* source register */ - __s16 off; /* signed offset */ - __s32 imm; /* signed immediate constant */ + __u8 code; /* opcode */ + __u8 dst_reg : 4; /* dest register */ + __u8 src_reg : 4; /* source register */ + __s16 off; /* signed offset */ + __s32 imm; /* signed immediate constant */ }; /* BPF syscall commands */ @@ -121,42 +120,43 @@ enum bpf_prog_type { BPF_PROG_TYPE_KPROBE, BPF_PROG_TYPE_SCHED_CLS, BPF_PROG_TYPE_SCHED_ACT, + BPF_PROG_TYPE_RBS, }; -#define BPF_PSEUDO_MAP_FD 1 +#define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ -#define BPF_ANY 0 /* create new element or update existing */ -#define BPF_NOEXIST 1 /* create new element if it didn't exist */ -#define BPF_EXIST 2 /* update existing element */ +#define BPF_ANY 0 /* create new element or update existing */ +#define BPF_NOEXIST 1 /* create new element if it didn't exist */ +#define BPF_EXIST 2 /* update existing element */ union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ - __u32 map_type; /* one of enum bpf_map_type */ - __u32 key_size; /* size of key in bytes */ - __u32 value_size; /* size of value in bytes */ - __u32 max_entries; /* max number of entries in a map */ + __u32 map_type; /* one of enum bpf_map_type */ + __u32 key_size; /* size of key in bytes */ + __u32 value_size; /* size of value in bytes */ + __u32 max_entries; /* max number of entries in a map */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ - __u32 map_fd; - __aligned_u64 key; + __u32 map_fd; + __aligned_u64 key; union { __aligned_u64 value; __aligned_u64 next_key; }; - __u64 flags; + __u64 flags; }; struct { /* anonymous struct used by BPF_PROG_LOAD command */ - __u32 prog_type; /* one of enum bpf_prog_type */ - __u32 insn_cnt; - __aligned_u64 insns; - __aligned_u64 license; - __u32 log_level; /* verbosity level of verifier */ - __u32 log_size; /* size of user buffer */ - __aligned_u64 log_buf; /* user supplied buffer */ - __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 prog_type; /* one of enum bpf_prog_type */ + __u32 insn_cnt; + __aligned_u64 insns; + __aligned_u64 license; + __u32 log_level; /* verbosity level of verifier */ + __u32 log_size; /* size of user buffer */ + __aligned_u64 log_buf; /* user supplied buffer */ + __u32 kern_version; /* checked when prog_type=kprobe */ }; } __attribute__((aligned(8))); @@ -166,16 +166,20 @@ union bpf_attr { enum bpf_func_id { BPF_FUNC_unspec, BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ - BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ + BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, + flags) */ BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ - BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ - BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ - BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */ - BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ + BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void + *src) */ + BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ + BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int + fmt_size, ...) 
*/ + BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ /** - * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet + * skb_store_bytes(skb, offset, from, len, flags) - store bytes into + * packet * @skb: pointer to skb * @offset: offset within packet from skb->mac_header * @from: pointer where to copy bytes from @@ -199,7 +203,8 @@ enum bpf_func_id { BPF_FUNC_l3_csum_replace, /** - * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum + * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP + * checksum * @skb: pointer to skb * @offset: offset within packet where TCP/UDP checksum is located * @from: old value of header field @@ -210,6 +215,25 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_l4_csum_replace, + + /* eBPF functions for RBS */ + BPF_FUNC_mptcp_rbs_printk, + BPF_FUNC_mptcp_rbs_add_drop, + BPF_FUNC_mptcp_rbs_add_push, + BPF_FUNC_mptcp_rbs_ktime_get_raw_ms, + BPF_FUNC_mptcp_rbs_random, + BPF_FUNC_mptcp_rbs_has_window_for, + BPF_FUNC_mptcp_rbs_bw_out_send, + BPF_FUNC_mptcp_rbs_bw_out_ack, + BPF_FUNC_mptcp_rbs_lossy, + BPF_FUNC_mptcp_rbs_sent_on_all, + BPF_FUNC_mptcp_rbs_q_next, + BPF_FUNC_mptcp_rbs_qu_next, + BPF_FUNC_mptcp_rbs_rq_next, + BPF_FUNC_mptcp_rbs_subflows_next, + BPF_FUNC_mptcp_rbs_varlist_expand, + BPF_FUNC_mptcp_rbs_skb_list_pop, + __BPF_FUNC_MAX_ID, }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 5efa54ae567ca..80f3b74446a1a 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -171,6 +171,8 @@ enum { DEVCONF_USE_OPTIMISTIC, DEVCONF_ACCEPT_RA_MTU, DEVCONF_STABLE_SECRET, + DEVCONF_USE_OIF_ADDRS_ONLY, + DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT, DEVCONF_MAX }; diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index efe3443572baa..413417f3707bb 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -319,6 +319,7 @@ #define PCI_MSIX_PBA 8 /* Pending Bit Array offset */ #define PCI_MSIX_PBA_BIR 0x00000007 /* BAR index */ #define PCI_MSIX_PBA_OFFSET 0xfffffff8 /* Offset into specified BAR */ +#define PCI_MSIX_FLAGS_BIRMASK PCI_MSIX_PBA_BIR /* deprecated */ #define PCI_CAP_MSIX_SIZEOF 12 /* size of MSIX registers */ /* MSI-X Table entry format */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 287e1c3ac7e9a..8f523acd7dbec 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -50,7 +50,7 @@ struct tcphdr { fin:1; #else #error "Adjust your defines" -#endif +#endif __be16 window; __sum16 check; __be16 urg_ptr; @@ -61,14 +61,14 @@ struct tcphdr { * (union is compatible to any of its members) * This means this part of the code is -fstrict-aliasing safe now. 
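 * For example, tcp_flag_word(th) & TCP_FLAG_SYN tests a flag through the
 * union rather than through an aliasing-unsafe cast (editor's example).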
*/ -union tcp_word_hdr { +union tcp_word_hdr { struct tcphdr hdr; __be32 words[5]; -}; +}; -#define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) +#define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) -enum { +enum { TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000), TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000), TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000), @@ -79,7 +79,7 @@ enum { TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000), TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000), TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000) -}; +}; /* * TCP general constants @@ -114,6 +114,9 @@ enum { #define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */ #define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ #define MPTCP_ENABLED 42 +#define MPTCP_SCHEDULER 44 +#define MPTCP_SCHEDULER_REG 45 +#define MPTCP_RBS_SKB_PROP 46 struct tcp_repair_opt { __u32 opt_code; diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h index 9ce083960a257..f18490985fc8e 100644 --- a/include/xen/interface/sched.h +++ b/include/xen/interface/sched.h @@ -107,5 +107,13 @@ struct sched_watchdog { #define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ #define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ #define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ +/* + * Domain asked to perform 'soft reset' for it. The expected behavior is to + * reset internal Xen state for the domain returning it to the point where it + * was created but leaving the domain's memory contents and vCPU contexts + * intact. This will allow the domain to start over and set up all Xen specific + * interfaces again. + */ +#define SHUTDOWN_soft_reset 5 #endif /* __XEN_PUBLIC_SCHED_H__ */ diff --git a/init/main.c b/init/main.c index 2115055faeac9..2a89545e0a5d6 100644 --- a/init/main.c +++ b/init/main.c @@ -664,6 +664,7 @@ asmlinkage __visible void __init start_kernel(void) check_bugs(); + acpi_subsystem_init(); sfi_init_late(); if (efi_enabled(EFI_RUNTIME_SERVICES)) { diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 3aaea7ffd077c..c3fc5c2b63f34 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -143,7 +143,6 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info) if (!leaf) return -ENOMEM; INIT_LIST_HEAD(&leaf->msg_list); - info->qsize += sizeof(*leaf); } leaf->priority = msg->m_type; rb_link_node(&leaf->rb_node, parent, p); @@ -188,7 +187,6 @@ static inline struct msg_msg *msg_get(struct mqueue_inode_info *info) "lazy leaf delete!\n"); rb_erase(&leaf->rb_node, &info->msg_tree); if (info->node_cache) { - info->qsize -= sizeof(*leaf); kfree(leaf); } else { info->node_cache = leaf; @@ -201,7 +199,6 @@ static inline struct msg_msg *msg_get(struct mqueue_inode_info *info) if (list_empty(&leaf->msg_list)) { rb_erase(&leaf->rb_node, &info->msg_tree); if (info->node_cache) { - info->qsize -= sizeof(*leaf); kfree(leaf); } else { info->node_cache = leaf; @@ -1026,7 +1023,6 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, /* Save our speculative allocation into the cache */ INIT_LIST_HEAD(&new_leaf->msg_list); info->node_cache = new_leaf; - info->qsize += sizeof(*new_leaf); new_leaf = NULL; } else { kfree(new_leaf); @@ -1133,7 +1129,6 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, /* Save our speculative allocation into the cache */ INIT_LIST_HEAD(&new_leaf->msg_list); info->node_cache = new_leaf; - info->qsize += sizeof(*new_leaf); } else { 
kfree(new_leaf); } diff --git a/ipc/msg.c b/ipc/msg.c index 2b6fdbb9e0e9a..652540613d265 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -137,13 +137,6 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) return retval; } - /* ipc_addid() locks msq upon success. */ - id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); - if (id < 0) { - ipc_rcu_putref(msq, msg_rcu_free); - return id; - } - msq->q_stime = msq->q_rtime = 0; msq->q_ctime = get_seconds(); msq->q_cbytes = msq->q_qnum = 0; @@ -153,6 +146,13 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) INIT_LIST_HEAD(&msq->q_receivers); INIT_LIST_HEAD(&msq->q_senders); + /* ipc_addid() locks msq upon success. */ + id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); + if (id < 0) { + ipc_rcu_putref(msq, msg_rcu_free); + return id; + } + ipc_unlock_object(&msq->q_perm); rcu_read_unlock(); diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 2b491590ebab1..71f448e5e927a 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -123,7 +123,7 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst) size_t len = src->m_ts; size_t alen; - BUG_ON(dst == NULL); + WARN_ON(dst == NULL); if (src->m_ts > dst->m_ts) return ERR_PTR(-EINVAL); diff --git a/ipc/sem.c b/ipc/sem.c index d1a6edd17eba2..c50aa5755c626 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -252,6 +252,16 @@ static void sem_rcu_free(struct rcu_head *head) ipc_rcu_free(head); } +/* + * spin_unlock_wait() and !spin_is_locked() are not memory barriers, they + * are only control barriers. + * The code must pair with spin_unlock(&sem->lock) or + * spin_unlock(&sem_perm.lock), thus just the control barrier is insufficient. + * + * smp_rmb() is sufficient, as writes cannot pass the control barrier. + */ +#define ipc_smp_acquire__after_spin_is_unlocked() smp_rmb() + /* * Wait until all currently ongoing simple ops have completed. * Caller must own sem_perm.lock. @@ -275,6 +285,7 @@ static void sem_wait_array(struct sem_array *sma) sem = sma->sem_base + i; spin_unlock_wait(&sem->lock); } + ipc_smp_acquire__after_spin_is_unlocked(); } /* @@ -327,13 +338,12 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, /* Then check that the global lock is free */ if (!spin_is_locked(&sma->sem_perm.lock)) { /* - * The ipc object lock check must be visible on all - * cores before rechecking the complex count. Otherwise - * we can race with another thread that does: + * We need a memory barrier with acquire semantics, + * otherwise we can race with another thread that does: * complex_count++; * spin_unlock(sem_perm.lock); */ - smp_rmb(); + ipc_smp_acquire__after_spin_is_unlocked(); /* * Now repeat the test of complex_count: @@ -2074,17 +2084,28 @@ void exit_sem(struct task_struct *tsk) rcu_read_lock(); un = list_entry_rcu(ulp->list_proc.next, struct sem_undo, list_proc); - if (&un->list_proc == &ulp->list_proc) - semid = -1; - else - semid = un->semid; + if (&un->list_proc == &ulp->list_proc) { + /* + * We must wait for freeary() before freeing this ulp, + * in case we raced with last sem_undo. There is a small + * possibility where we exit while freeary() didn't + * finish unlocking sem_undo_list. 
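
Note on the ipc/msg.c hunk above: ipc_addid() publishes the queue into the idr, where other tasks can immediately find and lock it, so every field must be initialized before that call rather than after it. A userspace sketch of the same initialize-fully-then-publish ordering; the names are hypothetical stand-ins:

#include <stdatomic.h>
#include <stdlib.h>

struct queue {
        long stime, rtime, ctime;
        int qnum;
};

static _Atomic(struct queue *) registry;        /* stands in for the idr */

static int new_queue(long now)
{
        struct queue *q = malloc(sizeof(*q));

        if (!q)
                return -1;

        /* complete the initialization first... */
        q->stime = q->rtime = 0;
        q->ctime = now;
        q->qnum = 0;

        /* ...then publish: the release store orders the field writes
         * before the pointer becomes visible, which is the property the
         * reordered ipc_addid() call preserves under its lock */
        atomic_store_explicit(&registry, q, memory_order_release);
        return 0;
}

int main(void)
{
        return new_queue(42);
}
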
+ */ + spin_unlock_wait(&ulp->lock); + rcu_read_unlock(); + break; + } + spin_lock(&ulp->lock); + semid = un->semid; + spin_unlock(&ulp->lock); + /* exit_sem raced with IPC_RMID, nothing to do */ if (semid == -1) { rcu_read_unlock(); - break; + continue; } - sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid); + sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid); /* exit_sem raced with IPC_RMID, nothing to do */ if (IS_ERR(sma)) { rcu_read_unlock(); diff --git a/ipc/shm.c b/ipc/shm.c index 6d767071c3673..bbe5f62f2b129 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -155,9 +155,13 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) { struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id); + /* + * Callers of shm_lock() must validate the status of the returned ipc + * object pointer (as returned by ipc_lock()), and error out as + * appropriate. + */ if (IS_ERR(ipcp)) - return (struct shmid_kernel *)ipcp; - + return (void *)ipcp; return container_of(ipcp, struct shmid_kernel, shm_perm); } @@ -183,19 +187,33 @@ static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) } -/* This is called by fork, once for every shm attach. */ -static void shm_open(struct vm_area_struct *vma) +static int __shm_open(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct shm_file_data *sfd = shm_file_data(file); struct shmid_kernel *shp; shp = shm_lock(sfd->ns, sfd->id); - BUG_ON(IS_ERR(shp)); + + if (IS_ERR(shp)) + return PTR_ERR(shp); + shp->shm_atim = get_seconds(); shp->shm_lprid = task_tgid_vnr(current); shp->shm_nattch++; shm_unlock(shp); + return 0; +} + +/* This is called by fork, once for every shm attach. */ +static void shm_open(struct vm_area_struct *vma) +{ + int err = __shm_open(vma); + /* + * We raced in the idr lookup or with shm_destroy(). + * Either way, the ID is busted. + */ + WARN_ON_ONCE(err); } /* @@ -258,7 +276,14 @@ static void shm_close(struct vm_area_struct *vma) down_write(&shm_ids(ns).rwsem); /* remove from the list of attaches of the shm segment */ shp = shm_lock(ns, sfd->id); - BUG_ON(IS_ERR(shp)); + + /* + * We raced in the idr lookup or with shm_destroy(). + * Either way, the ID is busted. + */ + if (WARN_ON_ONCE(IS_ERR(shp))) + goto done; /* no-op */ + shp->shm_lprid = task_tgid_vnr(current); shp->shm_dtim = get_seconds(); shp->shm_nattch--; @@ -266,6 +291,7 @@ static void shm_close(struct vm_area_struct *vma) shm_destroy(ns, shp); else shm_unlock(shp); +done: up_write(&shm_ids(ns).rwsem); } @@ -387,17 +413,25 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) struct shm_file_data *sfd = shm_file_data(file); int ret; + /* + * In case of remap_file_pages() emulation, the file can represent + * removed IPC ID: propagate shm_lock() error to caller.
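
Note on the ipc/shm.c hunks above: a lookup that can legitimately race with shm_destroy() should hand the error back to the caller (or at most WARN once) instead of taking the whole machine down with BUG_ON(). A userspace sketch of that conversion; names are hypothetical and -EIDRM is chosen only to mirror the IPC flavor:

#include <errno.h>
#include <stdio.h>

struct seg {
        int nattch;
};

static struct seg *seg_lookup(int id)
{
        (void)id;
        return NULL;            /* simulate an ID removed by a racing task */
}

static int seg_open(int id)
{
        struct seg *s = seg_lookup(id);

        if (!s)                 /* propagate instead of BUG_ON(!s) */
                return -EIDRM;

        s->nattch++;
        return 0;
}

int main(void)
{
        if (seg_open(1) < 0)
                fprintf(stderr, "stale segment ID, ignoring\n");
        return 0;
}
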
+ */ + ret =__shm_open(vma); + if (ret) + return ret; + ret = sfd->file->f_op->mmap(sfd->file, vma); - if (ret != 0) + if (ret) { + shm_close(vma); return ret; + } sfd->vm_ops = vma->vm_ops; #ifdef CONFIG_MMU - BUG_ON(!sfd->vm_ops->fault); + WARN_ON(!sfd->vm_ops->fault); #endif vma->vm_ops = &shm_vm_ops; - shm_open(vma); - - return ret; + return 0; } static int shm_release(struct inode *ino, struct file *file) @@ -550,12 +584,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (IS_ERR(file)) goto no_file; - id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); - if (id < 0) { - error = id; - goto no_id; - } - shp->shm_cprid = task_tgid_vnr(current); shp->shm_lprid = 0; shp->shm_atim = shp->shm_dtim = 0; @@ -564,6 +592,13 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_nattch = 0; shp->shm_file = file; shp->shm_creator = current; + + id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); + if (id < 0) { + error = id; + goto no_id; + } + list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist); /* @@ -1191,7 +1226,6 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, out_nattch: down_write(&shm_ids(ns).rwsem); shp = shm_lock(ns, shmid); - BUG_ON(IS_ERR(shp)); shp->shm_nattch--; if (shm_may_destroy(ns, shp)) shm_destroy(ns, shp); diff --git a/ipc/util.c b/ipc/util.c index ff3323ef8d8b4..c917e9fd10b13 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -237,6 +237,10 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) rcu_read_lock(); spin_lock(&new->lock); + current_euid_egid(&euid, &egid); + new->cuid = new->uid = euid; + new->gid = new->cgid = egid; + id = idr_alloc(&ids->ipcs_idr, new, (next_id < 0) ? 0 : ipcid_to_idx(next_id), 0, GFP_NOWAIT); @@ -249,10 +253,6 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) ids->in_use++; - current_euid_egid(&euid, &egid); - new->cuid = new->uid = euid; - new->gid = new->cgid = egid; - if (next_id < 0) { new->seq = ids->seq++; if (ids->seq > IPCID_SEQ_MAX) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 8a6616583f38a..1c1b8ab340373 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -109,7 +109,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, /* all elements already exist */ return -EEXIST; - memcpy(array->value + array->elem_size * index, value, array->elem_size); + memcpy(array->value + array->elem_size * index, value, map->value_size); return 0; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 47dcd3aa6e236..6582410a71c79 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1019,6 +1019,16 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn) return -EINVAL; } + if ((opcode == BPF_LSH || opcode == BPF_RSH || + opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { + int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 
64 : 32; + + if (insn->imm < 0 || insn->imm >= size) { + verbose("invalid shift %d\n", insn->imm); + return -EINVAL; + } + } + /* pattern match 'bpf_add Rx, imm' instruction */ if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && regs[insn->dst_reg].type == FRAME_PTR && @@ -1934,7 +1944,7 @@ static void adjust_branches(struct bpf_prog *prog, int pos, int delta) /* adjust offset of jmps if necessary */ if (i < pos && i + insn->off + 1 > pos) insn->off += delta; - else if (i > pos && i + insn->off + 1 < pos) + else if (i > pos + delta && i + insn->off + 1 <= pos + delta) insn->off -= delta; } } diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 469dd547770ca..359da3abb004f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1319,7 +1319,7 @@ static int cgroup_show_options(struct seq_file *seq, for_each_subsys(ss, ssid) if (root->subsys_mask & (1 << ssid)) - seq_printf(seq, ",%s", ss->name); + seq_show_option(seq, ss->name, NULL); if (root->flags & CGRP_ROOT_NOPREFIX) seq_puts(seq, ",noprefix"); if (root->flags & CGRP_ROOT_XATTR) @@ -1327,13 +1327,14 @@ static int cgroup_show_options(struct seq_file *seq, spin_lock(&release_agent_path_lock); if (strlen(root->release_agent_path)) - seq_printf(seq, ",release_agent=%s", root->release_agent_path); + seq_show_option(seq, "release_agent", + root->release_agent_path); spin_unlock(&release_agent_path_lock); if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags)) seq_puts(seq, ",clone_children"); if (strlen(root->name)) - seq_printf(seq, ",name=%s", root->name); + seq_show_option(seq, "name", root->name); return 0; } @@ -1924,8 +1925,6 @@ static struct file_system_type cgroup_fs_type = { .kill_sb = cgroup_kill_sb, }; -static struct kobject *cgroup_kobj; - /** * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy * @task: target task @@ -4482,6 +4481,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css, INIT_LIST_HEAD(&css->sibling); INIT_LIST_HEAD(&css->children); css->serial_nr = css_serial_nr_next++; + atomic_set(&css->online_cnt, 0); if (cgroup_parent(cgrp)) { css->parent = cgroup_css(cgroup_parent(cgrp), ss); @@ -4504,6 +4504,10 @@ static int online_css(struct cgroup_subsys_state *css) if (!ret) { css->flags |= CSS_ONLINE; rcu_assign_pointer(css->cgroup->subsys[ss->id], css); + + atomic_inc(&css->online_cnt); + if (css->parent) + atomic_inc(&css->parent->online_cnt); } return ret; } @@ -4741,10 +4745,15 @@ static void css_killed_work_fn(struct work_struct *work) container_of(work, struct cgroup_subsys_state, destroy_work); mutex_lock(&cgroup_mutex); - offline_css(css); - mutex_unlock(&cgroup_mutex); - css_put(css); + do { + offline_css(css); + css_put(css); + /* @css can't go away while we're holding cgroup_mutex */ + css = css->parent; + } while (css && atomic_dec_and_test(&css->online_cnt)); + + mutex_unlock(&cgroup_mutex); } /* css kill confirmation processing requires process context, bounce */ @@ -4753,8 +4762,10 @@ static void css_killed_ref_fn(struct percpu_ref *ref) struct cgroup_subsys_state *css = container_of(ref, struct cgroup_subsys_state, refcnt); - INIT_WORK(&css->destroy_work, css_killed_work_fn); - queue_work(cgroup_destroy_wq, &css->destroy_work); + if (atomic_dec_and_test(&css->online_cnt)) { + INIT_WORK(&css->destroy_work, css_killed_work_fn); + queue_work(cgroup_destroy_wq, &css->destroy_work); + } } /** @@ -5044,13 +5055,13 @@ int __init cgroup_init(void) ss->bind(init_css_set.subsys[ssid]); } - cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); - if (!cgroup_kobj) - 
return -ENOMEM; + err = sysfs_create_mount_point(fs_kobj, "cgroup"); + if (err) + return err; err = register_filesystem(&cgroup_fs_type); if (err < 0) { - kobject_put(cgroup_kobj); + sysfs_remove_mount_point(fs_kobj, "cgroup"); return err; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ee14e3a35a299..f0acff0f66c91 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1223,7 +1223,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, spin_unlock_irq(&callback_lock); /* use trialcs->mems_allowed as a temp variable */ - update_nodemasks_hier(cs, &cs->mems_allowed); + update_nodemasks_hier(cs, &trialcs->mems_allowed); done: return retval; } diff --git a/kernel/events/core.c b/kernel/events/core.c index eddf1ed4155ea..e1af58e23bee9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1886,8 +1886,6 @@ event_sched_in(struct perf_event *event, perf_pmu_disable(event->pmu); - event->tstamp_running += tstamp - event->tstamp_stopped; - perf_set_shadow_time(event, ctx, tstamp); perf_log_itrace_start(event); @@ -1899,6 +1897,8 @@ event_sched_in(struct perf_event *event, goto out; } + event->tstamp_running += tstamp - event->tstamp_stopped; + if (!is_software_event(event)) cpuctx->active_oncpu++; if (!ctx->nr_active++) @@ -3976,28 +3976,21 @@ static void perf_event_for_each(struct perf_event *event, perf_event_for_each_child(sibling, func); } -static int perf_event_period(struct perf_event *event, u64 __user *arg) -{ - struct perf_event_context *ctx = event->ctx; - int ret = 0, active; +struct period_event { + struct perf_event *event; u64 value; +}; - if (!is_sampling_event(event)) - return -EINVAL; - - if (copy_from_user(&value, arg, sizeof(value))) - return -EFAULT; - - if (!value) - return -EINVAL; +static int __perf_event_period(void *info) +{ + struct period_event *pe = info; + struct perf_event *event = pe->event; + struct perf_event_context *ctx = event->ctx; + u64 value = pe->value; + bool active; - raw_spin_lock_irq(&ctx->lock); + raw_spin_lock(&ctx->lock); if (event->attr.freq) { - if (value > sysctl_perf_event_sample_rate) { - ret = -EINVAL; - goto unlock; - } - event->attr.sample_freq = value; } else { event->attr.sample_period = value; @@ -4016,11 +4009,53 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) event->pmu->start(event, PERF_EF_RELOAD); perf_pmu_enable(ctx->pmu); } + raw_spin_unlock(&ctx->lock); -unlock: + return 0; +} + +static int perf_event_period(struct perf_event *event, u64 __user *arg) +{ + struct period_event pe = { .event = event, }; + struct perf_event_context *ctx = event->ctx; + struct task_struct *task; + u64 value; + + if (!is_sampling_event(event)) + return -EINVAL; + + if (copy_from_user(&value, arg, sizeof(value))) + return -EFAULT; + + if (!value) + return -EINVAL; + + if (event->attr.freq && value > sysctl_perf_event_sample_rate) + return -EINVAL; + + task = ctx->task; + pe.value = value; + + if (!task) { + cpu_function_call(event->cpu, __perf_event_period, &pe); + return 0; + } + +retry: + if (!task_function_call(task, __perf_event_period, &pe)) + return 0; + + raw_spin_lock_irq(&ctx->lock); + if (ctx->is_active) { + raw_spin_unlock_irq(&ctx->lock); + task = ctx->task; + goto retry; + } + + __perf_event_period(&pe); raw_spin_unlock_irq(&ctx->lock); - return ret; + return 0; } static const struct file_operations perf_fops; @@ -4331,20 +4366,20 @@ static void ring_buffer_attach(struct perf_event *event, WARN_ON_ONCE(event->rcu_pending); old_rb = event->rb; - event->rcu_batches = 
get_state_synchronize_rcu(); - event->rcu_pending = 1; - spin_lock_irqsave(&old_rb->event_lock, flags); list_del_rcu(&event->rb_entry); spin_unlock_irqrestore(&old_rb->event_lock, flags); - } - if (event->rcu_pending && rb) { - cond_synchronize_rcu(event->rcu_batches); - event->rcu_pending = 0; + event->rcu_batches = get_state_synchronize_rcu(); + event->rcu_pending = 1; } if (rb) { + if (event->rcu_pending) { + cond_synchronize_rcu(event->rcu_batches); + event->rcu_pending = 0; + } + spin_lock_irqsave(&rb->event_lock, flags); list_add_rcu(&event->rb_entry, &rb->event_list); spin_unlock_irqrestore(&rb->event_lock, flags); @@ -4376,14 +4411,6 @@ static void ring_buffer_wakeup(struct perf_event *event) rcu_read_unlock(); } -static void rb_free_rcu(struct rcu_head *rcu_head) -{ - struct ring_buffer *rb; - - rb = container_of(rcu_head, struct ring_buffer, rcu_head); - rb_free(rb); -} - struct ring_buffer *ring_buffer_get(struct perf_event *event) { struct ring_buffer *rb; @@ -4766,12 +4793,20 @@ static const struct file_operations perf_fops = { * to user-space before waking everybody up. */ +static inline struct fasync_struct **perf_event_fasync(struct perf_event *event) +{ + /* only the parent has fasync state */ + if (event->parent) + event = event->parent; + return &event->fasync; +} + void perf_event_wakeup(struct perf_event *event) { ring_buffer_wakeup(event); if (event->pending_kill) { - kill_fasync(&event->fasync, SIGIO, event->pending_kill); + kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill); event->pending_kill = 0; } } @@ -6117,7 +6152,7 @@ static int __perf_event_overflow(struct perf_event *event, else perf_event_output(event, data, regs); - if (event->fasync && event->pending_kill) { + if (*perf_event_fasync(event) && event->pending_kill) { event->pending_wakeup = 1; irq_work_queue(&event->pending); } diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 9f6ce9ba4a043..a6adc36a3732f 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -11,6 +11,7 @@ struct ring_buffer { atomic_t refcount; struct rcu_head rcu_head; + struct irq_work irq_work; #ifdef CONFIG_PERF_USE_VMALLOC struct work_struct work; int page_order; /* allocation order */ @@ -55,6 +56,15 @@ struct ring_buffer { }; extern void rb_free(struct ring_buffer *rb); + +static inline void rb_free_rcu(struct rcu_head *rcu_head) +{ + struct ring_buffer *rb; + + rb = container_of(rcu_head, struct ring_buffer, rcu_head); + rb_free(rb); +} + extern struct ring_buffer * rb_alloc(int nr_pages, long watermark, int cpu, int flags); extern void perf_event_wakeup(struct perf_event *event); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 725c416085e31..7f63ad978cb8b 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -221,6 +221,8 @@ void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } +static void rb_irq_work(struct irq_work *work); + static void ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) { @@ -241,6 +243,16 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) INIT_LIST_HEAD(&rb->event_list); spin_lock_init(&rb->event_lock); + init_irq_work(&rb->irq_work, rb_irq_work); +} + +static void ring_buffer_put_async(struct ring_buffer *rb) +{ + if (!atomic_dec_and_test(&rb->refcount)) + return; + + rb->rcu_head.next = (void *)rb; + irq_work_queue(&rb->irq_work); } /* @@ -319,7 +331,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, rb_free_aux(rb); err: - 
ring_buffer_put(rb); + ring_buffer_put_async(rb); handle->event = NULL; return NULL; @@ -370,7 +382,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, local_set(&rb->aux_nest, 0); rb_free_aux(rb); - ring_buffer_put(rb); + ring_buffer_put_async(rb); } /* @@ -547,17 +559,30 @@ static void __rb_free_aux(struct ring_buffer *rb) rb->aux_priv = NULL; } - for (pg = 0; pg < rb->aux_nr_pages; pg++) - rb_free_aux_page(rb, pg); + if (rb->aux_nr_pages) { + for (pg = 0; pg < rb->aux_nr_pages; pg++) + rb_free_aux_page(rb, pg); - kfree(rb->aux_pages); - rb->aux_nr_pages = 0; + kfree(rb->aux_pages); + rb->aux_nr_pages = 0; + } } void rb_free_aux(struct ring_buffer *rb) { if (atomic_dec_and_test(&rb->aux_refcount)) + irq_work_queue(&rb->irq_work); +} + +static void rb_irq_work(struct irq_work *work) +{ + struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work); + + if (!atomic_read(&rb->aux_refcount)) __rb_free_aux(rb); + + if (rb->rcu_head.next == (void *)rb) + call_rcu(&rb->rcu_head, rb_free_rcu); } #ifndef CONFIG_PERF_USE_VMALLOC diff --git a/kernel/fork.c b/kernel/fork.c index 03c1eaaa6ef56..8209fa2d36ef8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1854,13 +1854,21 @@ static int check_unshare_flags(unsigned long unshare_flags) CLONE_NEWUSER|CLONE_NEWPID)) return -EINVAL; /* - * Not implemented, but pretend it works if there is nothing to - * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND - * needs to unshare vm. + * Not implemented, but pretend it works if there is nothing + * to unshare. Note that unsharing the address space or the + * signal handlers also need to unshare the signal queues (aka + * CLONE_THREAD). */ if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) { - /* FIXME: get_task_mm() increments ->mm_users */ - if (atomic_read(¤t->mm->mm_users) > 1) + if (!thread_group_empty(current)) + return -EINVAL; + } + if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) { + if (atomic_read(¤t->sighand->count) > 1) + return -EINVAL; + } + if (unshare_flags & CLONE_VM) { + if (!current_is_single_threaded()) return -EINVAL; } @@ -1928,16 +1936,16 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) */ if (unshare_flags & CLONE_NEWUSER) unshare_flags |= CLONE_THREAD | CLONE_FS; - /* - * If unsharing a thread from a thread group, must also unshare vm. - */ - if (unshare_flags & CLONE_THREAD) - unshare_flags |= CLONE_VM; /* * If unsharing vm, must also unshare signal handlers. */ if (unshare_flags & CLONE_VM) unshare_flags |= CLONE_SIGHAND; + /* + * If unsharing a signal handlers, must also unshare the signal queues. + */ + if (unshare_flags & CLONE_SIGHAND) + unshare_flags |= CLONE_THREAD; /* * If unsharing namespace, must also unshare filesystem information. */ diff --git a/kernel/futex.c b/kernel/futex.c index 2579e407ff67d..f3043db6d36f1 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2632,6 +2632,11 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); + /* + * Drop the reference to the pi state which + * the requeue_pi() code acquired for us. 
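
Note on the rb_free_aux()/rb_irq_work() hunks above: when the last reference can be dropped from a context where freeing is not allowed, the put path only marks the buffer and queues irq_work, and the actual free runs later from a safe context. A userspace sketch of the mark-and-defer pattern; the reap step stands in for the queued irq_work and all names are hypothetical:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct rb {
        atomic_int refcount;
        atomic_bool free_pending;       /* set where freeing is unsafe */
};

/* drop a reference from a context where free() must not run (the
 * kernel analogue: the final put arriving in NMI/irq context) */
static void rb_put_async(struct rb *rb)
{
        if (atomic_fetch_sub(&rb->refcount, 1) != 1)
                return;                         /* not the last reference */
        atomic_store(&rb->free_pending, true);  /* defer, do not free here */
}

/* runs later from a safe context, like rb_irq_work() above */
static void rb_reap(struct rb *rb)
{
        if (atomic_exchange(&rb->free_pending, false))
                free(rb);
}

int main(void)
{
        struct rb *rb = calloc(1, sizeof(*rb));

        if (!rb)
                return 1;
        atomic_store(&rb->refcount, 1);
        rb_put_async(rb);       /* last put in the "unsafe" context */
        rb_reap(rb);            /* deferred free in the "safe" one */
        return 0;
}
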
+ */ + free_pi_state(q.pi_state); spin_unlock(q.lock_ptr); } } else { diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index eb9a4ea394ab3..94bbd8fee90da 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -933,6 +933,23 @@ int irq_chip_set_affinity_parent(struct irq_data *data, return -ENOSYS; } +/** + * irq_chip_set_type_parent - Set IRQ type on the parent interrupt + * @data: Pointer to interrupt specific data + * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h + * + * Conditional, as the underlying parent chip might not implement it. + */ +int irq_chip_set_type_parent(struct irq_data *data, unsigned int type) +{ + data = data->parent_data; + + if (data->chip->irq_set_type) + return data->chip->irq_set_type(data, type); + + return -ENOSYS; +} + /** * irq_chip_retrigger_hierarchy - Retrigger an interrupt in hardware * @data: Pointer to interrupt specific data @@ -946,7 +963,7 @@ int irq_chip_retrigger_hierarchy(struct irq_data *data) if (data->chip && data->chip->irq_retrigger) return data->chip->irq_retrigger(data); - return -ENOSYS; + return 0; } /** diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index d5d0f7345c545..74d90a7542688 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -104,7 +104,7 @@ int devm_request_any_context_irq(struct device *dev, unsigned int irq, return -ENOMEM; rc = request_any_context_irq(irq, handler, irqflags, devname, dev_id); - if (rc) { + if (rc < 0) { devres_free(dr); return rc; } @@ -113,7 +113,7 @@ int devm_request_any_context_irq(struct device *dev, unsigned int irq, dr->dev_id = dev_id; devres_add(dev, dr); - return 0; + return rc; } EXPORT_SYMBOL(devm_request_any_context_irq); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index df2f4642d1e7b..5c38f59741e25 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "internals.h" @@ -323,18 +324,29 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) void register_irq_proc(unsigned int irq, struct irq_desc *desc) { + static DEFINE_MUTEX(register_lock); char name [MAX_NAMELEN]; - if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip) || desc->dir) + if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip)) return; + /* + * irq directories are registered only when a handler is + * added, not when the descriptor is created, so multiple + * tasks might try to register at the same time. + */ + mutex_lock(®ister_lock); + + if (desc->dir) + goto out_unlock; + memset(name, 0, MAX_NAMELEN); sprintf(name, "%d", irq); /* create /proc/irq/1234 */ desc->dir = proc_mkdir(name, root_irq_dir); if (!desc->dir) - return; + goto out_unlock; #ifdef CONFIG_SMP /* create /proc/irq//smp_affinity */ @@ -355,6 +367,9 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) proc_create_data("spurious", 0444, desc->dir, &irq_spurious_proc_fops, (void *)(long)irq); + +out_unlock: + mutex_unlock(®ister_lock); } void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 9065107f083e9..7a5237a1bce5b 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -75,13 +75,21 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq) !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) { #ifdef CONFIG_HARDIRQS_SW_RESEND /* - * If the interrupt has a parent irq and runs - * in the thread context of the parent irq, - * retrigger the parent. 
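
Note on the kernel/irq/devres.c hunk above: request_any_context_irq() returns a negative errno on failure but IRQC_IS_HARDIRQ (0) or IRQC_IS_NESTED (1) on success, so the old "if (rc)" misread a successful nested-thread request as an error. A userspace sketch of the corrected check; the request function is a stand-in:

#include <stdio.h>

enum { IRQC_IS_HARDIRQ = 0, IRQC_IS_NESTED = 1 };

/* stand-in: positive context code on success, negative errno on failure */
static int fake_request_any_context_irq(void)
{
        return IRQC_IS_NESTED;  /* a successful, nested-thread request */
}

int main(void)
{
        int rc = fake_request_any_context_irq();

        if (rc < 0)             /* "if (rc)" would also reject this success */
                return 1;

        printf("got %s context\n",
               rc == IRQC_IS_NESTED ? "nested" : "hardirq");
        return 0;               /* callers still receive rc's context info */
}
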
+ * If the interrupt is running in the thread + * context of the parent irq we need to be + * careful, because we cannot trigger it + * directly. */ - if (desc->parent_irq && - irq_settings_is_nested_thread(desc)) + if (irq_settings_is_nested_thread(desc)) { + /* + * If the parent_irq is valid, we + * retrigger the parent, otherwise we + * do nothing. + */ + if (!desc->parent_irq) + return; irq = desc->parent_irq; + } /* Set it pending and activate the softirq: */ set_bit(irq, irqs_resend); tasklet_schedule(&resend_tasklet); diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 284e2691e3807..9ec555732f1a9 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -179,7 +179,9 @@ static int klp_find_object_symbol(const char *objname, const char *name, .count = 0 }; + mutex_lock(&module_mutex); kallsyms_on_each_symbol(klp_find_callback, &args); + mutex_unlock(&module_mutex); if (args.count == 0) pr_err("symbol '%s' not found in symbol table\n", name); @@ -219,13 +221,19 @@ static int klp_verify_vmlinux_symbol(const char *name, unsigned long addr) .name = name, .addr = addr, }; + int ret; - if (kallsyms_on_each_symbol(klp_verify_callback, &args)) - return 0; + mutex_lock(&module_mutex); + ret = kallsyms_on_each_symbol(klp_verify_callback, &args); + mutex_unlock(&module_mutex); - pr_err("symbol '%s' not found at specified address 0x%016lx, kernel mismatch?\n", - name, addr); - return -EINVAL; + if (!ret) { + pr_err("symbol '%s' not found at specified address 0x%016lx, kernel mismatch?\n", + name, addr); + return -EINVAL; + } + + return 0; } static int klp_find_verify_func_addr(struct klp_object *obj, diff --git a/kernel/module.c b/kernel/module.c index cfc9e843a9240..3b9ff966edb93 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -906,11 +906,15 @@ void symbol_put_addr(void *addr) if (core_kernel_text(a)) return; - /* module_text_address is safe here: we're supposed to have reference - * to module from symbol_get, so it can't go away. */ + /* + * Even though we hold a reference on the module; we still need to + * disable preemption in order to safely traverse the data structure. + */ + preempt_disable(); modaddr = __module_text_address(a); BUG_ON(!modaddr); module_put(modaddr); + preempt_enable(); } EXPORT_SYMBOL_GPL(symbol_put_addr); diff --git a/kernel/panic.c b/kernel/panic.c index 8136ad76e5fd3..a4f7820f59302 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -23,6 +23,7 @@ #include #include #include +#include #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 @@ -146,6 +147,17 @@ void panic(const char *fmt, ...) bust_spinlocks(0); + /* + * We may have ended up stopping the CPU holding the lock (in + * smp_send_stop()) while still having some valuable data in the console + * buffer. Try to acquire the lock then release it regardless of the + * result. The release will also print the buffers out. Locks debug + * should be disabled to avoid reporting bad unlock balance when + * panic() is not being called from OOPS.
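
Note on the kernel/livepatch/core.c hunks above: walking the symbol tables with kallsyms_on_each_symbol() is only safe against concurrent module load and unload while module_mutex is held, which is what the added lock/unlock pair provides. A userspace sketch of holding the writers' lock across an iteration; names are hypothetical:

#include <pthread.h>
#include <stdio.h>

struct sym {
        const char *name;
        struct sym *next;
};

static pthread_mutex_t table_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct sym sym_b = { "b", NULL };
static struct sym sym_a = { "a", &sym_b };
static struct sym *table = &sym_a;

static int for_each_sym(int (*fn)(struct sym *, void *), void *data)
{
        struct sym *s;
        int ret = 0;

        pthread_mutex_lock(&table_mutex);       /* what the fix adds */
        for (s = table; s; s = s->next) {
                ret = fn(s, data);
                if (ret)
                        break;
        }
        pthread_mutex_unlock(&table_mutex);
        return ret;
}

static int print_sym(struct sym *s, void *data)
{
        (void)data;
        return puts(s->name) < 0;
}

int main(void)
{
        return for_each_sym(print_sym, NULL);
}
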
+ */ + debug_locks_off(); + console_flush_on_panic(); + if (!panic_blink) panic_blink = no_blink; diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 7e01f78f04177..9e302315e33db 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -187,7 +187,7 @@ config DPM_WATCHDOG config DPM_WATCHDOG_TIMEOUT int "Watchdog timeout in seconds" range 1 120 - default 12 + default 60 depends on DPM_WATCHDOG config PM_TRACE diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c099b082cd027..3c1aca0c35438 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -484,11 +484,11 @@ int check_syslog_permissions(int type, bool from_file) * already done the capabilities checks at open time. */ if (from_file && type != SYSLOG_ACTION_OPEN) - return 0; + goto ok; if (syslog_action_restricted(type)) { if (capable(CAP_SYSLOG)) - return 0; + goto ok; /* * For historical reasons, accept CAP_SYS_ADMIN too, with * a warning. @@ -498,10 +498,11 @@ int check_syslog_permissions(int type, bool from_file) "CAP_SYS_ADMIN but no CAP_SYSLOG " "(deprecated).\n", current->comm, task_pid_nr(current)); - return 0; + goto ok; } return -EPERM; } +ok: return security_syslog(type); } @@ -1263,10 +1264,6 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) if (error) goto out; - error = security_syslog(type); - if (error) - return error; - switch (type) { case SYSLOG_ACTION_CLOSE: /* Close log */ break; @@ -2176,13 +2173,24 @@ void console_unlock(void) static u64 seen_seq; unsigned long flags; bool wake_klogd = false; - bool retry; + bool do_cond_resched, retry; if (console_suspended) { up_console_sem(); return; } + /* + * Console drivers are called under logbuf_lock, so + * @console_may_schedule should be cleared before; however, we may + * end up dumping a lot of lines, for example, if called from + * console registration path, and should invoke cond_resched() + * between lines if allowable. Not doing so can cause a very long + * scheduling stall on a slow console leading to RCU stall and + * softlockup warnings which exacerbate the issue with more + * messages practically incapacitating the system. + */ + do_cond_resched = console_may_schedule; console_may_schedule = 0; /* flush buffered message fragment immediately to console */ @@ -2244,6 +2252,9 @@ void console_unlock(void) call_console_drivers(level, text, len); start_critical_timings(); local_irq_restore(flags); + + if (do_cond_resched) + cond_resched(); } console_locked = 0; @@ -2311,6 +2322,25 @@ void console_unblank(void) console_unlock(); } +/** + * console_flush_on_panic - flush console content on panic + * + * Immediately output all pending messages no matter what. + */ +void console_flush_on_panic(void) +{ + /* + * If someone else is holding the console lock, trylock will fail + * and may_schedule may be set. Ignore and proceed to unlock so + * that messages are flushed out. As this can be called from any + * context and we don't want to get preempted while flushing, + * ensure may_schedule is cleared. + */ + console_trylock(); + console_may_schedule = 0; + console_unlock(); +} + /* * Return the console tty driver structure and its associated index */ diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 069742d61c688..ec3086879cb51 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -170,6 +170,11 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) /* Move the ready-to-invoke callbacks to a local list. 
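
Note on the console_unlock() hunk above: console_may_schedule has to be cleared before the print loop starts, so whether rescheduling is allowed is snapshotted into a local first and consulted between lines. A userspace sketch of the snapshot-then-clear pattern; names are hypothetical:

#include <sched.h>
#include <stdbool.h>
#include <stdio.h>

static bool console_may_schedule = true;

static void flush_lines(int nr_lines)
{
        bool do_cond_resched = console_may_schedule;    /* snapshot first */

        console_may_schedule = false;   /* cleared for the whole flush */

        for (int i = 0; i < nr_lines; i++) {
                printf("line %d\n", i); /* stands in for the console driver */
                if (do_cond_resched)
                        sched_yield();  /* yield between lines if allowed */
        }
}

int main(void)
{
        flush_lines(3);
        return 0;
}
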
*/ local_irq_save(flags); + if (rcp->donetail == &rcp->rcucblist) { + /* No callbacks ready, so just leave. */ + local_irq_restore(flags); + return; + } RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1)); list = rcp->rcucblist; rcp->rcucblist = *rcp->donetail; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 123673291ffbb..4d870eb6086ba 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2217,11 +2217,11 @@ static struct rq *finish_task_switch(struct task_struct *prev) * If a task dies, then it sets TASK_DEAD in tsk->state and calls * schedule one last time. The schedule call will never return, and * the scheduled task must drop that reference. - * The test for TASK_DEAD must occur while the runqueue locks are - * still held, otherwise prev could be scheduled on another cpu, die - * there before we look at prev->state, and then the reference would - * be dropped twice. - * Manfred Spraul + * + * We must observe prev->state before clearing prev->on_cpu (in + * finish_lock_switch), otherwise a concurrent wakeup can get prev + * running on another CPU and we could race with its RUNNING -> DEAD + * transition, resulting in a double drop. */ prev_state = prev->state; vtime_task_switch(prev); @@ -2358,13 +2358,20 @@ unsigned long nr_running(void) /* * Check if only the current task is running on the cpu. + * + * Caution: this function does not check that the caller has disabled + * preemption, thus the result might have a time-of-check-to-time-of-use + * race. The caller is responsible to use it correctly, for example: + * + * - from a non-preemptable section (of course) + * + * - from a thread that is bound to a single CPU + * + * - in a loop with very short iterations (e.g. a polling loop) */ bool single_task_running(void) { - if (cpu_rq(smp_processor_id())->nr_running == 1) - return true; - else - return false; + return raw_rq()->nr_running == 1; } EXPORT_SYMBOL(single_task_running); @@ -4225,7 +4232,7 @@ SYSCALL_DEFINE0(sched_yield) int __sched _cond_resched(void) { - if (should_resched()) { + if (should_resched(0)) { preempt_schedule_common(); return 1; } @@ -4243,7 +4250,7 @@ EXPORT_SYMBOL(_cond_resched); */ int __cond_resched_lock(spinlock_t *lock) { - int resched = should_resched(); + int resched = should_resched(PREEMPT_LOCK_OFFSET); int ret = 0; lockdep_assert_held(lock); @@ -4265,7 +4272,7 @@ int __sched __cond_resched_softirq(void) { BUG_ON(!in_softirq()); - if (should_resched()) { + if (should_resched(SOFTIRQ_DISABLE_OFFSET)) { local_bh_enable(); preempt_schedule_common(); local_bh_disable(); @@ -5328,6 +5335,14 @@ static int sched_cpu_active(struct notifier_block *nfb, case CPU_STARTING: set_cpu_rq_start_time(); return NOTIFY_OK; + case CPU_ONLINE: + /* + * At this point a starting CPU has marked itself as online via + * set_cpu_online(). But it might not yet have marked itself + * as active, which is essential from here on. + * + * Thus, fall-through and help the starting CPU along. + */ case CPU_DOWN_FAILED: set_cpu_active((long)hcpu, true); return NOTIFY_OK; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c2980e8733bcb..77690b653ca9e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5126,18 +5126,21 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev) * entity, update_curr() will update its vruntime, otherwise * forget we've ever seen it.
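
Note on the finish_task_switch() comment above: the matching code change (in the kernel/sched/sched.h hunk below) turns the clearing of prev->on_cpu into smp_store_release(), pairing with an acquire on the waking side so that prev->state is observed before on_cpu reads as zero. A C11-atomics userspace sketch of that release/acquire pairing; names are hypothetical:

#include <stdatomic.h>

struct task {
        int state;              /* ordinary field, e.g. TASK_DEAD bookkeeping */
        atomic_int on_cpu;
};

/* previous task finishing a context switch */
static void finish_switch(struct task *prev)
{
        prev->state = 1;        /* all prior writes, including state... */
        /* ...are ordered before this release store; pairs with the
         * acquire load in try_wake() below */
        atomic_store_explicit(&prev->on_cpu, 0, memory_order_release);
}

/* waking side: only proceed once the task is off its old CPU */
static int try_wake(struct task *p)
{
        if (atomic_load_explicit(&p->on_cpu, memory_order_acquire))
                return 0;       /* still running, try again later */
        return p->state;        /* guaranteed to see the write above */
}

int main(void)
{
        struct task t = { .state = 0 };

        atomic_store(&t.on_cpu, 1);
        finish_switch(&t);
        return try_wake(&t) == 1 ? 0 : 1;
}
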
*/ - if (curr && curr->on_rq) - update_curr(cfs_rq); - else - curr = NULL; + if (curr) { + if (curr->on_rq) + update_curr(cfs_rq); + else + curr = NULL; - /* - * This call to check_cfs_rq_runtime() will do the throttle and - * dequeue its entity in the parent(s). Therefore the 'simple' - * nr_running test will indeed be correct. - */ - if (unlikely(check_cfs_rq_runtime(cfs_rq))) - goto simple; + /* + * This call to check_cfs_rq_runtime() will do the + * throttle and dequeue its entity in the parent(s). + * Therefore the 'simple' nr_running test will indeed + * be correct. + */ + if (unlikely(check_cfs_rq_runtime(cfs_rq))) + goto simple; + } se = pick_next_entity(cfs_rq, curr); cfs_rq = group_cfs_rq(se); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e0e1299939588..aa1f059de4f7f 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1068,9 +1068,10 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) * After ->on_cpu is cleared, the task can be moved to a different CPU. * We must ensure this doesn't happen until the switch is completely * finished. + * + * Pairs with the control dependency and rmb in try_to_wake_up(). */ - smp_wmb(); - prev->on_cpu = 0; + smp_store_release(&prev->on_cpu, 0); #endif #ifdef CONFIG_DEBUG_SPINLOCK /* this is a valid case when another task releases the spinlock */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 4f44028943e66..30c682adcdeb8 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -317,24 +317,24 @@ static inline void seccomp_sync_threads(void) put_seccomp_filter(thread); smp_store_release(&thread->seccomp.filter, caller->seccomp.filter); + + /* + * Don't let an unprivileged task work around + * the no_new_privs restriction by creating + * a thread that sets it up, enters seccomp, + * then dies. + */ + if (task_no_new_privs(caller)) + task_set_no_new_privs(thread); + /* * Opt the other thread into seccomp if needed. * As threads are considered to be trust-realm * equivalent (see ptrace_may_access), it is safe to * allow one thread to transition the other. */ - if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) { - /* - * Don't let an unprivileged task work around - * the no_new_privs restriction by creating - * a thread that sets it up, enters seccomp, - * then dies. - */ - if (task_no_new_privs(caller)) - task_set_no_new_privs(thread); - + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); - } } } diff --git a/kernel/signal.c b/kernel/signal.c index d51c5ddd855c8..0206be728dacb 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2753,12 +2753,15 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from) * Other callers might not initialize the si_lsb field, * so check explicitly for the right codes here. 
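
Note on the seccomp_sync_threads() hunk above: propagating the caller's no_new_privs flag is hoisted out of the mode check so it now applies to every sibling thread, not only those still in SECCOMP_MODE_DISABLED. A userspace sketch of the reordering; the types and mode values are hypothetical stand-ins:

#include <stdbool.h>

struct thread {
        bool no_new_privs;
        int seccomp_mode;       /* 0 stands in for SECCOMP_MODE_DISABLED */
};

static void sync_thread(const struct thread *caller, struct thread *t)
{
        /* hoisted out of the mode check: applies to every thread, so a
         * dying filtered thread cannot leave a sibling unrestricted */
        if (caller->no_new_privs)
                t->no_new_privs = true;

        if (t->seccomp_mode == 0)
                t->seccomp_mode = 2;    /* stands in for MODE_FILTER */
}

int main(void)
{
        struct thread caller = { true, 2 };
        struct thread t = { false, 2 };         /* already in filter mode */

        sync_thread(&caller, &t);
        return t.no_new_privs ? 0 : 1;          /* now set regardless */
}
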
*/ - if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) + if (from->si_signo == SIGBUS && + (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)) err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); #endif #ifdef SEGV_BNDERR - err |= __put_user(from->si_lower, &to->si_lower); - err |= __put_user(from->si_upper, &to->si_upper); + if (from->si_signo == SIGSEGV && from->si_code == SEGV_BNDERR) { + err |= __put_user(from->si_lower, &to->si_lower); + err |= __put_user(from->si_upper, &to->si_upper); + } #endif break; case __SI_CHLD: @@ -3022,7 +3025,7 @@ COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo, int, sig, struct compat_siginfo __user *, uinfo) { - siginfo_t info; + siginfo_t info = {}; int ret = copy_siginfo_from_user32(&info, uinfo); if (unlikely(ret)) return ret; @@ -3066,7 +3069,7 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo, int, sig, struct compat_siginfo __user *, uinfo) { - siginfo_t info; + siginfo_t info = {}; if (copy_siginfo_from_user32(&info, uinfo)) return -EFAULT; diff --git a/kernel/sys.c b/kernel/sys.c index a4e372b798a5f..25ae8d2e65e2d 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1854,11 +1854,13 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL; } - if (prctl_map.exe_fd != (u32)-1) + if (prctl_map.exe_fd != (u32)-1) { error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd); - down_read(&mm->mmap_sem); - if (error) - goto out; + if (error) + return error; + } + + down_write(&mm->mmap_sem); /* * We don't validate if these members are pointing to @@ -1895,10 +1897,8 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data if (prctl_map.auxv_size) memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv)); - error = 0; -out: - up_read(&mm->mmap_sem); - return error; + up_write(&mm->mmap_sem); + return 0; } #endif /* CONFIG_CHECKPOINT_RESTORE */ @@ -1930,7 +1930,7 @@ static int prctl_set_mm(int opt, unsigned long addr, error = -EINVAL; - down_read(&mm->mmap_sem); + down_write(&mm->mmap_sem); vma = find_vma(mm, addr); switch (opt) { @@ -2033,7 +2033,7 @@ static int prctl_set_mm(int opt, unsigned long addr, error = 0; out: - up_read(&mm->mmap_sem); + up_write(&mm->mmap_sem); return error; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2082b1a88fb9a..c3eee4c6d6c17 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1531,12 +1531,6 @@ static struct ctl_table vm_table[] = { { } }; -#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) -static struct ctl_table binfmt_misc_table[] = { - { } -}; -#endif - static struct ctl_table fs_table[] = { { .procname = "inode-nr", @@ -1690,7 +1684,7 @@ static struct ctl_table fs_table[] = { { .procname = "binfmt_misc", .mode = 0555, - .child = binfmt_misc_table, + .child = sysctl_mount_point, }, #endif { diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index ce033c7aa2e8f..9cff0ab82b635 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -69,10 +69,10 @@ static ssize_t posix_clock_read(struct file *fp, char __user *buf, static unsigned int posix_clock_poll(struct file *fp, poll_table *wait) { struct posix_clock *clk = get_posix_clock(fp); - int result = 0; + unsigned int result = 0; if (!clk) - return -ENODEV; + return POLLERR; if (clk->ops.poll) result = clk->ops.poll(clk, fp, wait); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 946acb72179fa..65dbf8aee751d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ 
-316,8 +316,7 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) delta = timekeeping_get_delta(tkr); - nsec = delta * tkr->mult + tkr->xtime_nsec; - nsec >>= tkr->shift; + nsec = (delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift; /* If arch requires, add in get_arch_timeoffset() */ return nsec + arch_gettimeoffset(); @@ -1615,7 +1614,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, negative = (tick_error < 0); /* Sort out the magnitude of the correction */ - tick_error = abs(tick_error); + tick_error = abs64(tick_error); for (adj = 0; tick_error > interval; adj++) tick_error >>= 1; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 02bece4a99ea3..eb11011b5292a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -98,6 +98,13 @@ struct ftrace_pid { struct pid *pid; }; +static bool ftrace_pids_enabled(void) +{ + return !list_empty(&ftrace_pids); +} + +static void ftrace_update_trampoline(struct ftrace_ops *ops); + /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -109,7 +116,6 @@ static DEFINE_MUTEX(ftrace_lock); static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; -ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; static struct ftrace_ops control_ops; @@ -183,14 +189,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, if (!test_tsk_trace_trace(current)) return; - ftrace_pid_function(ip, parent_ip, op, regs); -} - -static void set_ftrace_pid_function(ftrace_func_t func) -{ - /* do not set ftrace_pid_function to itself! */ - if (func != ftrace_pid_func) - ftrace_pid_function = func; + op->saved_func(ip, parent_ip, op, regs); } /** @@ -202,7 +201,6 @@ static void set_ftrace_pid_function(ftrace_func_t func) void clear_ftrace_function(void) { ftrace_trace_function = ftrace_stub; - ftrace_pid_function = ftrace_stub; } static void control_ops_disable_all(struct ftrace_ops *ops) @@ -436,6 +434,12 @@ static int __register_ftrace_function(struct ftrace_ops *ops) } else add_ftrace_ops(&ftrace_ops_list, ops); + /* Always save the function, and reset at unregistering */ + ops->saved_func = ops->func; + + if (ops->flags & FTRACE_OPS_FL_PID && ftrace_pids_enabled()) + ops->func = ftrace_pid_func; + ftrace_update_trampoline(ops); if (ftrace_enabled) @@ -463,15 +467,28 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_enabled) update_ftrace_function(); + ops->func = ops->saved_func; + return 0; } static void ftrace_update_pid_func(void) { + bool enabled = ftrace_pids_enabled(); + struct ftrace_ops *op; + /* Only do something if we are tracing something */ if (ftrace_trace_function == ftrace_stub) return; + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (op->flags & FTRACE_OPS_FL_PID) { + op->func = enabled ? 
ftrace_pid_func : + op->saved_func; + ftrace_update_trampoline(op); + } + } while_for_each_ftrace_op(op); + update_ftrace_function(); } @@ -1133,7 +1150,8 @@ static struct ftrace_ops global_ops = { .local_hash.filter_hash = EMPTY_HASH, INIT_OPS_HASH(global_ops) .flags = FTRACE_OPS_FL_RECURSION_SAFE | - FTRACE_OPS_FL_INITIALIZED, + FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID, }; /* @@ -5023,7 +5041,9 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops) static struct ftrace_ops global_ops = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, + .flags = FTRACE_OPS_FL_RECURSION_SAFE | + FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID, }; static int __init ftrace_nodyn_init(void) @@ -5080,11 +5100,6 @@ void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) if (WARN_ON(tr->ops->func != ftrace_stub)) printk("ftrace ops had %pS for function\n", tr->ops->func); - /* Only the top level instance does pid tracing */ - if (!list_empty(&ftrace_pids)) { - set_ftrace_pid_function(func); - func = ftrace_pid_func; - } } tr->ops->func = func; tr->ops->private = tr; @@ -5371,7 +5386,7 @@ static void *fpid_start(struct seq_file *m, loff_t *pos) { mutex_lock(&ftrace_lock); - if (list_empty(&ftrace_pids) && (!*pos)) + if (!ftrace_pids_enabled() && (!*pos)) return (void *) 1; return seq_list_start(&ftrace_pids, *pos); @@ -5610,6 +5625,7 @@ static struct ftrace_ops graph_ops = { .func = ftrace_stub, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID | FTRACE_OPS_FL_STUB, #ifdef FTRACE_GRAPH_TRAMP_ADDR .trampoline = FTRACE_GRAPH_TRAMP_ADDR, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d2612016de94f..921691c5cb045 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -444,6 +444,7 @@ enum { TRACE_CONTROL_BIT, + TRACE_BRANCH_BIT, /* * Abuse of the trace_recursion. * As we need a way to maintain state if we are tracing the function @@ -1312,7 +1313,7 @@ void trace_event_init(void); void trace_event_enum_update(struct trace_enum_map **map, int len); #else static inline void __init trace_event_init(void) { } -static inlin void trace_event_enum_update(struct trace_enum_map **map, int len) { } +static inline void trace_event_enum_update(struct trace_enum_map **map, int len) { } #endif extern struct trace_iterator *tracepoint_print_iter; diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 57cbf1efdd440..1879980f06c22 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -36,9 +36,12 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) struct trace_branch *entry; struct ring_buffer *buffer; unsigned long flags; - int cpu, pc; + int pc; const char *p; + if (current->trace_recursion & TRACE_BRANCH_BIT) + return; + /* * I would love to save just the ftrace_likely_data pointer, but * this code can also be used by modules. 
Ugly things can happen @@ -49,10 +52,10 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) if (unlikely(!tr)) return; - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); - if (atomic_inc_return(&data->disabled) != 1) + raw_local_irq_save(flags); + current->trace_recursion |= TRACE_BRANCH_BIT; + data = this_cpu_ptr(tr->trace_buffer.data); + if (atomic_read(&data->disabled)) goto out; pc = preempt_count(); @@ -81,8 +84,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) __buffer_unlock_commit(buffer, event); out: - atomic_dec(&data->disabled); - local_irq_restore(flags); + current->trace_recursion &= ~TRACE_BRANCH_BIT; + raw_local_irq_restore(flags); } static inline diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index c4de47fc5cca0..f69ec1295b0b9 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -683,7 +683,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos) * The ftrace subsystem is for showing formats only. * They can not be enabled or disabled via the event files. */ - if (call->class && call->class->reg) + if (call->class && call->class->reg && + !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) return file; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 7f2e97ce71a7d..52adf02d76191 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1056,6 +1056,9 @@ static void parse_init(struct filter_parse_state *ps, static char infix_next(struct filter_parse_state *ps) { + if (!ps->infix.cnt) + return 0; + ps->infix.cnt--; return ps->infix.string[ps->infix.tail++]; @@ -1071,6 +1074,9 @@ static char infix_peek(struct filter_parse_state *ps) static void infix_advance(struct filter_parse_state *ps) { + if (!ps->infix.cnt) + return; + ps->infix.cnt--; ps->infix.tail++; } @@ -1385,7 +1391,9 @@ static int check_preds(struct filter_parse_state *ps) if (elt->op != OP_NOT) cnt--; n_normal_preds++; - WARN_ON_ONCE(cnt < 0); + /* all ops should have operands */ + if (cnt < 0) + break; } if (cnt != 1 || !n_normal_preds || n_logical_preds >= n_normal_preds) { diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 586ad91300b0f..6d631161705c5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -127,6 +127,11 @@ enum { * * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads. * + * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads. + * + * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or + * sched-RCU for reads. + * * WQ: wq->mutex protected. * * WR: wq->mutex protected for writes. Sched-RCU protected for reads. 
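
Note on the trace_branch hunk above: a per-task recursion bit lets the branch probe detect that it re-entered itself through its own instrumented code and bail out, replacing the per-CPU disabled counter as the re-entry guard. A userspace sketch using a thread-local flag; names are hypothetical:

#include <stdbool.h>
#include <stdio.h>

static _Thread_local bool in_probe;

static void probe(void (*record)(void));

static void recursive_record(void)
{
        probe(recursive_record);        /* would recurse without the bit */
        puts("recorded");
}

static void probe(void (*record)(void))
{
        if (in_probe)
                return;         /* already inside the probe: bail out */

        in_probe = true;
        record();               /* may itself hit an instrumented branch */
        in_probe = false;
}

int main(void)
{
        probe(recursive_record);        /* prints "recorded" exactly once */
        return 0;
}
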
@@ -247,8 +252,8 @@ struct workqueue_struct { int nr_drainers; /* WQ: drain in progress */ int saved_max_active; /* WQ: saved pwq max_active */ - struct workqueue_attrs *unbound_attrs; /* WQ: only for unbound wqs */ - struct pool_workqueue *dfl_pwq; /* WQ: only for unbound wqs */ + struct workqueue_attrs *unbound_attrs; /* PW: only for unbound wqs */ + struct pool_workqueue *dfl_pwq; /* PW: only for unbound wqs */ #ifdef CONFIG_SYSFS struct wq_device *wq_dev; /* I: for sysfs interface */ @@ -268,7 +273,7 @@ struct workqueue_struct { /* hot fields used during command issue, aligned to cacheline */ unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */ struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */ - struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node */ + struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */ }; static struct kmem_cache *pwq_cache; @@ -347,6 +352,12 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); lockdep_is_held(&wq->mutex), \ "sched RCU or wq->mutex should be held") +#define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ + rcu_lockdep_assert(rcu_read_lock_sched_held() || \ + lockdep_is_held(&wq->mutex) || \ + lockdep_is_held(&wq_pool_mutex), \ + "sched RCU, wq->mutex or wq_pool_mutex should be held") + #define for_each_cpu_worker_pool(pool, cpu) \ for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ @@ -551,7 +562,8 @@ static int worker_pool_assign_id(struct worker_pool *pool) * @wq: the target workqueue * @node: the node ID * - * This must be called either with pwq_lock held or sched RCU read locked. + * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU + * read locked. * If the pwq needs to be used beyond the locking in effect, the caller is * responsible for guaranteeing that the pwq stays online. * @@ -560,7 +572,17 @@ static int worker_pool_assign_id(struct worker_pool *pool) static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq, int node) { - assert_rcu_or_wq_mutex(wq); + assert_rcu_or_wq_mutex_or_pool_mutex(wq); + + /* + * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a + * delayed item is pending. The plan is to keep CPU -> NODE + * mapping valid and stable across CPU on/offlines. Once that + * happens, this workaround can be removed. 
+ */ + if (unlikely(node == NUMA_NO_NODE)) + return wq->dfl_pwq; + return rcu_dereference_raw(wq->numa_pwq_tbl[node]); } @@ -3425,17 +3447,6 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq, return pwq; } -/* undo alloc_unbound_pwq(), used only in the error path */ -static void free_unbound_pwq(struct pool_workqueue *pwq) -{ - lockdep_assert_held(&wq_pool_mutex); - - if (pwq) { - put_unbound_pool(pwq->pool); - kmem_cache_free(pwq_cache, pwq); - } -} - /** * wq_calc_node_mask - calculate a wq_attrs' cpumask for the specified node * @attrs: the wq_attrs of interest @@ -3488,6 +3499,7 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq, { struct pool_workqueue *old_pwq; + lockdep_assert_held(&wq_pool_mutex); lockdep_assert_held(&wq->mutex); /* link_pwq() can handle duplicate calls */ @@ -3498,42 +3510,48 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq, return old_pwq; } -/** - * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue - * @wq: the target workqueue - * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs() - * - * Apply @attrs to an unbound workqueue @wq. Unless disabled, on NUMA - * machines, this function maps a separate pwq to each NUMA node with - * possibles CPUs in @attrs->cpumask so that work items are affine to the - * NUMA node it was issued on. Older pwqs are released as in-flight work - * items finish. Note that a work item which repeatedly requeues itself - * back-to-back will stay on its current pwq. - * - * Performs GFP_KERNEL allocations. - * - * Return: 0 on success and -errno on failure. - */ -int apply_workqueue_attrs(struct workqueue_struct *wq, - const struct workqueue_attrs *attrs) +/* context to store the prepared attrs & pwqs before applying */ +struct apply_wqattrs_ctx { + struct workqueue_struct *wq; /* target workqueue */ + struct workqueue_attrs *attrs; /* attrs to apply */ + struct pool_workqueue *dfl_pwq; + struct pool_workqueue *pwq_tbl[]; +}; + +/* free the resources after success or abort */ +static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx) { + if (ctx) { + int node; + + for_each_node(node) + put_pwq_unlocked(ctx->pwq_tbl[node]); + put_pwq_unlocked(ctx->dfl_pwq); + + free_workqueue_attrs(ctx->attrs); + + kfree(ctx); + } +} + +/* allocate the attrs and pwqs for later installation */ +static struct apply_wqattrs_ctx * +apply_wqattrs_prepare(struct workqueue_struct *wq, + const struct workqueue_attrs *attrs) +{ + struct apply_wqattrs_ctx *ctx; struct workqueue_attrs *new_attrs, *tmp_attrs; - struct pool_workqueue **pwq_tbl, *dfl_pwq; - int node, ret; + int node; - /* only unbound workqueues can change attributes */ - if (WARN_ON(!(wq->flags & WQ_UNBOUND))) - return -EINVAL; + lockdep_assert_held(&wq_pool_mutex); - /* creating multiple pwqs breaks ordering guarantee */ - if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) - return -EINVAL; + ctx = kzalloc(sizeof(*ctx) + nr_node_ids * sizeof(ctx->pwq_tbl[0]), + GFP_KERNEL); - pwq_tbl = kzalloc(nr_node_ids * sizeof(pwq_tbl[0]), GFP_KERNEL); new_attrs = alloc_workqueue_attrs(GFP_KERNEL); tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL); - if (!pwq_tbl || !new_attrs || !tmp_attrs) - goto enomem; + if (!ctx || !new_attrs || !tmp_attrs) + goto out_free; /* make a copy of @attrs and sanitize it */ copy_workqueue_attrs(new_attrs, attrs); @@ -3546,76 +3564,112 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, */ 
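	/*
	 * tmp_attrs is only scratch space: for each node,
	 * wq_calc_node_cpumask() below rewrites tmp_attrs->cpumask before
	 * the per-node alloc_unbound_pwq(wq, tmp_attrs) call, while
	 * new_attrs keeps the sanitized caller-supplied cpumask that the
	 * default pwq is created from.
	 */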
	copy_workqueue_attrs(tmp_attrs, new_attrs);
 
-	/*
-	 * CPUs should stay stable across pwq creations and installations.
-	 * Pin CPUs, determine the target cpumask for each node and create
-	 * pwqs accordingly.
-	 */
-	get_online_cpus();
-
-	mutex_lock(&wq_pool_mutex);
-
 	/*
 	 * If something goes wrong during CPU up/down, we'll fall back to
 	 * the default pwq covering whole @attrs->cpumask.  Always create
 	 * it even if we don't use it immediately.
 	 */
-	dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
-	if (!dfl_pwq)
-		goto enomem_pwq;
+	ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
+	if (!ctx->dfl_pwq)
+		goto out_free;
 
 	for_each_node(node) {
 		if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) {
-			pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
-			if (!pwq_tbl[node])
-				goto enomem_pwq;
+			ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
+			if (!ctx->pwq_tbl[node])
+				goto out_free;
 		} else {
-			dfl_pwq->refcnt++;
-			pwq_tbl[node] = dfl_pwq;
+			ctx->dfl_pwq->refcnt++;
+			ctx->pwq_tbl[node] = ctx->dfl_pwq;
 		}
 	}
 
-	mutex_unlock(&wq_pool_mutex);
+	ctx->attrs = new_attrs;
+	ctx->wq = wq;
+	free_workqueue_attrs(tmp_attrs);
+	return ctx;
+
+out_free:
+	free_workqueue_attrs(tmp_attrs);
+	free_workqueue_attrs(new_attrs);
+	apply_wqattrs_cleanup(ctx);
+	return NULL;
+}
+
+/* set attrs and install prepared pwqs, @ctx points to old pwqs on return */
+static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
+{
+	int node;
 
 	/* all pwqs have been created successfully, let's install them */
-	mutex_lock(&wq->mutex);
+	mutex_lock(&ctx->wq->mutex);
 
-	copy_workqueue_attrs(wq->unbound_attrs, new_attrs);
+	copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
 
 	/* save the previous pwq and install the new one */
 	for_each_node(node)
-		pwq_tbl[node] = numa_pwq_tbl_install(wq, node, pwq_tbl[node]);
+		ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
+							  ctx->pwq_tbl[node]);
 
 	/* @dfl_pwq might not have been used, ensure it's linked */
-	link_pwq(dfl_pwq);
-	swap(wq->dfl_pwq, dfl_pwq);
+	link_pwq(ctx->dfl_pwq);
+	swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
 
-	mutex_unlock(&wq->mutex);
+	mutex_unlock(&ctx->wq->mutex);
+}
 
-	/* put the old pwqs */
-	for_each_node(node)
-		put_pwq_unlocked(pwq_tbl[node]);
-	put_pwq_unlocked(dfl_pwq);
+/**
+ * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
+ * @wq: the target workqueue
+ * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
+ *
+ * Apply @attrs to an unbound workqueue @wq.  Unless disabled, on NUMA
+ * machines, this function maps a separate pwq to each NUMA node with
+ * possible CPUs in @attrs->cpumask so that work items are affine to the
+ * NUMA node they were issued on.  Older pwqs are released as in-flight work
+ * items finish.  Note that a work item which repeatedly requeues itself
+ * back-to-back will stay on its current pwq.
+ *
+ * Performs GFP_KERNEL allocations.
+ *
+ * Return: 0 on success and -errno on failure.
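+ *
+ * The overall flow, mirroring the function body below:
+ *
+ *	get_online_cpus();
+ *	mutex_lock(&wq_pool_mutex);
+ *	ctx = apply_wqattrs_prepare(wq, attrs);    (allocates; may fail)
+ *	if (ctx)
+ *		apply_wqattrs_commit(ctx);         (installs; cannot fail)
+ *	mutex_unlock(&wq_pool_mutex);
+ *	put_online_cpus();
+ *	apply_wqattrs_cleanup(ctx);                (old pwqs, or aborted ctx)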
+ */ +int apply_workqueue_attrs(struct workqueue_struct *wq, + const struct workqueue_attrs *attrs) +{ + struct apply_wqattrs_ctx *ctx; + int ret = -ENOMEM; - put_online_cpus(); - ret = 0; - /* fall through */ -out_free: - free_workqueue_attrs(tmp_attrs); - free_workqueue_attrs(new_attrs); - kfree(pwq_tbl); - return ret; + /* only unbound workqueues can change attributes */ + if (WARN_ON(!(wq->flags & WQ_UNBOUND))) + return -EINVAL; + + /* creating multiple pwqs breaks ordering guarantee */ + if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) + return -EINVAL; + + /* + * CPUs should stay stable across pwq creations and installations. + * Pin CPUs, determine the target cpumask for each node and create + * pwqs accordingly. + */ + get_online_cpus(); + mutex_lock(&wq_pool_mutex); + + ctx = apply_wqattrs_prepare(wq, attrs); + + /* the ctx has been prepared successfully, let's commit it */ + if (ctx) { + apply_wqattrs_commit(ctx); + ret = 0; + } -enomem_pwq: - free_unbound_pwq(dfl_pwq); - for_each_node(node) - if (pwq_tbl && pwq_tbl[node] != dfl_pwq) - free_unbound_pwq(pwq_tbl[node]); mutex_unlock(&wq_pool_mutex); put_online_cpus(); -enomem: - ret = -ENOMEM; - goto out_free; + + apply_wqattrs_cleanup(ctx); + + return ret; } /** diff --git a/lib/bitmap.c b/lib/bitmap.c index 64c0926f5dd8e..40162f87ea2d7 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -506,12 +506,12 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, unsigned a, b; int c, old_c, totaldigits; const char __user __force *ubuf = (const char __user __force *)buf; - int exp_digit, in_range; + int at_start, in_range; totaldigits = c = 0; bitmap_zero(maskp, nmaskbits); do { - exp_digit = 1; + at_start = 1; in_range = 0; a = b = 0; @@ -540,11 +540,10 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, break; if (c == '-') { - if (exp_digit || in_range) + if (at_start || in_range) return -EINVAL; b = 0; in_range = 1; - exp_digit = 1; continue; } @@ -554,16 +553,18 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, b = b * 10 + (c - '0'); if (!in_range) a = b; - exp_digit = 0; + at_start = 0; totaldigits++; } if (!(a <= b)) return -EINVAL; if (b >= nmaskbits) return -ERANGE; - while (a <= b) { - set_bit(a, maskp); - a++; + if (!at_start) { + while (a <= b) { + set_bit(a, maskp); + a++; + } } } while (buflen && c == ','); return 0; diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 6dd0335ea61b2..0234361b24b89 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -743,12 +743,12 @@ STATIC int INIT bunzip2(unsigned char *buf, long len, } #ifdef PREBOOT -STATIC int INIT decompress(unsigned char *buf, long len, +STATIC int INIT __decompress(unsigned char *buf, long len, long (*fill)(void*, unsigned long), long (*flush)(void*, unsigned long), - unsigned char *outbuf, + unsigned char *outbuf, long olen, long *pos, - void(*error)(char *x)) + void (*error)(char *x)) { return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error); } diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index d4c7891635ecc..555c06bf20daa 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -1,4 +1,5 @@ #ifdef STATIC +#define PREBOOT /* Pre-boot environment: included */ /* prevent inclusion of _LINUX_KERNEL_H in pre-boot environment: lots @@ -33,23 +34,23 @@ static long INIT nofill(void *buffer, unsigned long len) } /* Included from initramfs et al code */ -STATIC int INIT gunzip(unsigned char *buf, long len, +STATIC int INIT 
__gunzip(unsigned char *buf, long len, long (*fill)(void*, unsigned long), long (*flush)(void*, unsigned long), - unsigned char *out_buf, + unsigned char *out_buf, long out_len, long *pos, void(*error)(char *x)) { u8 *zbuf; struct z_stream_s *strm; int rc; - size_t out_len; rc = -1; if (flush) { out_len = 0x8000; /* 32 K */ out_buf = malloc(out_len); } else { - out_len = ((size_t)~0) - (size_t)out_buf; /* no limit */ + if (!out_len) + out_len = ((size_t)~0) - (size_t)out_buf; /* no limit */ } if (!out_buf) { error("Out of memory while allocating output buffer"); @@ -181,4 +182,24 @@ STATIC int INIT gunzip(unsigned char *buf, long len, return rc; /* returns Z_OK (0) if successful */ } -#define decompress gunzip +#ifndef PREBOOT +STATIC int INIT gunzip(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), + unsigned char *out_buf, + long *pos, + void (*error)(char *x)) +{ + return __gunzip(buf, len, fill, flush, out_buf, 0, pos, error); +} +#else +STATIC int INIT __decompress(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), + unsigned char *out_buf, long out_len, + long *pos, + void (*error)(char *x)) +{ + return __gunzip(buf, len, fill, flush, out_buf, out_len, pos, error); +} +#endif diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c index 40f66ebe57b77..036fc882cd725 100644 --- a/lib/decompress_unlz4.c +++ b/lib/decompress_unlz4.c @@ -196,12 +196,12 @@ STATIC inline int INIT unlz4(u8 *input, long in_len, } #ifdef PREBOOT -STATIC int INIT decompress(unsigned char *buf, long in_len, +STATIC int INIT __decompress(unsigned char *buf, long in_len, long (*fill)(void*, unsigned long), long (*flush)(void*, unsigned long), - unsigned char *output, + unsigned char *output, long out_len, long *posp, - void(*error)(char *x) + void (*error)(char *x) ) { return unlz4(buf, in_len - 4, fill, flush, output, posp, error); diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 0be83af62b884..decb64629c146 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -667,13 +667,12 @@ STATIC inline int INIT unlzma(unsigned char *buf, long in_len, } #ifdef PREBOOT -STATIC int INIT decompress(unsigned char *buf, long in_len, +STATIC int INIT __decompress(unsigned char *buf, long in_len, long (*fill)(void*, unsigned long), long (*flush)(void*, unsigned long), - unsigned char *output, + unsigned char *output, long out_len, long *posp, - void(*error)(char *x) - ) + void (*error)(char *x)) { return unlzma(buf, in_len - 4, fill, flush, output, posp, error); } diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c index b94a31bdd87d1..f4c158e3a022a 100644 --- a/lib/decompress_unlzo.c +++ b/lib/decompress_unlzo.c @@ -31,6 +31,7 @@ */ #ifdef STATIC +#define PREBOOT #include "lzo/lzo1x_decompress_safe.c" #else #include @@ -287,4 +288,14 @@ STATIC int INIT unlzo(u8 *input, long in_len, return ret; } -#define decompress unlzo +#ifdef PREBOOT +STATIC int INIT __decompress(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), + unsigned char *out_buf, long olen, + long *pos, + void (*error)(char *x)) +{ + return unlzo(buf, len, fill, flush, out_buf, pos, error); +} +#endif diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index b07a78340e9d3..25d59a95bd668 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -394,4 +394,14 @@ STATIC int INIT unxz(unsigned char *in, long in_size, * This macro is used by 
architecture-specific files to decompress * the kernel image. */ -#define decompress unxz +#ifdef XZ_PREBOOT +STATIC int INIT __decompress(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), + unsigned char *out_buf, long olen, + long *pos, + void (*error)(char *x)) +{ + return unxz(buf, len, fill, flush, out_buf, pos, error); +} +#endif diff --git a/lib/dma-debug.c b/lib/dma-debug.c index ae4b65e17e648..517a568f038dd 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -574,6 +574,9 @@ void debug_dma_assert_idle(struct page *page) unsigned long flags; phys_addr_t cln; + if (dma_debug_disabled()) + return; + if (!page) return; @@ -1178,7 +1181,7 @@ static inline bool overlap(void *addr, unsigned long len, void *start, void *end static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len) { - if (overlap(addr, len, _text, _etext) || + if (overlap(addr, len, _stext, _etext) || overlap(addr, len, __start_rodata, __end_rodata)) err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); } diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 6745c6230db34..c30d07e99dba4 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -25,6 +25,7 @@ static atomic_t dump_lock = ATOMIC_INIT(-1); asmlinkage __visible void dump_stack(void) { + unsigned long flags; int was_locked; int old; int cpu; @@ -33,9 +34,8 @@ asmlinkage __visible void dump_stack(void) * Permit this cpu to perform nested stack dumps while serialising * against other CPUs */ - preempt_disable(); - retry: + local_irq_save(flags); cpu = smp_processor_id(); old = atomic_cmpxchg(&dump_lock, -1, cpu); if (old == -1) { @@ -43,6 +43,7 @@ asmlinkage __visible void dump_stack(void) } else if (old == cpu) { was_locked = 1; } else { + local_irq_restore(flags); cpu_relax(); goto retry; } @@ -52,7 +53,7 @@ asmlinkage __visible void dump_stack(void) if (!was_locked) atomic_set(&dump_lock, -1); - preempt_enable(); + local_irq_restore(flags); } #else asmlinkage __visible void dump_stack(void) diff --git a/lib/fault-inject.c b/lib/fault-inject.c index f1cdeb024d172..6a823a53e357b 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -44,7 +44,7 @@ static void fail_dump(struct fault_attr *attr) printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, " "space %d, times %d\n", attr->dname, - attr->probability, attr->interval, + attr->interval, attr->probability, atomic_read(&attr->space), atomic_read(&attr->times)); if (attr->verbose > 1) diff --git a/lib/iommu-common.c b/lib/iommu-common.c index df30632f0bef9..4fdeee02e0a94 100644 --- a/lib/iommu-common.c +++ b/lib/iommu-common.c @@ -21,8 +21,7 @@ static DEFINE_PER_CPU(unsigned int, iommu_hash_common); static inline bool need_flush(struct iommu_map_table *iommu) { - return (iommu->lazy_flush != NULL && - (iommu->flags & IOMMU_NEED_FLUSH) != 0); + return ((iommu->flags & IOMMU_NEED_FLUSH) != 0); } static inline void set_flush(struct iommu_map_table *iommu) @@ -211,7 +210,8 @@ unsigned long iommu_tbl_range_alloc(struct device *dev, goto bail; } } - if (n < pool->hint || need_flush(iommu)) { + if (iommu->lazy_flush && + (n < pool->hint || need_flush(iommu))) { clear_flush(iommu); iommu->lazy_flush(iommu); } diff --git a/lib/klist.c b/lib/klist.c index 89b485a2a58d1..2a072bfaeacef 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -282,9 +282,9 @@ void klist_iter_init_node(struct klist *k, struct klist_iter *i, struct 
klist_node *n) { i->i_klist = k; - i->i_cur = n; - if (n) - kref_get(&n->n_ref); + i->i_cur = NULL; + if (n && kref_get_unless_zero(&n->n_ref)) + i->i_cur = n; } EXPORT_SYMBOL_GPL(klist_iter_init_node); diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c index 6a08ce7d6adc0..acf9da449f816 100644 --- a/lib/libcrc32c.c +++ b/lib/libcrc32c.c @@ -74,3 +74,4 @@ module_exit(libcrc32c_mod_fini); MODULE_AUTHOR("Clay Haapala "); MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations"); MODULE_LICENSE("GPL"); +MODULE_SOFTDEP("pre: crc32c"); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3d2aa27b845b5..8399002aa0f02 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1014,9 +1014,13 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, return 0; radix_tree_for_each_slot(slot, root, &iter, first_index) { - results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot)); + results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; + if (radix_tree_is_indirect_ptr(results[ret])) { + slot = radix_tree_iter_retry(&iter); + continue; + } if (++ret == max_items) break; } @@ -1093,9 +1097,13 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, return 0; radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { - results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot)); + results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; + if (radix_tree_is_indirect_ptr(results[ret])) { + slot = radix_tree_iter_retry(&iter); + continue; + } if (++ret == max_items) break; } diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 8609378e65051..5b17447efa8bf 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -506,10 +506,11 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter) if (!iter->walker) return -ENOMEM; - mutex_lock(&ht->mutex); - iter->walker->tbl = rht_dereference(ht->tbl, ht); + spin_lock(&ht->lock); + iter->walker->tbl = + rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock)); list_add(&iter->walker->list, &iter->walker->tbl->walkers); - mutex_unlock(&ht->mutex); + spin_unlock(&ht->lock); return 0; } @@ -523,10 +524,10 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init); */ void rhashtable_walk_exit(struct rhashtable_iter *iter) { - mutex_lock(&iter->ht->mutex); + spin_lock(&iter->ht->lock); if (iter->walker->tbl) list_del(&iter->walker->list); - mutex_unlock(&iter->ht->mutex); + spin_unlock(&iter->ht->lock); kfree(iter->walker); } EXPORT_SYMBOL_GPL(rhashtable_walk_exit); @@ -550,14 +551,12 @@ int rhashtable_walk_start(struct rhashtable_iter *iter) { struct rhashtable *ht = iter->ht; - mutex_lock(&ht->mutex); + rcu_read_lock(); + spin_lock(&ht->lock); if (iter->walker->tbl) list_del(&iter->walker->list); - - rcu_read_lock(); - - mutex_unlock(&ht->mutex); + spin_unlock(&ht->lock); if (!iter->walker->tbl) { iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht); @@ -612,6 +611,8 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) iter->skip = 0; } + iter->p = NULL; + /* Ensure we see any new tables. 
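 * The barrier pairs with the writer that published a new table in the
 * resize/rehash path; schematically (illustrative, not the exact code):
 *
 *	writer:	(initialise new_tbl)
 *		rcu_assign_pointer(old_tbl->future_tbl, new_tbl);
 *	walker:	smp_rmb();
 *		new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 *
 * so a restarted walk never observes a half-initialised table.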
*/ smp_rmb(); @@ -622,8 +623,6 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) return ERR_PTR(-EAGAIN); } - iter->p = NULL; - out: return obj; @@ -730,9 +729,6 @@ int rhashtable_init(struct rhashtable *ht, if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) return -EINVAL; - if (params->nelem_hint) - size = rounded_hashtable_size(params); - memset(ht, 0, sizeof(*ht)); mutex_init(&ht->mutex); spin_lock_init(&ht->lock); @@ -752,6 +748,9 @@ int rhashtable_init(struct rhashtable *ht, ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE); + if (params->nelem_hint) + size = rounded_hashtable_size(&ht->p); + /* The maximum (not average) chain length grows with the * size of the hash table, at a rate of (log N)/(log log N). * The value of 16 is selected so that even if the hash diff --git a/lib/string_helpers.c b/lib/string_helpers.c index c98ae818eb4ee..33e79b5eea779 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -43,46 +43,73 @@ void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, [STRING_UNITS_10] = 1000, [STRING_UNITS_2] = 1024, }; - int i, j; - u32 remainder = 0, sf_cap, exp; + static const unsigned int rounding[] = { 500, 50, 5 }; + int i = 0, j; + u32 remainder = 0, sf_cap; char tmp[8]; const char *unit; tmp[0] = '\0'; - i = 0; - if (!size) + + if (blk_size == 0) + size = 0; + if (size == 0) goto out; - while (blk_size >= divisor[units]) { - remainder = do_div(blk_size, divisor[units]); + /* This is Napier's algorithm. Reduce the original block size to + * + * coefficient * divisor[units]^i + * + * we do the reduction so both coefficients are just under 32 bits so + * that multiplying them together won't overflow 64 bits and we keep + * as much precision as possible in the numbers. + * + * Note: it's safe to throw away the remainders here because all the + * precision is in the coefficients. + */ + while (blk_size >> 32) { + do_div(blk_size, divisor[units]); i++; } - exp = divisor[units] / (u32)blk_size; - if (size >= exp) { - remainder = do_div(size, divisor[units]); - remainder *= blk_size; + while (size >> 32) { + do_div(size, divisor[units]); i++; - } else { - remainder *= size; } + /* now perform the actual multiplication keeping i as the sum of the + * two logarithms */ size *= blk_size; - size += remainder / divisor[units]; - remainder %= divisor[units]; + /* and logarithmically reduce it until it's just under the divisor */ while (size >= divisor[units]) { remainder = do_div(size, divisor[units]); i++; } + /* work out in j how many digits of precision we need from the + * remainder */ sf_cap = size; for (j = 0; sf_cap*10 < 1000; j++) sf_cap *= 10; - if (j) { + if (units == STRING_UNITS_2) { + /* express the remainder as a decimal. 
It's currently the + * numerator of a fraction whose denominator is + * divisor[units], which is 1 << 10 for STRING_UNITS_2 */ remainder *= 1000; - remainder /= divisor[units]; + remainder >>= 10; + } + + /* add a 5 to the digit below what will be printed to ensure + * an arithmetical round up and carry it through to size */ + remainder += rounding[j]; + if (remainder >= 1000) { + remainder -= 1000; + size += 1; + } + + if (j) { snprintf(tmp, sizeof(tmp), ".%03u", remainder); tmp[j+1] = '\0'; } diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index fcad8322ef367..b640609bcd177 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -61,6 +61,7 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info) bool dequeued_page; dequeued_page = false; + spin_lock_irqsave(&b_dev_info->pages_lock, flags); list_for_each_entry_safe(page, tmp, &b_dev_info->pages, lru) { /* * Block others from accessing the 'page' while we get around @@ -75,15 +76,14 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info) continue; } #endif - spin_lock_irqsave(&b_dev_info->pages_lock, flags); balloon_page_delete(page); __count_vm_event(BALLOON_DEFLATE); - spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); unlock_page(page); dequeued_page = true; break; } } + spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); if (!dequeued_page) { /* diff --git a/mm/filemap.c b/mm/filemap.c index 6bf5e42d560a4..1ffef05f1c1fd 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2461,6 +2461,11 @@ ssize_t generic_perform_write(struct file *file, break; } + if (fatal_signal_pending(current)) { + status = -EINTR; + break; + } + status = a_ops->write_begin(file, mapping, pos, bytes, flags, &page, &fsdata); if (unlikely(status < 0)) @@ -2498,10 +2503,6 @@ ssize_t generic_perform_write(struct file *file, written += copied; balance_dirty_pages_ratelimited(mapping); - if (fatal_signal_pending(current)) { - status = -EINTR; - break; - } } while (iov_iter_count(i)); return written ? written : status; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 078832cf36365..8e792ec5e84c6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2137,7 +2137,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, for (_pte = pte; _pte < pte+HPAGE_PMD_NR; _pte++, address += PAGE_SIZE) { pte_t pteval = *_pte; - if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { + if (pte_none(pteval) || (pte_present(pteval) && + is_zero_pfn(pte_pfn(pteval)))) { if (++none_or_zero <= khugepaged_max_ptes_none) continue; else diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 271e4432734c3..a6ff935476e3d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -40,6 +40,11 @@ int hugepages_treat_as_movable; int hugetlb_max_hstate __read_mostly; unsigned int default_hstate_idx; struct hstate hstates[HUGE_MAX_HSTATE]; +/* + * Minimum page order among possible hugepage sizes, set to a proper value + * at boot time. 
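+ * It is computed in hugetlb_init_hstates() as the minimum over all
+ * registered hstates, effectively:
+ *
+ *	for_each_hstate(h)
+ *		if (minimum_order > huge_page_order(h))
+ *			minimum_order = huge_page_order(h);
+ *
+ * which lets dissolve_free_huge_pages() scan in steps of the smallest
+ * hugepage size instead of recomputing the order on every call.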
+ */ +static unsigned int minimum_order __read_mostly = UINT_MAX; __initdata LIST_HEAD(huge_boot_pages); @@ -1188,19 +1193,13 @@ static void dissolve_free_huge_page(struct page *page) */ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) { - unsigned int order = 8 * sizeof(void *); unsigned long pfn; - struct hstate *h; if (!hugepages_supported()) return; - /* Set scan step to minimum hugepage size */ - for_each_hstate(h) - if (order > huge_page_order(h)) - order = huge_page_order(h); - VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << order)); - for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << order) + VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << minimum_order)); + for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) dissolve_free_huge_page(pfn_to_page(pfn)); } @@ -1627,10 +1626,14 @@ static void __init hugetlb_init_hstates(void) struct hstate *h; for_each_hstate(h) { + if (minimum_order > huge_page_order(h)) + minimum_order = huge_page_order(h); + /* oversize hugepages were init'ed in early boot */ if (!hstate_is_gigantic(h)) hugetlb_hstate_alloc_pages(h); } + VM_BUG_ON(minimum_order == UINT_MAX); } static char * __init memfmt(char *buf, unsigned long n) @@ -2893,6 +2896,14 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, if (iter_vma == vma) continue; + /* + * Shared VMAs have their own reserves and do not affect + * MAP_PRIVATE accounting but it is possible that a shared + * VMA is using the same page so check and skip such VMAs. + */ + if (iter_vma->vm_flags & VM_MAYSHARE) + continue; + /* * Unmap the page from other VMAs without their own reserves. * They get marked to be SIGKILLed if they fault in these diff --git a/mm/kmemleak.c b/mm/kmemleak.c index f0fe4f2c1fa7a..3716cdb8ba420 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -195,6 +195,8 @@ static struct kmem_cache *scan_area_cache; /* set if tracing memory operations is enabled */ static int kmemleak_enabled; +/* same as above but only for the kmemleak_free() callback */ +static int kmemleak_free_enabled; /* set in the late_initcall if there were no errors */ static int kmemleak_initialized; /* enables or disables early logging of the memory operations */ @@ -907,12 +909,13 @@ EXPORT_SYMBOL_GPL(kmemleak_alloc); * kmemleak_alloc_percpu - register a newly allocated __percpu object * @ptr: __percpu pointer to beginning of the object * @size: size of the object + * @gfp: flags used for kmemleak internal memory allocations * * This function is called from the kernel percpu allocator when a new object - * (memory block) is allocated (alloc_percpu). It assumes GFP_KERNEL - * allocation. + * (memory block) is allocated (alloc_percpu). 
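+ * Tracking is registered once per possible CPU, roughly:
+ *
+ *	for_each_possible_cpu(cpu)
+ *		create_object((unsigned long)per_cpu_ptr(ptr, cpu),
+ *			      size, 0, gfp);
+ *
+ * which is why @gfp is now supplied by the caller (e.g. pcpu_alloc())
+ * instead of being assumed to be GFP_KERNEL.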
*/ -void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) +void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, + gfp_t gfp) { unsigned int cpu; @@ -925,7 +928,7 @@ void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) if (kmemleak_enabled && ptr && !IS_ERR(ptr)) for_each_possible_cpu(cpu) create_object((unsigned long)per_cpu_ptr(ptr, cpu), - size, 0, GFP_KERNEL); + size, 0, gfp); else if (kmemleak_early_log) log_early(KMEMLEAK_ALLOC_PERCPU, ptr, size, 0); } @@ -942,7 +945,7 @@ void __ref kmemleak_free(const void *ptr) { pr_debug("%s(0x%p)\n", __func__, ptr); - if (kmemleak_enabled && ptr && !IS_ERR(ptr)) + if (kmemleak_free_enabled && ptr && !IS_ERR(ptr)) delete_object_full((unsigned long)ptr); else if (kmemleak_early_log) log_early(KMEMLEAK_FREE, ptr, 0, 0); @@ -982,7 +985,7 @@ void __ref kmemleak_free_percpu(const void __percpu *ptr) pr_debug("%s(0x%p)\n", __func__, ptr); - if (kmemleak_enabled && ptr && !IS_ERR(ptr)) + if (kmemleak_free_enabled && ptr && !IS_ERR(ptr)) for_each_possible_cpu(cpu) delete_object_full((unsigned long)per_cpu_ptr(ptr, cpu)); @@ -1750,6 +1753,13 @@ static void kmemleak_do_cleanup(struct work_struct *work) mutex_lock(&scan_mutex); stop_scan_thread(); + /* + * Once the scan thread has stopped, it is safe to no longer track + * object freeing. Ordering of the scan thread stopping and the memory + * accesses below is guaranteed by the kthread_stop() function. + */ + kmemleak_free_enabled = 0; + if (!kmemleak_found_leaks) __kmemleak_do_cleanup(); else @@ -1776,6 +1786,8 @@ static void kmemleak_disable(void) /* check whether it is too early for a kernel thread */ if (kmemleak_initialized) schedule_work(&cleanup_work); + else + kmemleak_free_enabled = 0; pr_info("Kernel memory leak detector disabled\n"); } @@ -1840,8 +1852,10 @@ void __init kmemleak_init(void) if (kmemleak_error) { local_irq_restore(flags); return; - } else + } else { kmemleak_enabled = 1; + kmemleak_free_enabled = 1; + } local_irq_restore(flags); /* diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a04225d372ba3..aac1c98a9bc7c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3677,6 +3677,7 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg, ret = page_counter_memparse(args, "-1", &threshold); if (ret) return ret; + threshold <<= PAGE_SHIFT; mutex_lock(&memcg->thresholds_lock); @@ -3823,16 +3824,17 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, swap_buffers: /* Swap primary and spare array */ thresholds->spare = thresholds->primary; - /* If all events are unregistered, free the spare array */ - if (!new) { - kfree(thresholds->spare); - thresholds->spare = NULL; - } rcu_assign_pointer(thresholds->primary, new); /* To be sure that nobody uses thresholds */ synchronize_rcu(); + + /* If all events are unregistered, free the spare array */ + if (!new) { + kfree(thresholds->spare); + thresholds->spare = NULL; + } unlock: mutex_unlock(&memcg->thresholds_lock); } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 501820c815b33..e26bc59d7dffa 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1557,7 +1557,9 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags) * Did it turn free? 
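 * __get_any_page() returns 1 here when it managed to take a reference
 * on the page, so a page that still is not on the LRU is both
 * unhandlable and referenced: the reference has to be dropped before
 * returning -EIO, which is what the put_page() below does.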
*/ ret = __get_any_page(page, pfn, 0); - if (!PageLRU(page)) { + if (ret == 1 && !PageLRU(page)) { + /* Drop page reference which is from __get_any_page() */ + put_page(page); pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n", pfn, page->flags); return -EIO; @@ -1587,13 +1589,12 @@ static int soft_offline_huge_page(struct page *page, int flags) unlock_page(hpage); ret = isolate_huge_page(hpage, &pagelist); - if (ret) { - /* - * get_any_page() and isolate_huge_page() takes a refcount each, - * so need to drop one here. - */ - put_page(hpage); - } else { + /* + * get_any_page() and isolate_huge_page() takes a refcount each, + * so need to drop one here. + */ + put_page(hpage); + if (!ret) { pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn); return -EBUSY; } diff --git a/mm/memory.c b/mm/memory.c index 22e037e3364e0..701d9ad45c46f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2669,6 +2669,10 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, pte_unmap(page_table); + /* File mapping without ->vm_ops ? */ + if (vma->vm_flags & VM_SHARED) + return VM_FAULT_SIGBUS; + /* Check if we need to add a guard page to the stack */ if (check_stack_guard_page(vma, address) < 0) return VM_FAULT_SIGSEGV; @@ -3097,6 +3101,9 @@ static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma, - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; pte_unmap(page_table); + /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ + if (!vma->vm_ops->fault) + return VM_FAULT_SIGBUS; if (!(flags & FAULT_FLAG_WRITE)) return do_read_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); @@ -3242,13 +3249,12 @@ static int handle_pte_fault(struct mm_struct *mm, barrier(); if (!pte_present(entry)) { if (pte_none(entry)) { - if (vma->vm_ops) { - if (likely(vma->vm_ops->fault)) - return do_fault(mm, vma, address, pte, - pmd, flags, entry); - } - return do_anonymous_page(mm, vma, address, - pte, pmd, flags); + if (vma->vm_ops) + return do_fault(mm, vma, address, pte, pmd, + flags, entry); + + return do_anonymous_page(mm, vma, address, pte, pmd, + flags); } return do_swap_page(mm, vma, address, pte, pmd, flags, entry); @@ -3357,8 +3363,18 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(pmd_none(*pmd)) && unlikely(__pte_alloc(mm, vma, pmd, address))) return VM_FAULT_OOM; - /* if an huge pmd materialized from under us just retry later */ - if (unlikely(pmd_trans_huge(*pmd))) + /* + * If a huge pmd materialized under us just retry later. Use + * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd + * didn't become pmd_trans_huge under us and then back to pmd_none, as + * a result of MADV_DONTNEED running immediately after a huge pmd fault + * in a different thread of this mm, in turn leading to a misleading + * pmd_trans_huge() retval. All we have to ensure is that it is a + * regular pmd that we can walk with pte_offset_map() and we can do that + * through an atomic read in C, which is what pmd_trans_unstable() + * provides. 
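+ *
+ * Schematically, the race being guarded against:
+ *
+ *	faulting thread			other thread (same mm)
+ *	pmd_none(*pmd) checked
+ *					THP fault: pmd becomes huge
+ *					MADV_DONTNEED: pmd cleared
+ *	pmd_trans_huge(*pmd) -> false
+ *
+ * at which point the pmd was never observed in a stable, regular
+ * state; pmd_trans_unstable() re-reads it atomically and we return 0
+ * so the fault is retried.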
+ */ + if (unlikely(pmd_trans_unstable(pmd))) return 0; /* * A regular pmd is established and it can't morph into a huge pmd diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 747743237d9f4..99d4c1d0b8583 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1972,35 +1972,41 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, pol = get_vma_policy(vma, addr); cpuset_mems_cookie = read_mems_allowed_begin(); - if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage && - pol->mode != MPOL_INTERLEAVE)) { + if (pol->mode == MPOL_INTERLEAVE) { + unsigned nid; + + nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order); + mpol_cond_put(pol); + page = alloc_page_interleave(gfp, order, nid); + goto out; + } + + if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) { + int hpage_node = node; + /* * For hugepage allocation and non-interleave policy which - * allows the current node, we only try to allocate from the - * current node and don't fall back to other nodes, as the - * cost of remote accesses would likely offset THP benefits. + * allows the current node (or other explicitly preferred + * node) we only try to allocate from the current/preferred + * node and don't fall back to other nodes, as the cost of + * remote accesses would likely offset THP benefits. * * If the policy is interleave, or does not allow the current * node in its nodemask, we allocate the standard way. */ + if (pol->mode == MPOL_PREFERRED && + !(pol->flags & MPOL_F_LOCAL)) + hpage_node = pol->v.preferred_node; + nmask = policy_nodemask(gfp, pol); - if (!nmask || node_isset(node, *nmask)) { + if (!nmask || node_isset(hpage_node, *nmask)) { mpol_cond_put(pol); - page = alloc_pages_exact_node(node, + page = alloc_pages_exact_node(hpage_node, gfp | __GFP_THISNODE, order); goto out; } } - if (pol->mode == MPOL_INTERLEAVE) { - unsigned nid; - - nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order); - mpol_cond_put(pol); - page = alloc_page_interleave(gfp, order, nid); - goto out; - } - nmask = policy_nodemask(gfp, pol); zl = policy_zonelist(gfp, pol, node); mpol_cond_put(pol); diff --git a/mm/migrate.c b/mm/migrate.c index f53838fe3dfe6..8c4841a6dc4ca 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1062,7 +1062,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, if (rc != MIGRATEPAGE_SUCCESS && put_new_page) put_new_page(new_hpage, private); else - put_page(new_hpage); + putback_active_hugepage(new_hpage); if (result) { if (rc) @@ -1557,7 +1557,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page, (GFP_HIGHUSER_MOVABLE | __GFP_THISNODE | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & - ~GFP_IOFS, 0); + ~__GFP_WAIT, 0); return newpage; } diff --git a/mm/mlock.c b/mm/mlock.c index 6fd2cf15e8687..3d3ee6cad7767 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -172,7 +172,7 @@ static void __munlock_isolation_failed(struct page *page) */ unsigned int munlock_vma_page(struct page *page) { - unsigned int nr_pages; + int nr_pages; struct zone *zone = page_zone(page); /* For try_to_munlock() and to serialize with page migration */ diff --git a/mm/mmap.c b/mm/mmap.c index bb50cacc3ea57..d30b8f8f02b19 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -440,12 +440,16 @@ static void validate_mm(struct mm_struct *mm) struct vm_area_struct *vma = mm->mmap; while (vma) { + struct anon_vma *anon_vma = vma->anon_vma; struct anon_vma_chain *avc; - vma_lock_anon_vma(vma); - list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) - anon_vma_interval_tree_verify(avc); - 
vma_unlock_anon_vma(vma); + if (anon_vma) { + anon_vma_lock_read(anon_vma); + list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) + anon_vma_interval_tree_verify(avc); + anon_vma_unlock_read(anon_vma); + } + highest_address = vma->vm_end; vma = vma->vm_next; i++; @@ -2141,32 +2145,27 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns */ int expand_upwards(struct vm_area_struct *vma, unsigned long address) { - int error; + int error = 0; if (!(vma->vm_flags & VM_GROWSUP)) return -EFAULT; - /* - * We must make sure the anon_vma is allocated - * so that the anon_vma locking is not a noop. - */ + /* Guard against wrapping around to address 0. */ + if (address < PAGE_ALIGN(address+4)) + address = PAGE_ALIGN(address+4); + else + return -ENOMEM; + + /* We must make sure the anon_vma is allocated. */ if (unlikely(anon_vma_prepare(vma))) return -ENOMEM; - vma_lock_anon_vma(vma); /* * vma->vm_start/vm_end cannot change under us because the caller * is required to hold the mmap_sem in read mode. We need the * anon_vma lock to serialize against concurrent expand_stacks. - * Also guard against wrapping around to address 0. */ - if (address < PAGE_ALIGN(address+4)) - address = PAGE_ALIGN(address+4); - else { - vma_unlock_anon_vma(vma); - return -ENOMEM; - } - error = 0; + anon_vma_lock_write(vma->anon_vma); /* Somebody else might have raced and expanded it already */ if (address > vma->vm_end) { @@ -2184,7 +2183,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) * updates, but we only hold a shared mmap_sem * lock here, so we need to protect against * concurrent vma expansions. - * vma_lock_anon_vma() doesn't help here, as + * anon_vma_lock_write() doesn't help here, as * we don't guarantee that all growable vmas * in a mm share the same root anon vma. * So, we reuse mm->page_table_lock to guard @@ -2204,7 +2203,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) } } } - vma_unlock_anon_vma(vma); + anon_vma_unlock_write(vma->anon_vma); khugepaged_enter_vma_merge(vma, vma->vm_flags); validate_mm(vma->vm_mm); return error; @@ -2219,25 +2218,21 @@ int expand_downwards(struct vm_area_struct *vma, { int error; - /* - * We must make sure the anon_vma is allocated - * so that the anon_vma locking is not a noop. - */ - if (unlikely(anon_vma_prepare(vma))) - return -ENOMEM; - address &= PAGE_MASK; error = security_mmap_addr(address); if (error) return error; - vma_lock_anon_vma(vma); + /* We must make sure the anon_vma is allocated. */ + if (unlikely(anon_vma_prepare(vma))) + return -ENOMEM; /* * vma->vm_start/vm_end cannot change under us because the caller * is required to hold the mmap_sem in read mode. We need the * anon_vma lock to serialize against concurrent expand_stacks. */ + anon_vma_lock_write(vma->anon_vma); /* Somebody else might have raced and expanded it already */ if (address < vma->vm_start) { @@ -2255,7 +2250,7 @@ int expand_downwards(struct vm_area_struct *vma, * updates, but we only hold a shared mmap_sem * lock here, so we need to protect against * concurrent vma expansions. - * vma_lock_anon_vma() doesn't help here, as + * anon_vma_lock_write() doesn't help here, as * we don't guarantee that all growable vmas * in a mm share the same root anon vma. 
* So, we reuse mm->page_table_lock to guard @@ -2273,7 +2268,7 @@ int expand_downwards(struct vm_area_struct *vma, } } } - vma_unlock_anon_vma(vma); + anon_vma_unlock_write(vma->anon_vma); khugepaged_enter_vma_merge(vma, vma->vm_flags); validate_mm(vma->vm_mm); return error; @@ -2659,12 +2654,29 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, if (!vma || !(vma->vm_flags & VM_SHARED)) goto out; - if (start < vma->vm_start || start + size > vma->vm_end) + if (start < vma->vm_start) goto out; - if (pgoff == linear_page_index(vma, start)) { - ret = 0; - goto out; + if (start + size > vma->vm_end) { + struct vm_area_struct *next; + + for (next = vma->vm_next; next; next = next->vm_next) { + /* hole between vmas ? */ + if (next->vm_start != next->vm_prev->vm_end) + goto out; + + if (next->vm_file != vma->vm_file) + goto out; + + if (next->vm_flags != vma->vm_flags) + goto out; + + if (start + size <= next->vm_end) + break; + } + + if (!next) + goto out; } prot |= vma->vm_flags & VM_READ ? PROT_READ : 0; @@ -2674,9 +2686,16 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, flags &= MAP_NONBLOCK; flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE; if (vma->vm_flags & VM_LOCKED) { + struct vm_area_struct *tmp; flags |= MAP_LOCKED; + /* drop PG_Mlocked flag for over-mapped range */ - munlock_vma_pages_range(vma, start, start + size); + for (tmp = vma; tmp->vm_start >= start + size; + tmp = tmp->vm_next) { + munlock_vma_pages_range(tmp, + max(tmp->vm_start, start), + min(tmp->vm_end, start + size)); + } } file = get_file(vma->vm_file); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ebffa0e4a9c04..18490f3bd7f1f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -983,12 +983,15 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, set_page_owner(page, order, gfp_flags); /* - * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was necessary to + * page is set pfmemalloc when ALLOC_NO_WATERMARKS was necessary to * allocate the page. The expectation is that the caller is taking * steps that will free more memory. The caller should avoid the page * being used for !PFMEMALLOC purposes. 
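 * The flag is now set and cleared through the
 * set_page_pfmemalloc()/clear_page_pfmemalloc() helpers below, and
 * consumers such as slab and slub must test it with
 * page_is_pfmemalloc(page) rather than reading page->pfmemalloc
 * directly (see the matching hunks in mm/slab.c and mm/slub.c).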
*/ - page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS); + if (alloc_flags & ALLOC_NO_WATERMARKS) + set_page_pfmemalloc(page); + else + clear_page_pfmemalloc(page); return 0; } diff --git a/mm/percpu.c b/mm/percpu.c index dfd02484e8de1..2dd74487a0aff 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1030,7 +1030,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); ptr = __addr_to_pcpu_ptr(chunk->base_addr + off); - kmemleak_alloc_percpu(ptr, size); + kmemleak_alloc_percpu(ptr, size, gfp); return ptr; fail_unlock: diff --git a/mm/slab.c b/mm/slab.c index 7eb38dd1cefa2..330039fdcf18f 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1602,7 +1602,7 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, } /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */ - if (unlikely(page->pfmemalloc)) + if (page_is_pfmemalloc(page)) pfmemalloc_active = true; nr_pages = (1 << cachep->gfporder); @@ -1613,7 +1613,7 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, add_zone_page_state(page_zone(page), NR_SLAB_UNRECLAIMABLE, nr_pages); __SetPageSlab(page); - if (page->pfmemalloc) + if (page_is_pfmemalloc(page)) SetPageSlabPfmemalloc(page); if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { @@ -2189,9 +2189,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) size += BYTES_PER_WORD; } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) - if (size >= kmalloc_size(INDEX_NODE + 1) - && cachep->object_size > cache_line_size() - && ALIGN(size, cachep->align) < PAGE_SIZE) { + /* + * To activate debug pagealloc, off-slab management is necessary + * requirement. In early phase of initialization, small sized slab + * doesn't get initialized so it would not be possible. So, we need + * to check size >= 256. It guarantees that all necessary small + * sized slab is initialized in current slab initialization sequence. + */ + if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) && + size >= 256 && cachep->object_size > cache_line_size() && + ALIGN(size, cachep->align) < PAGE_SIZE) { cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align); size = PAGE_SIZE; } diff --git a/mm/slub.c b/mm/slub.c index 54c0876b43d55..08342c523a85b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1427,7 +1427,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) inc_slabs_node(s, page_to_nid(page), page->objects); page->slab_cache = s; __SetPageSlab(page); - if (page->pfmemalloc) + if (page_is_pfmemalloc(page)) SetPageSlabPfmemalloc(page); start = page_address(page); diff --git a/mm/vmscan.c b/mm/vmscan.c index 5e8eadd71bac7..1a17bd7c0ce58 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -937,21 +937,17 @@ static unsigned long shrink_page_list(struct list_head *page_list, * * 2) Global reclaim encounters a page, memcg encounters a * page that is not marked for immediate reclaim or - * the caller does not have __GFP_IO. In this case mark + * the caller does not have __GFP_FS (or __GFP_IO if it's + * simply going to swap, not to fs). In this case mark * the page for immediate reclaim and continue scanning. * - * __GFP_IO is checked because a loop driver thread might + * Require may_enter_fs because we would wait on fs, which + * may not have submitted IO yet. 
And the loop driver might * enter reclaim, and deadlock if it waits on a page for * which it is needed to do the write (loop masks off * __GFP_IO|__GFP_FS for this reason); but more thought * would probably show more reasons. * - * Don't require __GFP_FS, since we're not going into the - * FS, just waiting on its writeback completion. Worryingly, - * ext4 gfs2 and xfs allocate pages with - * grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so testing - * may_enter_fs here is liable to OOM on them. - * * 3) memcg encounters a page that is not already marked * PageReclaim. memcg does not have any dirty pages * throttling so we could easily OOM just because too many @@ -968,7 +964,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, /* Case 2 above */ } else if (global_reclaim(sc) || - !PageReclaim(page) || !(sc->gfp_mask & __GFP_IO)) { + !PageReclaim(page) || !may_enter_fs) { /* * This is slightly racy - end_page_writeback() * might have just cleared PageReclaim, then @@ -1157,7 +1153,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (PageSwapCache(page)) try_to_free_swap(page); unlock_page(page); - putback_lru_page(page); + list_add(&page->lru, &ret_pages); continue; activate_locked: diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index a8b5e749e84e7..fb1ec10ce449d 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -306,7 +306,12 @@ static void free_handle(struct zs_pool *pool, unsigned long handle) static void record_obj(unsigned long handle, unsigned long obj) { - *(unsigned long *)handle = obj; + /* + * lsb of @obj represents handle lock while other bits + * represent object value the handle is pointing so + * updating shouldn't do store tearing. + */ + WRITE_ONCE(*(unsigned long *)handle, obj); } /* zpool driver */ @@ -1641,6 +1646,13 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class, free_obj = obj_malloc(d_page, class, handle); zs_object_copy(used_obj, free_obj, class); index++; + /* + * record_obj updates handle's value to free_obj and it will + * invalidate lock bit(ie, HANDLE_PIN_BIT) of handle, which + * breaks synchronization using pin_tag(e,g, zs_free) so + * let's keep the lock bit. 
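+ *
+ * The resulting sequence is:
+ *
+ *	free_obj |= BIT(HANDLE_PIN_BIT);   (stay pinned across the store)
+ *	record_obj(handle, free_obj);      (WRITE_ONCE, no store tearing)
+ *	unpin_tag(handle);                 (now release the lock bit)
+ *
+ * so a concurrent zs_free() spinning in pin_tag() never sees the new
+ * object value with the lock bit already clear.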
+ */ + free_obj |= BIT(HANDLE_PIN_BIT); record_obj(handle, free_obj); unpin_tag(handle); obj_free(pool, class, used_obj); diff --git a/net/9p/client.c b/net/9p/client.c index 6f4c4c88db84e..fcf6fe063d826 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -843,7 +843,8 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, if (err < 0) { if (err == -EIO) c->status = Disconnected; - goto reterr; + if (err != -ERESTARTSYS) + goto reterr; } if (req->status == REQ_STATUS_ERROR) { p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); @@ -1540,6 +1541,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err) struct p9_client *clnt = fid->clnt; struct p9_req_t *req; int total = 0; + *err = 0; p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, (unsigned long long) offset, (int)iov_iter_count(to)); @@ -1615,6 +1617,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) struct p9_client *clnt = fid->clnt; struct p9_req_t *req; int total = 0; + *err = 0; p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n", fid->fid, (unsigned long long) offset, @@ -1647,6 +1650,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) if (*err) { trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); + break; } p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 330c1f4a5a0b6..a64884bbf0cea 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -806,6 +806,9 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol, struct sock *sk; ax25_cb *ax25; + if (protocol < 0 || protocol > SK_PROTOCOL_MAX) + return -EINVAL; + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ac4b96eccadeb..bd3357e69c5cb 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -112,21 +112,17 @@ batadv_backbone_gw_free_ref(struct batadv_bla_backbone_gw *backbone_gw) } /* finally deinitialize the claim */ -static void batadv_claim_free_rcu(struct rcu_head *rcu) +static void batadv_claim_release(struct batadv_bla_claim *claim) { - struct batadv_bla_claim *claim; - - claim = container_of(rcu, struct batadv_bla_claim, rcu); - batadv_backbone_gw_free_ref(claim->backbone_gw); - kfree(claim); + kfree_rcu(claim, rcu); } /* free a claim, call claim_free_rcu if its the last reference */ static void batadv_claim_free_ref(struct batadv_bla_claim *claim) { if (atomic_dec_and_test(&claim->refcount)) - call_rcu(&claim->rcu, batadv_claim_free_rcu); + batadv_claim_release(claim); } /** diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index aad022dd15df5..95b3167cf036b 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -15,6 +15,7 @@ * along with this program; if not, see . */ +#include #include #include #include @@ -422,7 +423,7 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, int j; /* check if orig node candidate is running DAT */ - if (!(candidate->capabilities & BATADV_ORIG_CAPA_HAS_DAT)) + if (!test_bit(BATADV_ORIG_CAPA_HAS_DAT, &candidate->capabilities)) goto out; /* Check if this node has already been selected... 
*/ @@ -682,9 +683,9 @@ static void batadv_dat_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, uint16_t tvlv_value_len) { if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) - orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_DAT; + clear_bit(BATADV_ORIG_CAPA_HAS_DAT, &orig->capabilities); else - orig->capabilities |= BATADV_ORIG_CAPA_HAS_DAT; + set_bit(BATADV_ORIG_CAPA_HAS_DAT, &orig->capabilities); } /** diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 1918cd50b62ed..b6bff9c1877ac 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -64,18 +64,6 @@ batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface) call_rcu(&hard_iface->rcu, batadv_hardif_free_rcu); } -/** - * batadv_hardif_free_ref_now - decrement the hard interface refcounter and - * possibly free it (without rcu callback) - * @hard_iface: the hard interface to free - */ -static inline void -batadv_hardif_free_ref_now(struct batadv_hard_iface *hard_iface) -{ - if (atomic_dec_and_test(&hard_iface->refcount)) - batadv_hardif_free_rcu(&hard_iface->rcu); -} - static inline struct batadv_hard_iface * batadv_primary_if_get_selected(struct batadv_priv *bat_priv) { diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index b24e4bb64fb5f..8653c1a506f4c 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -15,6 +15,8 @@ * along with this program; if not, see . */ +#include +#include #include "main.h" #include "multicast.h" #include "originator.h" @@ -565,19 +567,26 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, * * If the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag of this originator, * orig, has toggled then this method updates counter and list accordingly. + * + * Caller needs to hold orig->mcast_handler_lock. */ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, uint8_t mcast_flags) { + struct hlist_node *node = &orig->mcast_want_all_unsnoopables_node; + struct hlist_head *head = &bat_priv->mcast.want_all_unsnoopables_list; + /* switched from flag unset to set */ if (mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES && !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES)) { atomic_inc(&bat_priv->mcast.num_want_all_unsnoopables); spin_lock_bh(&bat_priv->mcast.want_lists_lock); - hlist_add_head_rcu(&orig->mcast_want_all_unsnoopables_node, - &bat_priv->mcast.want_all_unsnoopables_list); + /* flag checks above + mcast_handler_lock prevents this */ + WARN_ON(!hlist_unhashed(node)); + + hlist_add_head_rcu(node, head); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); /* switched from flag set to unset */ } else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) && @@ -585,7 +594,10 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv, atomic_dec(&bat_priv->mcast.num_want_all_unsnoopables); spin_lock_bh(&bat_priv->mcast.want_lists_lock); - hlist_del_rcu(&orig->mcast_want_all_unsnoopables_node); + /* flag checks above + mcast_handler_lock prevents this */ + WARN_ON(hlist_unhashed(node)); + + hlist_del_init_rcu(node); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); } } @@ -598,19 +610,26 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv, * * If the BATADV_MCAST_WANT_ALL_IPV4 flag of this originator, orig, has * toggled then this method updates counter and list accordingly. + * + * Caller needs to hold orig->mcast_handler_lock. 
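+ *
+ * With the lock held, hlist_unhashed() reliably reflects membership,
+ * which lets the add/del paths catch double updates:
+ *
+ *	WARN_ON(!hlist_unhashed(node));      (must not be added twice)
+ *	hlist_add_head_rcu(node, head);
+ *	...
+ *	WARN_ON(hlist_unhashed(node));       (must not be removed twice)
+ *	hlist_del_init_rcu(node);            (re-inits the node, so
+ *					      hlist_unhashed() holds again)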
*/ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, uint8_t mcast_flags) { + struct hlist_node *node = &orig->mcast_want_all_ipv4_node; + struct hlist_head *head = &bat_priv->mcast.want_all_ipv4_list; + /* switched from flag unset to set */ if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV4 && !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV4)) { atomic_inc(&bat_priv->mcast.num_want_all_ipv4); spin_lock_bh(&bat_priv->mcast.want_lists_lock); - hlist_add_head_rcu(&orig->mcast_want_all_ipv4_node, - &bat_priv->mcast.want_all_ipv4_list); + /* flag checks above + mcast_handler_lock prevents this */ + WARN_ON(!hlist_unhashed(node)); + + hlist_add_head_rcu(node, head); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); /* switched from flag set to unset */ } else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_IPV4) && @@ -618,7 +637,10 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv, atomic_dec(&bat_priv->mcast.num_want_all_ipv4); spin_lock_bh(&bat_priv->mcast.want_lists_lock); - hlist_del_rcu(&orig->mcast_want_all_ipv4_node); + /* flag checks above + mcast_handler_lock prevents this */ + WARN_ON(hlist_unhashed(node)); + + hlist_del_init_rcu(node); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); } } @@ -631,19 +653,26 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv, * * If the BATADV_MCAST_WANT_ALL_IPV6 flag of this originator, orig, has * toggled then this method updates counter and list accordingly. + * + * Caller needs to hold orig->mcast_handler_lock. */ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, uint8_t mcast_flags) { + struct hlist_node *node = &orig->mcast_want_all_ipv6_node; + struct hlist_head *head = &bat_priv->mcast.want_all_ipv6_list; + /* switched from flag unset to set */ if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV6 && !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV6)) { atomic_inc(&bat_priv->mcast.num_want_all_ipv6); spin_lock_bh(&bat_priv->mcast.want_lists_lock); - hlist_add_head_rcu(&orig->mcast_want_all_ipv6_node, - &bat_priv->mcast.want_all_ipv6_list); + /* flag checks above + mcast_handler_lock prevents this */ + WARN_ON(!hlist_unhashed(node)); + + hlist_add_head_rcu(node, head); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); /* switched from flag set to unset */ } else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_IPV6) && @@ -651,7 +680,10 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv, atomic_dec(&bat_priv->mcast.num_want_all_ipv6); spin_lock_bh(&bat_priv->mcast.want_lists_lock); - hlist_del_rcu(&orig->mcast_want_all_ipv6_node); + /* flag checks above + mcast_handler_lock prevents this */ + WARN_ON(hlist_unhashed(node)); + + hlist_del_init_rcu(node); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); } } @@ -674,39 +706,42 @@ static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, uint8_t mcast_flags = BATADV_NO_FLAGS; bool orig_initialized; - orig_initialized = orig->capa_initialized & BATADV_ORIG_CAPA_HAS_MCAST; + if (orig_mcast_enabled && tvlv_value && + (tvlv_value_len >= sizeof(mcast_flags))) + mcast_flags = *(uint8_t *)tvlv_value; + + spin_lock_bh(&orig->mcast_handler_lock); + orig_initialized = test_bit(BATADV_ORIG_CAPA_HAS_MCAST, + &orig->capa_initialized); /* If mcast support is turned on decrease the disabled mcast node * counter only if we had increased it for this node before. If this * is a completely new orig_node no need to decrease the counter. 
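 * The resulting num_disabled accounting:
 *
 *	OGM mcast	HAS_MCAST	capa		num_disabled
 *	enabled		was set		initialized
 *	yes		no		yes		decremented
 *	yes		no		no		unchanged (new node)
 *	yes		yes		any		unchanged
 *	no		yes		any		incremented
 *	no		no		no		incremented
 *	no		no		yes		unchanged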
*/ if (orig_mcast_enabled && - !(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST)) { + !test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities)) { if (orig_initialized) atomic_dec(&bat_priv->mcast.num_disabled); - orig->capabilities |= BATADV_ORIG_CAPA_HAS_MCAST; + set_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities); /* If mcast support is being switched off or if this is an initial * OGM without mcast support then increase the disabled mcast * node counter. */ } else if (!orig_mcast_enabled && - (orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST || + (test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities) || !orig_initialized)) { atomic_inc(&bat_priv->mcast.num_disabled); - orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_MCAST; + clear_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities); } - orig->capa_initialized |= BATADV_ORIG_CAPA_HAS_MCAST; - - if (orig_mcast_enabled && tvlv_value && - (tvlv_value_len >= sizeof(mcast_flags))) - mcast_flags = *(uint8_t *)tvlv_value; + set_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capa_initialized); batadv_mcast_want_unsnoop_update(bat_priv, orig, mcast_flags); batadv_mcast_want_ipv4_update(bat_priv, orig, mcast_flags); batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags); orig->mcast_flags = mcast_flags; + spin_unlock_bh(&orig->mcast_handler_lock); } /** @@ -740,11 +775,15 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) { struct batadv_priv *bat_priv = orig->bat_priv; - if (!(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST) && - orig->capa_initialized & BATADV_ORIG_CAPA_HAS_MCAST) + spin_lock_bh(&orig->mcast_handler_lock); + + if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities) && + test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capa_initialized)) atomic_dec(&bat_priv->mcast.num_disabled); batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS); batadv_mcast_want_ipv4_update(bat_priv, orig, BATADV_NO_FLAGS); batadv_mcast_want_ipv6_update(bat_priv, orig, BATADV_NO_FLAGS); + + spin_unlock_bh(&orig->mcast_handler_lock); } diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 127cc4d7380a1..2fbd3a6bde9ae 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -15,6 +15,7 @@ * along with this program; if not, see . 
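
The handler above now manipulates orig->capabilities with test_bit()/set_bit()/clear_bit() instead of open-coded &/| on a u8, and takes mcast_handler_lock so that the capability test and the num_disabled counter update happen as one unit. A stripped-down sketch of that shape (names are hypothetical, and the capa_initialized bookkeeping is omitted):

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>

enum node_caps { CAP_HAS_MCAST };	/* a bit index, not a mask */

struct node {
	spinlock_t handler_lock;	/* serializes capability transitions */
	unsigned long capabilities;
};

static void node_update_mcast(struct node *n, atomic_t *num_disabled,
			      bool enabled)
{
	spin_lock_bh(&n->handler_lock);

	if (enabled && !test_bit(CAP_HAS_MCAST, &n->capabilities)) {
		atomic_dec(num_disabled);
		set_bit(CAP_HAS_MCAST, &n->capabilities);
	} else if (!enabled && test_bit(CAP_HAS_MCAST, &n->capabilities)) {
		atomic_inc(num_disabled);
		clear_bit(CAP_HAS_MCAST, &n->capabilities);
	}

	spin_unlock_bh(&n->handler_lock);
}

Each bitop is atomic on its own; the spinlock is still needed because the flag test plus counter update form a larger read-modify-write sequence.
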
*/ +#include #include #include "main.h" @@ -105,9 +106,9 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, uint16_t tvlv_value_len) { if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) - orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_NC; + clear_bit(BATADV_ORIG_CAPA_HAS_NC, &orig->capabilities); else - orig->capabilities |= BATADV_ORIG_CAPA_HAS_NC; + set_bit(BATADV_ORIG_CAPA_HAS_NC, &orig->capabilities); } /** @@ -174,28 +175,25 @@ void batadv_nc_init_orig(struct batadv_orig_node *orig_node) } /** - * batadv_nc_node_free_rcu - rcu callback to free an nc node and remove - * its refcount on the orig_node - * @rcu: rcu pointer of the nc node + * batadv_nc_node_release - release nc_node from lists and queue for free after + * rcu grace period + * @nc_node: the nc node to free */ -static void batadv_nc_node_free_rcu(struct rcu_head *rcu) +static void batadv_nc_node_release(struct batadv_nc_node *nc_node) { - struct batadv_nc_node *nc_node; - - nc_node = container_of(rcu, struct batadv_nc_node, rcu); batadv_orig_node_free_ref(nc_node->orig_node); - kfree(nc_node); + kfree_rcu(nc_node, rcu); } /** - * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly - * frees it + * batadv_nc_node_free_ref - decrement the nc node refcounter and possibly + * release it * @nc_node: the nc node to free */ static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node) { if (atomic_dec_and_test(&nc_node->refcount)) - call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu); + batadv_nc_node_release(nc_node); } /** @@ -871,7 +869,7 @@ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, goto out; /* check if orig node is network coding enabled */ - if (!(orig_node->capabilities & BATADV_ORIG_CAPA_HAS_NC)) + if (!test_bit(BATADV_ORIG_CAPA_HAS_NC, &orig_node->capabilities)) goto out; /* accept ogms from 'good' neighbors and single hop neighbors */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 90e805aba3795..77ea1d4de2ba8 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -150,86 +150,58 @@ int batadv_originator_init(struct batadv_priv *bat_priv) } /** - * batadv_neigh_ifinfo_free_rcu - free the neigh_ifinfo object - * @rcu: rcu pointer of the neigh_ifinfo object - */ -static void batadv_neigh_ifinfo_free_rcu(struct rcu_head *rcu) -{ - struct batadv_neigh_ifinfo *neigh_ifinfo; - - neigh_ifinfo = container_of(rcu, struct batadv_neigh_ifinfo, rcu); - - if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT) - batadv_hardif_free_ref_now(neigh_ifinfo->if_outgoing); - - kfree(neigh_ifinfo); -} - -/** - * batadv_neigh_ifinfo_free_now - decrement the refcounter and possibly free - * the neigh_ifinfo (without rcu callback) + * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for + * free after rcu grace period * @neigh_ifinfo: the neigh_ifinfo object to release */ static void -batadv_neigh_ifinfo_free_ref_now(struct batadv_neigh_ifinfo *neigh_ifinfo) +batadv_neigh_ifinfo_release(struct batadv_neigh_ifinfo *neigh_ifinfo) { - if (atomic_dec_and_test(&neigh_ifinfo->refcount)) - batadv_neigh_ifinfo_free_rcu(&neigh_ifinfo->rcu); + if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT) + batadv_hardif_free_ref(neigh_ifinfo->if_outgoing); + + kfree_rcu(neigh_ifinfo, rcu); } /** - * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly free + * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly release * the neigh_ifinfo * @neigh_ifinfo: the neigh_ifinfo object to release */ void 
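
The nc_node and neigh_ifinfo conversions replace a call_rcu() callback with a plain release function plus kfree_rcu(). That rewrite is valid whenever the only work that must wait for the grace period is the kfree() itself; dropping references to other objects can then happen synchronously, which is why the special batadv_hardif_free_ref_now() bypass could be deleted in the hard-interface.h hunk earlier. The resulting shape, roughly (struct thing and struct other are hypothetical):

#include <linux/atomic.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct other {
	atomic_t refcount;
};

static void other_put(struct other *o)
{
	if (atomic_dec_and_test(&o->refcount))
		kfree(o);
}

struct thing {
	atomic_t refcount;
	struct other *peer;	/* counted reference */
	struct rcu_head rcu;
};

static void thing_release(struct thing *t)
{
	other_put(t->peer);	/* may run immediately */
	kfree_rcu(t, rcu);	/* only the memory free waits for readers */
}

static void thing_put(struct thing *t)
{
	if (atomic_dec_and_test(&t->refcount))
		thing_release(t);
}
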
batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo) { if (atomic_dec_and_test(&neigh_ifinfo->refcount)) - call_rcu(&neigh_ifinfo->rcu, batadv_neigh_ifinfo_free_rcu); + batadv_neigh_ifinfo_release(neigh_ifinfo); } /** - * batadv_neigh_node_free_rcu - free the neigh_node - * @rcu: rcu pointer of the neigh_node + * batadv_neigh_node_release - release neigh_node from lists and queue for + * free after rcu grace period + * @neigh_node: neigh neighbor to free */ -static void batadv_neigh_node_free_rcu(struct rcu_head *rcu) +static void batadv_neigh_node_release(struct batadv_neigh_node *neigh_node) { struct hlist_node *node_tmp; - struct batadv_neigh_node *neigh_node; struct batadv_neigh_ifinfo *neigh_ifinfo; - neigh_node = container_of(rcu, struct batadv_neigh_node, rcu); - hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, &neigh_node->ifinfo_list, list) { - batadv_neigh_ifinfo_free_ref_now(neigh_ifinfo); + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); } - batadv_hardif_free_ref_now(neigh_node->if_incoming); - - kfree(neigh_node); -} + batadv_hardif_free_ref(neigh_node->if_incoming); -/** - * batadv_neigh_node_free_ref_now - decrement the neighbors refcounter - * and possibly free it (without rcu callback) - * @neigh_node: neigh neighbor to free - */ -static void -batadv_neigh_node_free_ref_now(struct batadv_neigh_node *neigh_node) -{ - if (atomic_dec_and_test(&neigh_node->refcount)) - batadv_neigh_node_free_rcu(&neigh_node->rcu); + kfree_rcu(neigh_node, rcu); } /** * batadv_neigh_node_free_ref - decrement the neighbors refcounter - * and possibly free it + * and possibly release it * @neigh_node: neigh neighbor to free */ void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node) { if (atomic_dec_and_test(&neigh_node->refcount)) - call_rcu(&neigh_node->rcu, batadv_neigh_node_free_rcu); + batadv_neigh_node_release(neigh_node); } /** @@ -495,108 +467,99 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node, } /** - * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object - * @rcu: rcu pointer of the orig_ifinfo object + * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for + * free after rcu grace period + * @orig_ifinfo: the orig_ifinfo object to release */ -static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu) +static void batadv_orig_ifinfo_release(struct batadv_orig_ifinfo *orig_ifinfo) { - struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_neigh_node *router; - orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu); - if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT) - batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing); + batadv_hardif_free_ref(orig_ifinfo->if_outgoing); /* this is the last reference to this object */ router = rcu_dereference_protected(orig_ifinfo->router, true); if (router) - batadv_neigh_node_free_ref_now(router); - kfree(orig_ifinfo); + batadv_neigh_node_free_ref(router); + + kfree_rcu(orig_ifinfo, rcu); } /** - * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free - * the orig_ifinfo (without rcu callback) + * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly release + * the orig_ifinfo * @orig_ifinfo: the orig_ifinfo object to release */ -static void -batadv_orig_ifinfo_free_ref_now(struct batadv_orig_ifinfo *orig_ifinfo) +void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo) { if (atomic_dec_and_test(&orig_ifinfo->refcount)) - batadv_orig_ifinfo_free_rcu(&orig_ifinfo->rcu); + batadv_orig_ifinfo_release(orig_ifinfo); } /** - 
* batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free - * the orig_ifinfo - * @orig_ifinfo: the orig_ifinfo object to release + * batadv_orig_node_free_rcu - free the orig_node + * @rcu: rcu pointer of the orig_node */ -void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo) +static void batadv_orig_node_free_rcu(struct rcu_head *rcu) { - if (atomic_dec_and_test(&orig_ifinfo->refcount)) - call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu); + struct batadv_orig_node *orig_node; + + orig_node = container_of(rcu, struct batadv_orig_node, rcu); + + batadv_mcast_purge_orig(orig_node); + + batadv_frag_purge_orig(orig_node, NULL); + + if (orig_node->bat_priv->bat_algo_ops->bat_orig_free) + orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node); + + kfree(orig_node->tt_buff); + kfree(orig_node); } -static void batadv_orig_node_free_rcu(struct rcu_head *rcu) +/** + * batadv_orig_node_release - release orig_node from lists and queue for + * free after rcu grace period + * @orig_node: the orig node to free + */ +static void batadv_orig_node_release(struct batadv_orig_node *orig_node) { struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; - struct batadv_orig_node *orig_node; struct batadv_orig_ifinfo *orig_ifinfo; - orig_node = container_of(rcu, struct batadv_orig_node, rcu); - spin_lock_bh(&orig_node->neigh_list_lock); /* for all neighbors towards this originator ... */ hlist_for_each_entry_safe(neigh_node, node_tmp, &orig_node->neigh_list, list) { hlist_del_rcu(&neigh_node->list); - batadv_neigh_node_free_ref_now(neigh_node); + batadv_neigh_node_free_ref(neigh_node); } hlist_for_each_entry_safe(orig_ifinfo, node_tmp, &orig_node->ifinfo_list, list) { hlist_del_rcu(&orig_ifinfo->list); - batadv_orig_ifinfo_free_ref_now(orig_ifinfo); + batadv_orig_ifinfo_free_ref(orig_ifinfo); } spin_unlock_bh(&orig_node->neigh_list_lock); - batadv_mcast_purge_orig(orig_node); - /* Free nc_nodes */ batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL); - batadv_frag_purge_orig(orig_node, NULL); - - if (orig_node->bat_priv->bat_algo_ops->bat_orig_free) - orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node); - - kfree(orig_node->tt_buff); - kfree(orig_node); + call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu); } /** * batadv_orig_node_free_ref - decrement the orig node refcounter and possibly - * schedule an rcu callback for freeing it + * release it * @orig_node: the orig node to free */ void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node) { if (atomic_dec_and_test(&orig_node->refcount)) - call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu); -} - -/** - * batadv_orig_node_free_ref_now - decrement the orig node refcounter and - * possibly free it (without rcu callback) - * @orig_node: the orig node to free - */ -void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node) -{ - if (atomic_dec_and_test(&orig_node->refcount)) - batadv_orig_node_free_rcu(&orig_node->rcu); + batadv_orig_node_release(orig_node); } void batadv_originator_free(struct batadv_priv *bat_priv) @@ -678,8 +641,13 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, orig_node->last_seen = jiffies; reset_time = jiffies - 1 - msecs_to_jiffies(BATADV_RESET_PROTECTION_MS); orig_node->bcast_seqno_reset = reset_time; + #ifdef CONFIG_BATMAN_ADV_MCAST orig_node->mcast_flags = BATADV_NO_FLAGS; + INIT_HLIST_NODE(&orig_node->mcast_want_all_unsnoopables_node); + INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv4_node); + 
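
batadv_orig_node_release()/batadv_orig_node_free_rcu() show the other half of the pattern: when teardown needs a real RCU callback (here the bat_orig_free() hook and kfree(orig_node->tt_buff)), call_rcu() stays, but unlinking children from RCU-visible lists moves out of the callback into the release function, which runs as soon as the last reference drops. In outline (hypothetical node type, simplified to a node that unlinks itself):

#include <linux/kernel.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct node {
	spinlock_t list_lock;	/* guards the list this node lives on */
	struct hlist_node list;
	void *buf;
	struct rcu_head rcu;
};

static void node_free_rcu(struct rcu_head *rcu)
{
	struct node *n = container_of(rcu, struct node, rcu);

	/* runs after the grace period: no RCU reader can still see n */
	kfree(n->buf);
	kfree(n);
}

static void node_release(struct node *n)
{
	/* unlink right away, while the object is still valid ... */
	spin_lock_bh(&n->list_lock);
	hlist_del_rcu(&n->list);
	spin_unlock_bh(&n->list_lock);

	/* ... and defer only the actual teardown */
	call_rcu(&n->rcu, node_free_rcu);
}
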
INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv6_node); + spin_lock_init(&orig_node->mcast_handler_lock); #endif /* create a vlan object for the "untagged" LAN */ diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index aa4a436962956..28b751ad549cc 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -25,7 +25,6 @@ int batadv_originator_init(struct batadv_priv *bat_priv); void batadv_originator_free(struct batadv_priv *bat_priv); void batadv_purge_orig_ref(struct batadv_priv *bat_priv); void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node); -void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node); struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_neigh_node * diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 5ec31d7de24f1..a0b1b861b968e 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -172,6 +172,7 @@ static int batadv_interface_tx(struct sk_buff *skb, int gw_mode; enum batadv_forw_mode forw_mode; struct batadv_orig_node *mcast_single_orig = NULL; + int network_offset = ETH_HLEN; if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; @@ -184,14 +185,18 @@ static int batadv_interface_tx(struct sk_buff *skb, case ETH_P_8021Q: vhdr = vlan_eth_hdr(skb); - if (vhdr->h_vlan_encapsulated_proto != ethertype) + if (vhdr->h_vlan_encapsulated_proto != ethertype) { + network_offset += VLAN_HLEN; break; + } /* fall through */ case ETH_P_BATMAN: goto dropped; } + skb_set_network_header(skb, network_offset); + if (batadv_bla_tx(bat_priv, skb, vid)) goto dropped; @@ -449,6 +454,9 @@ void batadv_interface_rx(struct net_device *soft_iface, */ void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan) { + if (!vlan) + return; + if (atomic_dec_and_test(&vlan->refcount)) { spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock); hlist_del_rcu(&vlan->list); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 07b263a437d1b..ddd62c9af5b4c 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -15,6 +15,7 @@ * along with this program; if not, see . 
*/ +#include #include "main.h" #include "translation-table.h" #include "soft-interface.h" @@ -218,20 +219,6 @@ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, return count; } -static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) -{ - struct batadv_tt_orig_list_entry *orig_entry; - - orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu); - - /* We are in an rcu callback here, therefore we cannot use - * batadv_orig_node_free_ref() and its call_rcu(): - * An rcu_barrier() wouldn't wait for that to finish - */ - batadv_orig_node_free_ref_now(orig_entry->orig_node); - kfree(orig_entry); -} - /** * batadv_tt_local_size_mod - change the size by v of the local table identified * by vid @@ -327,13 +314,25 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node, batadv_tt_global_size_mod(orig_node, vid, -1); } +/** + * batadv_tt_orig_list_entry_release - release tt orig entry from lists and + * queue for free after rcu grace period + * @orig_entry: tt orig entry to be free'd + */ +static void +batadv_tt_orig_list_entry_release(struct batadv_tt_orig_list_entry *orig_entry) +{ + batadv_orig_node_free_ref(orig_entry->orig_node); + kfree_rcu(orig_entry, rcu); +} + static void batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) { if (!atomic_dec_and_test(&orig_entry->refcount)) return; - call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); + batadv_tt_orig_list_entry_release(orig_entry); } /** @@ -575,6 +574,9 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, /* increase the refcounter of the related vlan */ vlan = batadv_softif_vlan_get(bat_priv, vid); + if (WARN(!vlan, "adding TT local entry %pM to non-existent VLAN %d", + addr, BATADV_PRINT_VID(vid))) + goto out; batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n", @@ -1015,6 +1017,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, struct batadv_tt_local_entry *tt_local_entry; uint16_t flags, curr_flags = BATADV_NO_FLAGS; struct batadv_softif_vlan *vlan; + void *tt_entry_exists; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) @@ -1042,11 +1045,22 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, * immediately purge it */ batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); - hlist_del_rcu(&tt_local_entry->common.hash_entry); + + tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash, + batadv_compare_tt, + batadv_choose_tt, + &tt_local_entry->common); + if (!tt_entry_exists) + goto out; + + /* extra call to free the local tt entry */ batadv_tt_local_entry_free_ref(tt_local_entry); /* decrease the reference held for this vlan */ vlan = batadv_softif_vlan_get(bat_priv, vid); + if (!vlan) + goto out; + batadv_softif_vlan_free_ref(vlan); batadv_softif_vlan_free_ref(vlan); @@ -1147,8 +1161,10 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) /* decrease the reference held for this vlan */ vlan = batadv_softif_vlan_get(bat_priv, tt_common_entry->vid); - batadv_softif_vlan_free_ref(vlan); - batadv_softif_vlan_free_ref(vlan); + if (vlan) { + batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); + } batadv_tt_local_entry_free_ref(tt_local); } @@ -1843,7 +1859,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, } spin_unlock_bh(list_lock); } - orig_node->capa_initialized &= ~BATADV_ORIG_CAPA_HAS_TT; + clear_bit(BATADV_ORIG_CAPA_HAS_TT, 
&orig_node->capa_initialized); } static bool batadv_tt_global_to_purge(struct batadv_tt_global_entry *tt_global, @@ -2802,7 +2818,7 @@ static void _batadv_tt_update_changes(struct batadv_priv *bat_priv, return; } } - orig_node->capa_initialized |= BATADV_ORIG_CAPA_HAS_TT; + set_bit(BATADV_ORIG_CAPA_HAS_TT, &orig_node->capa_initialized); } static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv, @@ -3188,8 +3204,10 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) /* decrease the reference held for this vlan */ vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid); - batadv_softif_vlan_free_ref(vlan); - batadv_softif_vlan_free_ref(vlan); + if (vlan) { + batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); + } batadv_tt_local_entry_free_ref(tt_local); } @@ -3302,7 +3320,8 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv, bool has_tt_init; tt_vlan = (struct batadv_tvlv_tt_vlan_data *)tt_buff; - has_tt_init = orig_node->capa_initialized & BATADV_ORIG_CAPA_HAS_TT; + has_tt_init = test_bit(BATADV_ORIG_CAPA_HAS_TT, + &orig_node->capa_initialized); /* orig table not initialised AND first diff is in the OGM OR the ttvn * increased by one -> we can apply the attached changes diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 9398c3fb41747..26c37be2aa051 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -204,6 +204,7 @@ struct batadv_orig_bat_iv { * @batadv_dat_addr_t: address of the orig node in the distributed hash * @last_seen: time when last packet from this node was received * @bcast_seqno_reset: time when the broadcast seqno window was reset + * @mcast_handler_lock: synchronizes mcast-capability and -flag changes * @mcast_flags: multicast flags announced by the orig node * @mcast_want_all_unsnoop_node: a list node for the * mcast.want_all_unsnoopables list @@ -251,13 +252,15 @@ struct batadv_orig_node { unsigned long last_seen; unsigned long bcast_seqno_reset; #ifdef CONFIG_BATMAN_ADV_MCAST + /* synchronizes mcast tvlv specific orig changes */ + spinlock_t mcast_handler_lock; uint8_t mcast_flags; struct hlist_node mcast_want_all_unsnoopables_node; struct hlist_node mcast_want_all_ipv4_node; struct hlist_node mcast_want_all_ipv6_node; #endif - uint8_t capabilities; - uint8_t capa_initialized; + unsigned long capabilities; + unsigned long capa_initialized; atomic_t last_ttvn; unsigned char *tt_buff; int16_t tt_buff_len; @@ -296,10 +299,10 @@ struct batadv_orig_node { * (= orig node announces a tvlv of type BATADV_TVLV_MCAST) */ enum batadv_orig_capabilities { - BATADV_ORIG_CAPA_HAS_DAT = BIT(0), - BATADV_ORIG_CAPA_HAS_NC = BIT(1), - BATADV_ORIG_CAPA_HAS_TT = BIT(2), - BATADV_ORIG_CAPA_HAS_MCAST = BIT(3), + BATADV_ORIG_CAPA_HAS_DAT, + BATADV_ORIG_CAPA_HAS_NC, + BATADV_ORIG_CAPA_HAS_TT, + BATADV_ORIG_CAPA_HAS_MCAST, }; /** diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 56f9edbf3d05d..e11a5cfda4b1c 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -741,10 +741,11 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, goto done; } - if (test_bit(HCI_UP, &hdev->flags) || - test_bit(HCI_INIT, &hdev->flags) || + if (test_bit(HCI_INIT, &hdev->flags) || hci_dev_test_flag(hdev, HCI_SETUP) || - hci_dev_test_flag(hdev, HCI_CONFIG)) { + hci_dev_test_flag(hdev, HCI_CONFIG) || + (!hci_dev_test_flag(hdev, HCI_AUTO_OFF) && + test_bit(HCI_UP, &hdev->flags))) { err = -EBUSY; hci_dev_put(hdev); goto done; @@ -760,10 +761,21 @@ static int 
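
The types.h hunk above is easy to misread: the BATADV_ORIG_CAPA_* values deliberately stop being BIT(n) masks, because set_bit()/clear_bit()/test_bit() take a bit *number*, not a mask. Passing the old BIT(2) value (= 4) to set_bit() would silently operate on bit 4. A short illustration (hypothetical enum names):

#include <linux/bitops.h>

/* old style: mask values, for use with &, |, ~ */
enum caps_mask { CAPA_HAS_TT_MASK = BIT(2) };	/* == 0x04 */

/* new style: bit indices, for use with the bitops API */
enum caps_bit { CAPA_HAS_TT = 2 };

static void demo(unsigned long *capabilities)
{
	set_bit(CAPA_HAS_TT, capabilities);	/* sets bit 2 */
	/* set_bit(CAPA_HAS_TT_MASK, capabilities) would set bit 4 */
}

This is also why capabilities and capa_initialized grow from uint8_t to unsigned long: the atomic bitops operate on whole unsigned long words.
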
hci_sock_bind(struct socket *sock, struct sockaddr *addr, err = hci_dev_open(hdev->id); if (err) { - hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); - mgmt_index_added(hdev); - hci_dev_put(hdev); - goto done; + if (err == -EALREADY) { + /* In case the transport is already up and + * running, clear the error here. + * + * This can happen when opening an user + * channel and HCI_AUTO_OFF grace period + * is still active. + */ + err = 0; + } else { + hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); + mgmt_index_added(hdev); + hci_dev_put(hdev); + goto done; + } } atomic_inc(&hdev->promisc); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 9070dfd6b4adc..4a0015e16d4f3 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -401,6 +401,20 @@ static void hidp_idle_timeout(unsigned long arg) { struct hidp_session *session = (struct hidp_session *) arg; + /* The HIDP user-space API only contains calls to add and remove + * devices. There is no way to forward events of any kind. Therefore, + * we have to forcefully disconnect a device on idle-timeouts. This is + * unfortunate and weird API design, but it is spec-compliant and + * required for backwards-compatibility. Hence, on idle-timeout, we + * signal driver-detach events, so poll() will be woken up with an + * error-condition on both sockets. + */ + + session->intr_sock->sk->sk_err = EUNATCH; + session->ctrl_sock->sk->sk_err = EUNATCH; + wake_up_interruptible(sk_sleep(session->intr_sock->sk)); + wake_up_interruptible(sk_sleep(session->ctrl_sock->sk)); + hidp_session_terminate(session); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7fd87e7135b52..58d60cbbc33f8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2962,6 +2962,11 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, } else { u8 addr_type; + if (cp->addr.type == BDADDR_LE_PUBLIC) + addr_type = ADDR_LE_DEV_PUBLIC; + else + addr_type = ADDR_LE_DEV_RANDOM; + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); if (conn) { @@ -2977,13 +2982,10 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, */ if (!cp->disconnect) conn = NULL; + } else { + hci_conn_params_del(hdev, &cp->addr.bdaddr, addr_type); } - if (cp->addr.type == BDADDR_LE_PUBLIC) - addr_type = ADDR_LE_DEV_PUBLIC; - else - addr_type = ADDR_LE_DEV_RANDOM; - hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type); err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 4322c833e7489..8611bc7bdd327 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -520,6 +520,9 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le if (!addr || addr->sa_family != AF_BLUETOOTH) return -EINVAL; + if (addr_len < sizeof(struct sockaddr_sco)) + return -EINVAL; + lock_sock(sk); if (sk->sk_state != BT_OPEN) { diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 1ab3dc9c8f99b..69ad5091e2cef 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2294,8 +2294,6 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) if (!conn) return 1; - chan = conn->smp; - if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED)) return 1; @@ -2309,6 +2307,12 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) if (smp_ltk_encrypt(conn, hcon->pending_sec_level)) return 0; + chan = conn->smp; + if (!chan) { + BT_ERR("SMP security requested but not available"); + return 1; + } + l2cap_chan_lock(chan); /* If SMP is already 
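
The sco_sock_bind() hunk is a classic sockaddr hardening fix: sa_family may be examined after the NULL check, but any field beyond it is only valid once addr_len is known to cover the whole protocol-specific structure; otherwise the kernel reads caller stack bytes that were never supplied. The shape of the check, with a hypothetical sockaddr_foo standing in for sockaddr_sco:

#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/types.h>

struct sockaddr_foo {
	__kernel_sa_family_t foo_family;
	__u8 foo_addr[6];
};

static int foo_bind_validate(const struct sockaddr *addr, int addr_len)
{
	if (!addr || addr->sa_family != AF_BLUETOOTH)
		return -EINVAL;

	/* reject short addresses before touching foo_addr */
	if (addr_len < sizeof(struct sockaddr_foo))
		return -EINVAL;

	return 0;
}
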
in progress ignore this request */ diff --git a/net/bridge/br.c b/net/bridge/br.c index 02c24cf63c344..c72e01cf09d09 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -121,6 +121,7 @@ static struct notifier_block br_device_notifier = { .notifier_call = br_device_event }; +/* called with RTNL */ static int br_netdev_switch_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -130,7 +131,6 @@ static int br_netdev_switch_event(struct notifier_block *unused, struct netdev_switch_notifier_fdb_info *fdb_info; int err = NOTIFY_DONE; - rtnl_lock(); p = br_port_get_rtnl(dev); if (!p) goto out; @@ -155,7 +155,6 @@ static int br_netdev_switch_event(struct notifier_block *unused, } out: - rtnl_unlock(); return err; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 4ff77a16956c2..3d6c8e2223916 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -28,6 +28,8 @@ const struct nf_br_ops __rcu *nf_br_ops __read_mostly; EXPORT_SYMBOL_GPL(nf_br_ops); +static struct lock_class_key bridge_netdev_addr_lock_key; + /* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -87,6 +89,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } +static void br_set_lockdep_class(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key); +} + static int br_dev_init(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -99,6 +106,7 @@ static int br_dev_init(struct net_device *dev) err = br_vlan_init(br); if (err) free_percpu(br->stats); + br_set_lockdep_class(dev); return err; } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index e97572b5d2ccf..0ff6e1bbca910 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -42,6 +42,7 @@ int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) } else { skb_push(skb, ETH_HLEN); br_drop_fake_rtable(skb); + skb_sender_cpu_clear(skb); dev_queue_xmit(skb); } diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index a9a4a1b7863d1..8d423bc649b9c 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -247,9 +247,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - spin_lock_bh(&br->lock); br_stp_set_bridge_priority(br, args[1]); - spin_unlock_bh(&br->lock); return 0; case BRCTL_SET_PORT_PRIORITY: diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index e29ad70b3000b..d1f910c0d586a 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -348,7 +348,6 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, return -ENOMEM; rcu_assign_pointer(*pp, p); - br_mdb_notify(br->dev, port, group, RTM_NEWMDB); return 0; } @@ -371,6 +370,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, if (!p || p->br != br || p->state == BR_STATE_DISABLED) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; @@ -417,6 +417,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (!netif_running(br->dev) || br->multicast_disabled) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) { if (timer_pending(&br->ip4_other_query.timer)) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 
ff667e18b2d63..9ba383f5b0c4b 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -980,7 +980,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); - len = sizeof(*ih); + len = skb_transport_offset(skb) + sizeof(*ih); for (i = 0; i < num; i++) { len += sizeof(*grec); @@ -1035,7 +1035,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, icmp6h = icmp6_hdr(skb); num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); - len = sizeof(*icmp6h); + len = skb_transport_offset(skb) + sizeof(*icmp6h); for (i = 0; i < num; i++) { __be16 *nsrcs, _nsrcs; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 4b5c236998ff1..a7559ef312bdf 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -112,6 +112,8 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ + + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */ + 0; } @@ -504,6 +506,8 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_FAST_LEAVE]= { .type = NLA_U8 }, [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, + [IFLA_BRPORT_PROXYARP] = { .type = NLA_U8 }, + [IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -711,9 +715,17 @@ static int br_port_slave_changelink(struct net_device *brdev, struct nlattr *tb[], struct nlattr *data[]) { + struct net_bridge *br = netdev_priv(brdev); + int ret; + if (!data) return 0; - return br_setport(br_port_get_rtnl(dev), data); + + spin_lock_bh(&br->lock); + ret = br_setport(br_port_get_rtnl(dev), data); + spin_unlock_bh(&br->lock); + + return ret; } static int br_port_fill_slave_info(struct sk_buff *skb, diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 41146872c1b47..ce658abdc2c8d 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -128,7 +128,10 @@ static void br_stp_start(struct net_bridge *br) char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; char *envp[] = { NULL }; - r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); + if (net_eq(dev_net(br->dev), &init_net)) + r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); + else + r = -ENOENT; spin_lock_bh(&br->lock); @@ -243,12 +246,13 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br) return true; } -/* called under bridge lock */ +/* Acquires and releases bridge lock */ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) { struct net_bridge_port *p; int wasroot; + spin_lock_bh(&br->lock); wasroot = br_is_root_bridge(br); list_for_each_entry(p, &br->port_list, list) { @@ -266,6 +270,7 @@ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) br_port_state_selection(br); if (br_is_root_bridge(br) && !wasroot) br_become_root_bridge(br); + spin_unlock_bh(&br->lock); } /* called under bridge lock */ diff --git a/net/can/af_can.c b/net/can/af_can.c index 32d710eaf1fc9..62c635f2bcfc5 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -89,6 +89,8 @@ struct timer_list can_stattimer; /* timer for statistics update */ struct s_stats can_stats; /* packet statistics */ struct s_pstats can_pstats; /* receive list statistics */ +static atomic_t skbcounter = ATOMIC_INIT(0); + /* * af_can socket functions */ @@ 
-679,6 +681,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
 	can_stats.rx_frames++;
 	can_stats.rx_frames_delta++;
 
+	/* create non-zero unique skb identifier together with *skb */
+	while (!(can_skb_prv(skb)->skbcnt))
+		can_skb_prv(skb)->skbcnt = atomic_inc_return(&skbcounter);
+
 	rcu_read_lock();
 
 	/* deliver the packet to sockets listening on all devices */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index b523453585be7..a1ba6875c2a20 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -261,6 +261,7 @@ static void bcm_can_tx(struct bcm_op *op)
 	can_skb_reserve(skb);
 	can_skb_prv(skb)->ifindex = dev->ifindex;
+	can_skb_prv(skb)->skbcnt = 0;
 
 	memcpy(skb_put(skb, CFSIZ), cf, CFSIZ);
 
@@ -1217,6 +1218,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 	}
 
 	can_skb_prv(skb)->ifindex = dev->ifindex;
+	can_skb_prv(skb)->skbcnt = 0;
 	skb->dev = dev;
 	can_skb_set_owner(skb, sk);
 	err = can_send(skb, 1); /* send with loopback */
diff --git a/net/can/raw.c b/net/can/raw.c
index 31b9748cbb4ec..2e67b1423cd32 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -75,7 +75,7 @@ MODULE_ALIAS("can-proto-1");
  */
 
 struct uniqframe {
-	ktime_t tstamp;
+	int skbcnt;
 	const struct sk_buff *skb;
 	unsigned int join_rx_count;
 };
@@ -133,7 +133,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
 
 	/* eliminate multiple filter matches for the same skb */
 	if (this_cpu_ptr(ro->uniq)->skb == oskb &&
-	    ktime_equal(this_cpu_ptr(ro->uniq)->tstamp, oskb->tstamp)) {
+	    this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) {
 		if (ro->join_filters) {
 			this_cpu_inc(ro->uniq->join_rx_count);
 			/* drop frame until all enabled filters matched */
@@ -144,7 +144,7 @@
 	} else {
 		this_cpu_ptr(ro->uniq)->skb = oskb;
-		this_cpu_ptr(ro->uniq)->tstamp = oskb->tstamp;
+		this_cpu_ptr(ro->uniq)->skbcnt = can_skb_prv(oskb)->skbcnt;
 		this_cpu_ptr(ro->uniq)->join_rx_count = 1;
 		/* drop first frame to check all enabled filters?
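
The skbcnt scheme above replaces timestamp comparison as raw_rcv()'s per-CPU duplicate-frame test: af_can hands every received skb a unique non-zero identifier, and 0 is reserved to mean "not yet assigned" (which is why the transmit paths initialize skbcnt to 0). The only subtlety is counter wrap-around, which the while loop skips past. The id generation in isolation:

#include <linux/atomic.h>

static atomic_t skbcounter = ATOMIC_INIT(0);

static int get_skb_id(void)
{
	int id;

	/* 0 means "unset"; if atomic_inc_return() wraps around to 0,
	 * simply take the next value
	 */
	do {
		id = atomic_inc_return(&skbcounter);
	} while (!id);

	return id;
}
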
*/ if (ro->join_filters && ro->count > 1) @@ -749,6 +749,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; err = memcpy_from_msg(skb_put(skb, size), msg, size); if (err < 0) diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 79e8f71aef5be..3f76eb84b395f 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -495,8 +495,11 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) struct ceph_options *opt = client->options; size_t pos = m->count; - if (opt->name) - seq_printf(m, "name=%s,", opt->name); + if (opt->name) { + seq_puts(m, "name="); + seq_escape(m, opt->name, ", \t\n\\"); + seq_putc(m, ','); + } if (opt->key) seq_puts(m, "secret=,"); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 967080a9f0436..84201c21705ef 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -675,6 +675,8 @@ static void reset_connection(struct ceph_connection *con) } con->in_seq = 0; con->in_seq_acked = 0; + + con->out_skip = 0; } /* @@ -774,6 +776,8 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) static void con_out_kvec_reset(struct ceph_connection *con) { + BUG_ON(con->out_skip); + con->out_kvec_left = 0; con->out_kvec_bytes = 0; con->out_kvec_cur = &con->out_kvec[0]; @@ -782,9 +786,9 @@ static void con_out_kvec_reset(struct ceph_connection *con) static void con_out_kvec_add(struct ceph_connection *con, size_t size, void *data) { - int index; + int index = con->out_kvec_left; - index = con->out_kvec_left; + BUG_ON(con->out_skip); BUG_ON(index >= ARRAY_SIZE(con->out_kvec)); con->out_kvec[index].iov_len = size; @@ -793,6 +797,27 @@ static void con_out_kvec_add(struct ceph_connection *con, con->out_kvec_bytes += size; } +/* + * Chop off a kvec from the end. Return residual number of bytes for + * that kvec, i.e. how many bytes would have been written if the kvec + * hadn't been nuked. + */ +static int con_out_kvec_skip(struct ceph_connection *con) +{ + int off = con->out_kvec_cur - con->out_kvec; + int skip = 0; + + if (con->out_kvec_bytes > 0) { + skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len; + BUG_ON(con->out_kvec_bytes < skip); + BUG_ON(!con->out_kvec_left); + con->out_kvec_bytes -= skip; + con->out_kvec_left--; + } + + return skip; +} + #ifdef CONFIG_BLOCK /* @@ -1178,6 +1203,13 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, return new_piece; } +static size_t sizeof_footer(struct ceph_connection *con) +{ + return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ? + sizeof(struct ceph_msg_footer) : + sizeof(struct ceph_msg_footer_old); +} + static void prepare_message_data(struct ceph_msg *msg, u32 data_len) { BUG_ON(!msg); @@ -1200,7 +1232,6 @@ static void prepare_write_message_footer(struct ceph_connection *con) m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; dout("prepare_write_message_footer %p\n", con); - con->out_kvec_is_msg = true; con->out_kvec[v].iov_base = &m->footer; if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { if (con->ops->sign_message) @@ -1228,7 +1259,6 @@ static void prepare_write_message(struct ceph_connection *con) u32 crc; con_out_kvec_reset(con); - con->out_kvec_is_msg = true; con->out_msg_done = false; /* Sneak an ack in there first? 
If we can get it into the same @@ -1268,18 +1298,19 @@ static void prepare_write_message(struct ceph_connection *con) /* tag + hdr + front + middle */ con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); - con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); + con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr); con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); if (m->middle) con_out_kvec_add(con, m->middle->vec.iov_len, m->middle->vec.iov_base); - /* fill in crc (except data pages), footer */ + /* fill in hdr crc and finalize hdr */ crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); con->out_msg->hdr.crc = cpu_to_le32(crc); - con->out_msg->footer.flags = 0; + memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr)); + /* fill in front and middle crc, footer */ crc = crc32c(0, m->front.iov_base, m->front.iov_len); con->out_msg->footer.front_crc = cpu_to_le32(crc); if (m->middle) { @@ -1291,6 +1322,7 @@ static void prepare_write_message(struct ceph_connection *con) dout("%s front_crc %u middle_crc %u\n", __func__, le32_to_cpu(con->out_msg->footer.front_crc), le32_to_cpu(con->out_msg->footer.middle_crc)); + con->out_msg->footer.flags = 0; /* is there a data payload? */ con->out_msg->footer.data_crc = 0; @@ -1485,7 +1517,6 @@ static int write_partial_kvec(struct ceph_connection *con) } } con->out_kvec_left = 0; - con->out_kvec_is_msg = false; ret = 1; out: dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con, @@ -1577,6 +1608,7 @@ static int write_partial_skip(struct ceph_connection *con) { int ret; + dout("%s %p %d left\n", __func__, con, con->out_skip); while (con->out_skip > 0) { size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); @@ -2301,9 +2333,9 @@ static int read_partial_message(struct ceph_connection *con) ceph_pr_addr(&con->peer_addr.in_addr), seq, con->in_seq + 1); con->in_base_pos = -front_len - middle_len - data_len - - sizeof(m->footer); + sizeof_footer(con); con->in_tag = CEPH_MSGR_TAG_READY; - return 0; + return 1; } else if ((s64)seq - (s64)con->in_seq > 1) { pr_err("read_partial_message bad seq %lld expected %lld\n", seq, con->in_seq + 1); @@ -2333,10 +2365,10 @@ static int read_partial_message(struct ceph_connection *con) /* skip this message */ dout("alloc_msg said skip message\n"); con->in_base_pos = -front_len - middle_len - data_len - - sizeof(m->footer); + sizeof_footer(con); con->in_tag = CEPH_MSGR_TAG_READY; con->in_seq++; - return 0; + return 1; } BUG_ON(!con->in_msg); @@ -2493,13 +2525,13 @@ static int try_write(struct ceph_connection *con) more_kvec: /* kvec data queued? 
*/ - if (con->out_skip) { - ret = write_partial_skip(con); + if (con->out_kvec_left) { + ret = write_partial_kvec(con); if (ret <= 0) goto out; } - if (con->out_kvec_left) { - ret = write_partial_kvec(con); + if (con->out_skip) { + ret = write_partial_skip(con); if (ret <= 0) goto out; } @@ -3026,16 +3058,31 @@ void ceph_msg_revoke(struct ceph_msg *msg) ceph_msg_put(msg); } if (con->out_msg == msg) { - dout("%s %p msg %p - was sending\n", __func__, con, msg); - con->out_msg = NULL; - if (con->out_kvec_is_msg) { - con->out_skip = con->out_kvec_bytes; - con->out_kvec_is_msg = false; + BUG_ON(con->out_skip); + /* footer */ + if (con->out_msg_done) { + con->out_skip += con_out_kvec_skip(con); + } else { + BUG_ON(!msg->data_length); + if (con->peer_features & CEPH_FEATURE_MSG_AUTH) + con->out_skip += sizeof(msg->footer); + else + con->out_skip += sizeof(msg->old_footer); } + /* data, middle, front */ + if (msg->data_length) + con->out_skip += msg->cursor.total_resid; + if (msg->middle) + con->out_skip += con_out_kvec_skip(con); + con->out_skip += con_out_kvec_skip(con); + + dout("%s %p msg %p - was sending, will write %d skip %d\n", + __func__, con, msg, con->out_kvec_bytes, con->out_skip); msg->hdr.seq = 0; - + con->out_msg = NULL; ceph_msg_put(msg); } + mutex_unlock(&con->mutex); } diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 15796696d64ed..4a3125836b64a 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -89,7 +89,7 @@ static int crush_decode_tree_bucket(void **p, void *end, { int j; dout("crush_decode_tree_bucket %p to %p\n", *p, end); - ceph_decode_32_safe(p, end, b->num_nodes, bad); + ceph_decode_8_safe(p, end, b->num_nodes, bad); b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS); if (b->node_weights == NULL) return -ENOMEM; diff --git a/net/core/datagram.c b/net/core/datagram.c index b80fb91bb3f7e..617088aee21d4 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -131,6 +131,35 @@ static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, goto out; } +static struct sk_buff *skb_set_peeked(struct sk_buff *skb) +{ + struct sk_buff *nskb; + + if (skb->peeked) + return skb; + + /* We have to unshare an skb before modifying it. */ + if (!skb_shared(skb)) + goto done; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return ERR_PTR(-ENOMEM); + + skb->prev->next = nskb; + skb->next->prev = nskb; + nskb->prev = skb->prev; + nskb->next = skb->next; + + consume_skb(skb); + skb = nskb; + +done: + skb->peeked = 1; + + return skb; +} + /** * __skb_recv_datagram - Receive a datagram skbuff * @sk: socket @@ -165,7 +194,9 @@ static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, int *peeked, int *off, int *err) { + struct sk_buff_head *queue = &sk->sk_receive_queue; struct sk_buff *skb, *last; + unsigned long cpu_flags; long timeo; /* * Caller is allowed not to check sk->sk_err before skb_recv_datagram() @@ -184,8 +215,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, * Look at current nfs client by the way... * However, this function was correct in any case. 
8) */ - unsigned long cpu_flags; - struct sk_buff_head *queue = &sk->sk_receive_queue; int _off = *off; last = (struct sk_buff *)queue; @@ -199,7 +228,12 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, _off -= skb->len; continue; } - skb->peeked = 1; + + skb = skb_set_peeked(skb); + error = PTR_ERR(skb); + if (IS_ERR(skb)) + goto unlock_err; + atomic_inc(&skb->users); } else __skb_unlink(skb, queue); @@ -223,6 +257,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, return NULL; +unlock_err: + spin_unlock_irqrestore(&queue->lock, cpu_flags); no_packet: *err = error; return NULL; @@ -622,7 +658,8 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev); } - skb->csum_valid = !sum; + if (!skb_shared(skb)) + skb->csum_valid = !sum; return sum; } EXPORT_SYMBOL(__skb_checksum_complete_head); @@ -642,11 +679,13 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb) netdev_rx_csum_fault(skb->dev); } - /* Save full packet checksum */ - skb->csum = csum; - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum_complete_sw = 1; - skb->csum_valid = !sum; + if (!skb_shared(skb)) { + /* Save full packet checksum */ + skb->csum = csum; + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum_complete_sw = 1; + skb->csum_valid = !sum; + } return sum; } diff --git a/net/core/dev.c b/net/core/dev.c index 8a33735a53abc..17b037eb87d2b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -672,10 +672,6 @@ int dev_get_iflink(const struct net_device *dev) if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) return dev->netdev_ops->ndo_get_iflink(dev); - /* If dev->rtnl_link_ops is set, it's a virtual interface. */ - if (dev->rtnl_link_ops) - return 0; - return dev->ifindex; } EXPORT_SYMBOL(dev_get_iflink); @@ -2483,6 +2479,8 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) * * It may return NULL if the skb requires no segmentation. This is * only possible when GSO is used for verifying header integrity. + * + * Segmentation preserves SKB_SGO_CB_OFFSET bytes of previous skb cb. 
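
Both __skb_checksum_complete_head() and __skb_checksum_complete() now refuse to cache checksum results on shared skbs: when skb_shared() is true, another user holds a reference to the same struct sk_buff, and writing csum/ip_summed/csum_valid would race with it. The guard, extracted into a sketch (not the exact helper):

#include <linux/skbuff.h>

static void cache_csum_state(struct sk_buff *skb, __wsum csum, __sum16 sum)
{
	/* more than one user of this sk_buff: the cached fields are
	 * not ours to write
	 */
	if (skb_shared(skb))
		return;

	skb->csum = csum;
	skb->ip_summed = CHECKSUM_COMPLETE;
	skb->csum_complete_sw = 1;
	skb->csum_valid = !sum;
}
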
*/ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path) @@ -2497,6 +2495,9 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, return ERR_PTR(err); } + BUILD_BUG_ON(SKB_SGO_CB_OFFSET + + sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); SKB_GSO_CB(skb)->encap_level = 0; @@ -3341,6 +3342,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, local_irq_save(flags); rps_lock(sd); + if (!netif_running(skb->dev)) + goto drop; qlen = skb_queue_len(&sd->input_pkt_queue); if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { if (qlen) { @@ -3362,6 +3365,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, goto enqueue; } +drop: sd->dropped++; rps_unlock(sd); @@ -3667,8 +3671,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) pt_prev = NULL; - rcu_read_lock(); - another_round: skb->skb_iif = skb->dev->ifindex; @@ -3678,7 +3680,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) skb->protocol == cpu_to_be16(ETH_P_8021AD)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) - goto unlock; + goto out; } #ifdef CONFIG_NET_CLS_ACT @@ -3708,7 +3710,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) if (static_key_false(&ingress_needed)) { skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto unlock; + goto out; } skb->tc_verd = 0; @@ -3725,7 +3727,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) if (vlan_do_receive(&skb)) goto another_round; else if (unlikely(!skb)) - goto unlock; + goto out; } rx_handler = rcu_dereference(skb->dev->rx_handler); @@ -3737,7 +3739,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: ret = NET_RX_SUCCESS; - goto unlock; + goto out; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: @@ -3791,8 +3793,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) ret = NET_RX_DROP; } -unlock: - rcu_read_unlock(); +out: return ret; } @@ -3823,29 +3824,30 @@ static int __netif_receive_skb(struct sk_buff *skb) static int netif_receive_skb_internal(struct sk_buff *skb) { + int ret; + net_timestamp_check(netdev_tstamp_prequeue, skb); if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; + rcu_read_lock(); + #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; - int cpu, ret; - - rcu_read_lock(); - - cpu = get_rps_cpu(skb->dev, skb, &rflow); + int cpu = get_rps_cpu(skb->dev, skb, &rflow); if (cpu >= 0) { ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); return ret; } - rcu_read_unlock(); } #endif - return __netif_receive_skb(skb); + ret = __netif_receive_skb(skb); + rcu_read_unlock(); + return ret; } /** @@ -4390,8 +4392,10 @@ static int process_backlog(struct napi_struct *napi, int quota) struct sk_buff *skb; while ((skb = __skb_dequeue(&sd->process_queue))) { + rcu_read_lock(); local_irq_enable(); __netif_receive_skb(skb); + rcu_read_unlock(); local_irq_disable(); input_queue_head_incr(sd); if (++work >= quota) { @@ -6027,6 +6031,7 @@ static void rollback_registered_many(struct list_head *head) unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; + on_each_cpu(flush_backlog, dev, 1); } synchronize_net(); @@ -6297,7 +6302,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev) struct netdev_queue *tx; size_t sz = count * 
sizeof(*tx); - BUG_ON(count < 1 || count > 0xffff); + if (count < 1 || count > 0xffff) + return -EINVAL; tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); if (!tx) { @@ -6650,8 +6656,6 @@ void netdev_run_todo(void) dev->reg_state = NETREG_UNREGISTERED; - on_each_cpu(flush_backlog, dev, 1); - netdev_wait_allrefs(dev); /* paranoia */ diff --git a/net/core/dst.c b/net/core/dst.c index e956ce6d13782..540066cb33efc 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -282,10 +282,11 @@ void dst_release(struct dst_entry *dst) { if (dst) { int newrefcnt; + unsigned short nocache = dst->flags & DST_NOCACHE; newrefcnt = atomic_dec_return(&dst->__refcnt); WARN_ON(newrefcnt < 0); - if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) + if (!newrefcnt && unlikely(nocache)) call_rcu(&dst->rcu_head, dst_destroy_rcu); } } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1d00b89229024..4a6824767f3d4 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1273,7 +1273,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) gstrings.len = ret; - data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); + data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER); if (!data) return -ENOMEM; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 9a12668f7d627..0ad144fb0c792 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -615,15 +615,17 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, { int idx = 0; struct fib_rule *rule; + int err = 0; rcu_read_lock(); list_for_each_entry_rcu(rule, &ops->rules_list, list) { if (idx < cb->args[1]) goto skip; - if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWRULE, - NLM_F_MULTI, ops) < 0) + err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWRULE, + NLM_F_MULTI, ops); + if (err) break; skip: idx++; @@ -632,7 +634,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, cb->args[1] = idx; rules_ops_put(ops); - return skb->len; + return err; } static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) @@ -648,7 +650,9 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) if (ops == NULL) return -EAFNOSUPPORT; - return dump_rules(skb, cb, ops); + dump_rules(skb, cb, ops); + + return skb->len; } rcu_read_lock(); diff --git a/net/core/filter.c b/net/core/filter.c index bf831a85c3159..238bb3f9c51dd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -775,6 +775,11 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) if (ftest->k == 0) return -EINVAL; break; + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_K: + if (ftest->k >= 32) + return -EINVAL; + break; case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: case BPF_ST: @@ -1526,9 +1531,13 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, goto out; /* We're copying the filter that has been originally attached, - * so no conversion/decode needed anymore. + * so no conversion/decode needed anymore. eBPF programs that + * have no original program cannot be dumped through this. 
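
The dst_release() change is a subtle use-after-free fix worth spelling out: the moment this CPU drops its reference, another CPU may take the count to zero and free the dst, so dst->flags must be sampled *before* atomic_dec_return(), never after. Reduced to its essence (hypothetical obj type; the real dst_release() defers the free through call_rcu()):

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/slab.h>

#define OBJ_NOCACHE 0x0010

struct obj {
	unsigned short flags;
	atomic_t refcnt;
};

static void obj_put(struct obj *obj)
{
	/* read the flag while our reference still pins the object ... */
	unsigned short nocache = obj->flags & OBJ_NOCACHE;
	int newrefcnt = atomic_dec_return(&obj->refcnt);

	WARN_ON(newrefcnt < 0);
	/* ... because once the count can reach zero, another CPU may
	 * already have freed *obj
	 */
	if (!newrefcnt && nocache)
		kfree(obj);
}
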
*/ + ret = -EACCES; fprog = filter->prog->orig_prog; + if (!fprog) + goto out; ret = fprog->len; if (!len) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2c35c02a931e2..3556791fdc6eb 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -113,7 +113,6 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, case htons(ETH_P_IPV6): { const struct ipv6hdr *iph; struct ipv6hdr _iph; - __be32 flow_label; ipv6: iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); @@ -130,8 +129,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); - flow_label = ip6_flowlabel(iph); - if (flow_label) { + if (skb && ip6_flowlabel(iph)) { + __be32 flow_label = ip6_flowlabel(iph); + /* Awesome, IPv6 packet has a flow label so we can * use that to represent the ports without any * further dissection. @@ -233,6 +233,13 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, return false; proto = eth->h_proto; nhoff += sizeof(*eth); + + /* Cap headers that we access via pointers at the + * end of the Ethernet header as our maximum alignment + * at that point is only 2 bytes. + */ + if (NET_IP_ALIGN) + hlen = nhoff; } goto again; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3de6542560288..d6e8cfcb6f7cd 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -957,6 +957,8 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) rc = 0; if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) goto out_unlock_bh; + if (neigh->dead) + goto out_dead; if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { if (NEIGH_VAR(neigh->parms, MCAST_PROBES) + @@ -1013,6 +1015,13 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) write_unlock(&neigh->lock); local_bh_enable(); return rc; + +out_dead: + if (neigh->nud_state & NUD_STALE) + goto out_unlock_bh; + write_unlock_bh(&neigh->lock); + kfree_skb(skb); + return 1; } EXPORT_SYMBOL(__neigh_event_send); @@ -1076,6 +1085,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, if (!(flags & NEIGH_UPDATE_F_ADMIN) && (old & (NUD_NOARP | NUD_PERMANENT))) goto out; + if (neigh->dead) + goto out; if (!(new & NUD_VALID)) { neigh_del_timer(neigh); @@ -1225,6 +1236,8 @@ EXPORT_SYMBOL(neigh_update); */ void __neigh_set_probe_once(struct neighbour *neigh) { + if (neigh->dead) + return; neigh->updated = jiffies; if (!(neigh->nud_state & NUD_FAILED)) return; @@ -2194,7 +2207,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, ndm->ndm_pad2 = 0; ndm->ndm_flags = pn->flags | NTF_PROXY; ndm->ndm_type = RTN_UNICAST; - ndm->ndm_ifindex = pn->dev->ifindex; + ndm->ndm_ifindex = pn->dev ? 
pn->dev->ifindex : 0; ndm->ndm_state = NUD_NONE; if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) @@ -2269,7 +2282,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (h > s_h) s_idx = 0; for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { - if (dev_net(n->dev) != net) + if (pneigh_net(n) != net) continue; if (idx < s_idx) goto next; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 508155b283ddc..043ea1867d0f0 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3490,8 +3490,10 @@ static int pktgen_thread_worker(void *arg) pktgen_rem_thread(t); /* Wait for kthread_stop */ - while (!kthread_should_stop()) { + for (;;) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; schedule(); } __set_current_state(TASK_RUNNING); diff --git a/net/core/request_sock.c b/net/core/request_sock.c index ff61742a791d3..a0d33d4be380a 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -106,10 +106,16 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) spin_lock_bh(&queue->syn_wait_lock); while ((req = lopt->syn_table[i]) != NULL) { lopt->syn_table[i] = req->dl_next; + /* Because of following del_timer_sync(), + * we must release the spinlock here + * or risk a dead lock. + */ + spin_unlock_bh(&queue->syn_wait_lock); atomic_inc(&lopt->qlen_dec); - if (del_timer(&req->rsk_timer)) + if (del_timer_sync(&req->rsk_timer)) reqsk_put(req); reqsk_put(req); + spin_lock_bh(&queue->syn_wait_lock); } spin_unlock_bh(&queue->syn_wait_lock); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8de36824018de..fe95cb704aaa0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1287,10 +1287,6 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED }, }; -static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { - [IFLA_VF_INFO] = { .type = NLA_NESTED }, -}; - static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, @@ -1437,96 +1433,98 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return 0; } -static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) +static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) { - int rem, err = -EINVAL; - struct nlattr *vf; const struct net_device_ops *ops = dev->netdev_ops; + int err = -EINVAL; - nla_for_each_nested(vf, attr, rem) { - switch (nla_type(vf)) { - case IFLA_VF_MAC: { - struct ifla_vf_mac *ivm; - ivm = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_mac) - err = ops->ndo_set_vf_mac(dev, ivm->vf, - ivm->mac); - break; - } - case IFLA_VF_VLAN: { - struct ifla_vf_vlan *ivv; - ivv = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_vlan) - err = ops->ndo_set_vf_vlan(dev, ivv->vf, - ivv->vlan, - ivv->qos); - break; - } - case IFLA_VF_TX_RATE: { - struct ifla_vf_tx_rate *ivt; - struct ifla_vf_info ivf; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_get_vf_config) - err = ops->ndo_get_vf_config(dev, ivt->vf, - &ivf); - if (err) - break; - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivf.min_tx_rate, - ivt->rate); - break; - } - case IFLA_VF_RATE: { - struct ifla_vf_rate *ivt; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivt->min_tx_rate, - ivt->max_tx_rate); - break; - } - 
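
The pktgen_thread_worker() rewrite fixes a textbook sleep/wake-up race. In the old while (!kthread_should_stop()) form, kthread_stop() could set the flag and call wake_up_process() between the flag test and set_current_state(TASK_INTERRUPTIBLE); the wake-up would hit a still-running task and be lost, and the subsequent schedule() would sleep forever. Publishing the sleeping state before testing the flag closes the window:

#include <linux/kthread.h>
#include <linux/sched.h>

static int worker_fn(void *arg)
{
	/* ... main loop, exited when asked to stop ... */

	/* wait for kthread_stop() without losing the wake-up */
	for (;;) {
		/* mark ourselves sleeping first ... */
		set_current_state(TASK_INTERRUPTIBLE);
		/* ... then test: a stop issued after this point finds
		 * us in TASK_INTERRUPTIBLE and puts us back on the
		 * runqueue
		 */
		if (kthread_should_stop())
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);

	return 0;
}

This is the same ordering the patched code adopts, shown here as a standalone thread function.
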
case IFLA_VF_SPOOFCHK: { - struct ifla_vf_spoofchk *ivs; - ivs = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_spoofchk) - err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, - ivs->setting); - break; - } - case IFLA_VF_LINK_STATE: { - struct ifla_vf_link_state *ivl; - ivl = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_link_state) - err = ops->ndo_set_vf_link_state(dev, ivl->vf, - ivl->link_state); - break; - } - case IFLA_VF_RSS_QUERY_EN: { - struct ifla_vf_rss_query_en *ivrssq_en; + if (tb[IFLA_VF_MAC]) { + struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); - ivrssq_en = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rss_query_en) - err = ops->ndo_set_vf_rss_query_en(dev, - ivrssq_en->vf, - ivrssq_en->setting); - break; - } - default: - err = -EINVAL; - break; - } - if (err) - break; + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_mac) + err = ops->ndo_set_vf_mac(dev, ivm->vf, + ivm->mac); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_VLAN]) { + struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_vlan) + err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, + ivv->qos); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_TX_RATE]) { + struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); + struct ifla_vf_info ivf; + + err = -EOPNOTSUPP; + if (ops->ndo_get_vf_config) + err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf); + if (err < 0) + return err; + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivf.min_tx_rate, + ivt->rate); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_RATE]) { + struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivt->min_tx_rate, + ivt->max_tx_rate); + if (err < 0) + return err; } + + if (tb[IFLA_VF_SPOOFCHK]) { + struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_spoofchk) + err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, + ivs->setting); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_LINK_STATE]) { + struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_link_state) + err = ops->ndo_set_vf_link_state(dev, ivl->vf, + ivl->link_state); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_RSS_QUERY_EN]) { + struct ifla_vf_rss_query_en *ivrssq_en; + + err = -EOPNOTSUPP; + ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]); + if (ops->ndo_set_vf_rss_query_en) + err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf, + ivrssq_en->setting); + if (err < 0) + return err; + } + return err; } @@ -1722,14 +1720,21 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_VFINFO_LIST]) { + struct nlattr *vfinfo[IFLA_VF_MAX + 1]; struct nlattr *attr; int rem; + nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { - if (nla_type(attr) != IFLA_VF_INFO) { + if (nla_type(attr) != IFLA_VF_INFO || + nla_len(attr) < NLA_HDRLEN) { err = -EINVAL; goto errout; } - err = do_setvfinfo(dev, attr); + err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr, + ifla_vf_policy); + if (err < 0) + goto errout; + err = do_setvfinfo(dev, vfinfo); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; diff --git a/net/core/scm.c b/net/core/scm.c index 3b6899b7d810d..dce0acb929f16 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -87,6 +87,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) *fplp = fpl; fpl->count = 0; fpl->max = 
SCM_MAX_FD; + fpl->user = NULL; } fpp = &fpl->fp[fpl->count]; @@ -107,6 +108,10 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) *fpp++ = file; fpl->count++; } + + if (!fpl->user) + fpl->user = get_uid(current_user()); + return num; } @@ -119,6 +124,7 @@ void __scm_destroy(struct scm_cookie *scm) scm->fp = NULL; for (i=fpl->count-1; i>=0; i--) fput(fpl->fp[i]); + free_uid(fpl->user); kfree(fpl); } } @@ -305,6 +311,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) err = put_user(cmlen, &cm->cmsg_len); if (!err) { cmlen = CMSG_SPACE(i*sizeof(int)); + if (msg->msg_controllen < cmlen) + cmlen = msg->msg_controllen; msg->msg_control += cmlen; msg->msg_controllen -= cmlen; } @@ -334,6 +342,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) for (i = 0; i < fpl->count; i++) get_file(fpl->fp[i]); new_fpl->max = new_fpl->count; + new_fpl->user = get_uid(fpl->user); } return new_fpl; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e7ddb8c7ffb32..8e787a4834940 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -79,6 +79,8 @@ struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; +int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; +EXPORT_SYMBOL(sysctl_max_skb_frags); /** * skb_panic - private function for out-of-line support @@ -340,7 +342,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) if (skb && frag_size) { skb->head_frag = 1; - if (virt_to_head_page(data)->pfmemalloc) + if (page_is_pfmemalloc(virt_to_head_page(data))) skb->pfmemalloc = 1; } return skb; @@ -2976,11 +2978,12 @@ EXPORT_SYMBOL(skb_append_datato_frags); */ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { + unsigned char *data = skb->data; + BUG_ON(len > skb->len); - skb->len -= len; - BUG_ON(skb->len < skb->data_len); - skb_postpull_rcsum(skb, skb->data, len); - return skb->data += len; + __skb_pull(skb, len); + skb_postpull_rcsum(skb, data, len); + return skb->data; } EXPORT_SYMBOL_GPL(skb_pull_rcsum); @@ -3660,7 +3663,8 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, serr->ee.ee_info = tstype; if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; - if (sk->sk_protocol == IPPROTO_TCP) + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) serr->ee.ee_data -= sk->sk_tskey; } @@ -4199,7 +4203,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) return NULL; } - memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN, + 2 * ETH_ALEN); skb->mac_header += VLAN_HLEN; return skb; } diff --git a/net/core/sock.c b/net/core/sock.c index d042226abb4e0..a7bf2d65ad2d0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -864,7 +864,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname, if (val & SOF_TIMESTAMPING_OPT_ID && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { - if (sk->sk_protocol == IPPROTO_TCP) { + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) { if (sk->sk_state != TCP_ESTABLISHED) { ret = -EINVAL; break; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 74dddf84adcdd..556ecf96a385b 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -86,6 +86,9 @@ int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, goto out; fprog = filter->prog->orig_prog; + if (!fprog) + goto out; + flen = bpf_classic_proglen(fprog); attr = 
nla_reserve(skb, attrtype, flen); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 95b6139d710c4..a6beb7b6ae556 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -26,6 +26,7 @@ static int zero = 0; static int one = 1; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; +static int max_skb_frags = MAX_SKB_FRAGS; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -392,6 +393,15 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "max_skb_frags", + .data = &sysctl_max_skb_frags, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &max_skb_frags, + }, { } }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 5165571f397aa..a0490508d2135 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -202,7 +202,9 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { @@ -219,7 +221,10 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr); fl6.daddr = ireq->ir_v6_rmt_addr; - err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); + rcu_read_lock(); + err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); + rcu_read_unlock(); err = net_xmit_eval(err); } @@ -415,6 +420,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); + struct ipv6_txoptions *opt; struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; @@ -534,13 +540,15 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. 
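 *
 * Note (editorial sketch, identifiers as in the code below): the RCU
 * discipline this hunk moves to is
 *
 *	opt = rcu_dereference(np->opt);                read side
 *	if (opt)
 *		RCU_INIT_POINTER(newnp->opt,
 *				 ipv6_dup_options(newsk, opt));
 *
 * and, where the socket lock is held, as in dccp_v6_connect(),
 *
 *	opt = rcu_dereference_protected(np->opt,
 *					sock_owned_by_user(sk));
 *
 * Duplicating the options means the child socket never dereferences
 * the parent's pointer again, so a concurrent setsockopt() on the
 * listener cannot free it underneath us.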
*/ - if (np->opt != NULL) - newnp->opt = ipv6_dup_options(newsk, np->opt); - + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); + } inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt != NULL) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; dccp_sync_mss(newsk, dst_mtu(dst)); @@ -793,6 +801,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; @@ -892,7 +901,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { @@ -912,9 +922,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; - if (np->opt != NULL) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; inet->inet_dport = usin->sin6_port; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 30addee2dd037..838f524cf11a1 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -48,8 +48,6 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) tw->tw_ipv6only = sk->sk_ipv6only; } #endif - /* Linkage updates. */ - __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); /* Get the TIME_WAIT timeout firing. */ if (timeo < rto) @@ -60,6 +58,8 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) timeo = DCCP_TIMEWAIT_LEN; inet_twsk_schedule(tw, timeo); + /* Linkage updates. 
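 * Note: this hunk reorders the function so the timewait socket is
 * hashed only after inet_twsk_schedule() has armed its timer; this
 * pairs with the tw_refcnt change in inet_timewait_sock.c below,
 * where the timer now holds its own reference, so lookups must never
 * find a tw socket whose timer reference does not exist yet.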
*/ + __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 754484b3cd0e8..2783c538ec193 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -678,6 +678,9 @@ static int dn_create(struct net *net, struct socket *sock, int protocol, { struct sock *sk; + if (protocol < 0 || protocol > SK_PROTOCOL_MAX) + return -EINVAL; + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 827cda560a552..57978c5b2c91d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -732,7 +732,8 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p, return -ENODEV; /* Use already configured phy mode */ - p->phy_interface = p->phy->interface; + if (p->phy_interface == PHY_INTERFACE_MODE_NA) + p->phy_interface = p->phy->interface; phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, p->phy_interface); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index b60c65f70346a..627a2537634e4 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -739,6 +739,12 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (saddr) { + /* Clear the implicit padding in struct sockaddr_ieee802154 + * (16 bits between 'family' and 'addr') and in struct + * ieee802154_addr_sa (16 bits at the end of the structure). + */ + memset(saddr, 0, sizeof(*saddr)); + saddr->family = AF_IEEE802154; ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); *addr_len = sizeof(*saddr); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 19773ec832da0..d26e424f1f96a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -232,6 +232,8 @@ int inet_listen(struct socket *sock, int backlog) err = 0; if (err) goto out; + + tcp_fastopen_init_key_once(true); } err = inet_csk_listen_start(sk, backlog); if (err) @@ -260,6 +262,9 @@ int inet_create(struct net *net, struct socket *sock, int protocol, int kern) int try_loading_module = 0; int err; + if (protocol < 0 || protocol >= IPPROTO_MAX) + return -EINVAL; + sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. 
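 * Note on the range check added above: protocol comes straight from
 * socket(2) and sk->sk_protocol is only an 8-bit field, so an
 * unchecked value would be silently truncated.  For example
 *
 *	socket(AF_INET, SOCK_RAW, 0x40000006);
 *
 * (raw sockets accept arbitrary protocol numbers) would truncate to
 * 6 and could later pass sk->sk_protocol == IPPROTO_TCP style checks
 * for the wrong socket type; it now fails with -EINVAL.  The
 * af_decnet.c hunk above adds the same guard using SK_PROTOCOL_MAX.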
*/ diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 90c0e83861161..574fad9cca052 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -20,7 +20,7 @@ #include #include -int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; @@ -39,8 +39,6 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_dst_reset(sk); - lock_sock(sk); - oif = sk->sk_bound_dev_if; saddr = inet->inet_saddr; if (ipv4_is_multicast(usin->sin_addr.s_addr)) { @@ -82,9 +80,19 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_dst_set(sk, &rt->dst); err = 0; out: - release_sock(sk); return err; } +EXPORT_SYMBOL(__ip4_datagram_connect); + +int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip4_datagram_connect(sk, uaddr, addr_len); + release_sock(sk); + return res; +} EXPORT_SYMBOL(ip4_datagram_connect); /* Because UDP xmit path can manipulate sk_dst_cache without holding diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 419d23c53ec75..280d46f947ea8 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1839,7 +1839,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, if (err < 0) goto errout; - err = EINVAL; + err = -EINVAL; if (!tb[NETCONFA_IFINDEX]) goto errout; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 09b62e17dd8cb..93b8029848192 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1547,7 +1547,7 @@ static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key) do { /* record parent and next child index */ pn = n; - cindex = key ? get_index(key, pn) : 0; + cindex = (key > pn->key) ? 
get_index(key, pn) : 0; if (cindex >> pn->bits) break; @@ -1780,8 +1780,6 @@ void fib_table_flush_external(struct fib_table *tb) if (hlist_empty(&n->leaf)) { put_child_root(pn, n->key, NULL); node_free(n); - } else { - leaf_pull_suffix(pn, n); } } } @@ -1852,8 +1850,6 @@ int fib_table_flush(struct fib_table *tb) if (hlist_empty(&n->leaf)) { put_child_root(pn, n->key, NULL); node_free(n); - } else { - leaf_pull_suffix(pn, n); } } @@ -2457,7 +2453,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, key = l->key + 1; iter->pos++; - if (pos-- <= 0) + if (--pos <= 0) break; l = NULL; diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 34968cd5c1464..4b67937692c99 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -24,6 +24,7 @@ struct fou { u16 type; struct udp_offload udp_offloads; struct list_head list; + struct rcu_head rcu; }; #define FOU_F_REMCSUM_NOPARTIAL BIT(0) @@ -421,7 +422,7 @@ static void fou_release(struct fou *fou) list_del(&fou->list); udp_tunnel_sock_release(sock); - kfree(fou); + kfree_rcu(fou, rcu); } static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 5aa46d4b44efb..5a8ee32825508 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -36,7 +36,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP))) + SKB_GSO_IPIP | + SKB_GSO_SIT))) goto out; if (!skb->encapsulation) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d30f6fdcb0c24..53ec50d4924ea 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -569,23 +569,24 @@ EXPORT_SYMBOL(inet_rtx_syn_ack); static bool reqsk_queue_unlink(struct request_sock_queue *queue, struct request_sock *req) { - struct listen_sock *lopt = queue->listen_opt; struct request_sock **prev; + struct listen_sock *lopt; bool found = false; spin_lock(&queue->syn_wait_lock); - - for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; - prev = &(*prev)->dl_next) { - if (*prev == req) { - *prev = req->dl_next; - found = true; - break; + lopt = queue->listen_opt; + if (lopt) { + for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; + prev = &(*prev)->dl_next) { + if (*prev == req) { + *prev = req->dl_next; + found = true; + break; + } } } - spin_unlock(&queue->syn_wait_lock); - if (del_timer(&req->rsk_timer)) + if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) reqsk_put(req); return found; } @@ -678,20 +679,20 @@ void reqsk_queue_hash_req(struct request_sock_queue *queue, req->num_timeout = 0; req->sk = NULL; + setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); + mod_timer_pinned(&req->rsk_timer, jiffies + timeout); + req->rsk_hash = hash; + /* before letting lookups find us, make sure all req fields * are committed to memory and refcnt initialized. 
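 * In other words, after this reordering the code follows the usual
 * publish pattern (annotated, same statements as below):
 *
 *	setup_timer(&req->rsk_timer, ...);       initialise everything
 *	mod_timer_pinned(&req->rsk_timer, ...);
 *	req->rsk_hash = hash;
 *	smp_wmb();                               commit the fields
 *	atomic_set(&req->rsk_refcnt, 2);         only then publish
 *
 * so a lookup racing with us can never observe a request whose timer
 * or hash is still uninitialised.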
*/ smp_wmb(); atomic_set(&req->rsk_refcnt, 2); - setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); - req->rsk_hash = hash; spin_lock(&queue->syn_wait_lock); req->dl_next = lopt->syn_table[hash]; lopt->syn_table[hash] = req; spin_unlock(&queue->syn_wait_lock); - - mod_timer_pinned(&req->rsk_timer, jiffies + timeout); } EXPORT_SYMBOL(reqsk_queue_hash_req); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 00ec8d5d7e7ee..bb96c1c4edd68 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -153,13 +153,15 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, /* * Step 2: Hash TW into tcp ehash chain. * Notes : - * - tw_refcnt is set to 3 because : + * - tw_refcnt is set to 4 because : * - We have one reference from bhash chain. * - We have one reference from ehash chain. + * - We have one reference from timer. + * - One reference for ourself (our caller will release it). * We can use atomic_set() because prior spin_lock()/spin_unlock() * committed into memory all tw fields. */ - atomic_set(&tw->tw_refcnt, 1 + 1 + 1); + atomic_set(&tw->tw_refcnt, 4); inet_twsk_add_node_rcu(tw, &ehead->chain); /* Step 3: Remove SK from hash chain */ @@ -243,7 +245,7 @@ void inet_twsk_deschedule(struct inet_timewait_sock *tw) } EXPORT_SYMBOL(inet_twsk_deschedule); -void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo) +void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) { /* timeout := RTO * 3.5 * @@ -271,12 +273,14 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo) */ tw->tw_kill = timeo <= 4*HZ; - if (!mod_timer_pinned(&tw->tw_timer, jiffies + timeo)) { - atomic_inc(&tw->tw_refcnt); + if (!rearm) { + BUG_ON(mod_timer_pinned(&tw->tw_timer, jiffies + timeo)); atomic_inc(&tw->tw_dr->tw_count); + } else { + mod_timer_pending(&tw->tw_timer, jiffies + timeo); } } -EXPORT_SYMBOL_GPL(inet_twsk_schedule); +EXPORT_SYMBOL_GPL(__inet_twsk_schedule); void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cc1da6d9cb351..cae22a1a87770 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -342,7 +342,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) ihl = ip_hdrlen(skb); /* Determine the position of this fragment. */ - end = offset + skb->len - ihl; + end = offset + skb->len - skb_network_offset(skb) - ihl; err = -EINVAL; /* Is this the final fragment? */ @@ -372,7 +372,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) goto err; err = -ENOMEM; - if (!pskb_pull(skb, ihl)) + if (!pskb_pull(skb, skb_network_offset(skb) + ihl)) goto err; err = pskb_trim_rcsum(skb, end - offset); @@ -613,6 +613,9 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0; iph->tot_len = htons(len); iph->tos |= ecn; + + ip_send_check(iph); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; qp->q.fragments_tail = NULL; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c65b93a7b7113..51573f8a39bca 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -235,6 +235,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb) * from host network stack. 
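 * Note: the BUILD_BUG_ON added below is a compile-time guard.  It
 * encodes the assumption that the IP control block, struct
 * inet_skb_parm, fits in skb->cb[] below SKB_SGO_CB_OFFSET, i.e.
 *
 *	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > SKB_SGO_CB_OFFSET);
 *
 * so the segmentation code, which uses the upper part of cb[], cannot
 * silently corrupt IPCB(skb); if the struct ever grows past the
 * offset, the build breaks instead.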
*/ features = netif_skb_features(skb); + BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) { kfree_skb(skb); @@ -893,7 +894,7 @@ static int __ip_append_data(struct sock *sk, if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && - (sk->sk_type == SOCK_DGRAM)) { + (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 7cfb0893f2636..b6c7bdea48533 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -249,6 +249,8 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, switch (cmsg->cmsg_type) { case IP_RETOPTS: err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); + + /* Our caller is responsible for freeing ipc->opt */ err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40); if (err) @@ -432,6 +434,15 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf kfree_skb(skb); } +/* For some errors we have valid addr_offset even with zero payload and + * zero port. Also, addr_offset should be supported if port is set. + */ +static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr) +{ + return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port; +} + /* IPv4 supports cmsg on all imcp errors and some timestamps * * Timestamp code paths do not initialize the fields expected by cmsg: @@ -498,7 +509,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin && serr->port) { + if (sin && ipv4_datagram_support_addr(serr)) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 4c2c3ba4ba659..626d9e56a6bd2 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -586,7 +586,8 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, EXPORT_SYMBOL(ip_tunnel_encap); static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, - struct rtable *rt, __be16 df) + struct rtable *rt, __be16 df, + const struct iphdr *inner_iph) { struct ip_tunnel *tunnel = netdev_priv(dev); int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; @@ -603,7 +604,8 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && - (df & htons(IP_DF)) && mtu < pkt_size) { + (inner_iph->frag_off & htons(IP_DF)) && + mtu < pkt_size) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return -E2BIG; @@ -737,7 +739,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { ip_rt_put(rt); goto tx_error; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 3a2c0162c3bad..c3bfebd501ed1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -134,7 +134,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); static void mroute_netlink_event(struct mr_table *mrt, struct 
mfc_cache *mfc, int cmd); -static void mroute_clean_tables(struct mr_table *mrt); +static void mroute_clean_tables(struct mr_table *mrt, bool all); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -351,7 +351,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) static void ipmr_free_table(struct mr_table *mrt) { del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, true); kfree(mrt); } @@ -1209,7 +1209,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr_table *mrt) +static void mroute_clean_tables(struct mr_table *mrt, bool all) { int i; LIST_HEAD(list); @@ -1218,8 +1218,9 @@ static void mroute_clean_tables(struct mr_table *mrt) /* Shut down all active vif entries */ for (i = 0; i < mrt->maxvif; i++) { - if (!(mrt->vif_table[i].flags & VIFF_STATIC)) - vif_delete(mrt, i, 0, &list); + if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) + continue; + vif_delete(mrt, i, 0, &list); } unregister_netdevice_many(&list); @@ -1227,7 +1228,7 @@ static void mroute_clean_tables(struct mr_table *mrt) for (i = 0; i < MFC_LINES; i++) { list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { - if (c->mfc_flags & MFC_STATIC) + if (!all && (c->mfc_flags & MFC_STATIC)) continue; list_del_rcu(&c->list); mroute_netlink_event(mrt, c, RTM_DELROUTE); @@ -1262,7 +1263,7 @@ static void mrtsock_destruct(struct sock *sk) NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); RCU_INIT_POINTER(mrt->mroute_sk, NULL); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, false); } } rtnl_unlock(); @@ -1683,8 +1684,8 @@ static inline int ipmr_forward_finish(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); - IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); + IP_INC_STATS(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_ADD_STATS(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) ip_forward_options(skb); @@ -1746,7 +1747,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * to blackhole. 
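 * Note on the statistics change below: the _BH accessors are built on
 * __this_cpu_inc() and are only safe where bottom halves are already
 * disabled, while this path can also run in process context, hence
 * the switch to the plain, preemption-safe variants:
 *
 *	IP_INC_STATS(net, field);        this_cpu_inc(), preempt safe
 *	IP_INC_STATS_BH(net, field);     __this_cpu_inc(), BH ctx only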
*/ - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 05ff44b758dfe..f6ee0d561aab3 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -745,8 +745,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); - if (err) + if (unlikely(err)) { + kfree(ipc.opt); return err; + } if (ipc.opt) free = 1; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 561cd4b8fc6e0..c77aac75759d9 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -543,8 +543,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); - if (err) + if (unlikely(err)) { + kfree(ipc.opt); goto out; + } if (ipc.opt) free = 1; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f45f2a12f37b2..1d3cdb4d4ebcb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -125,6 +125,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; +static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; /* * Interface to generic destination cache. */ @@ -753,7 +754,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, new_gw, - 0, 0); + 0, jiffies + ip_rt_gc_timeout); } if (kill_route) rt->dst.obsolete = DST_OBSOLETE_KILL; @@ -1538,6 +1539,36 @@ static void ip_handle_martian_source(struct net_device *dev, #endif } +static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) +{ + struct fnhe_hash_bucket *hash; + struct fib_nh_exception *fnhe, __rcu **fnhe_p; + u32 hval = fnhe_hashfun(daddr); + + spin_lock_bh(&fnhe_lock); + + hash = rcu_dereference_protected(nh->nh_exceptions, + lockdep_is_held(&fnhe_lock)); + hash += hval; + + fnhe_p = &hash->chain; + fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); + while (fnhe) { + if (fnhe->fnhe_daddr == daddr) { + rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( + fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); + fnhe_flush_routes(fnhe); + kfree_rcu(fnhe, rcu); + break; + } + fnhe_p = &fnhe->fnhe_next; + fnhe = rcu_dereference_protected(fnhe->fnhe_next, + lockdep_is_held(&fnhe_lock)); + } + + spin_unlock_bh(&fnhe_lock); +} + /* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, @@ -1592,11 +1623,20 @@ static int __mkroute_input(struct sk_buff *skb, fnhe = find_exception(&FIB_RES_NH(*res), daddr); if (do_cache) { - if (fnhe) + if (fnhe) { rth = rcu_dereference(fnhe->fnhe_rth_input); - else - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (rth && rth->dst.expires && + time_after(jiffies, rth->dst.expires)) { + ip_del_fnhe(&FIB_RES_NH(*res), daddr); + fnhe = NULL; + } else { + goto rt_cache; + } + } + + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); +rt_cache: if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -1945,19 +1985,29 @@ static struct rtable *__mkroute_output(const struct fib_result *res, struct fib_nh *nh = &FIB_RES_NH(*res); fnhe = find_exception(nh, fl4->daddr); - if (fnhe) + if (fnhe) { prth = &fnhe->fnhe_rth_output; - else { - if (unlikely(fl4->flowi4_flags & - FLOWI_FLAG_KNOWN_NH && - !(nh->nh_gw && - 
nh->nh_scope == RT_SCOPE_LINK))) { - do_cache = false; - goto add; + rth = rcu_dereference(*prth); + if (rth && rth->dst.expires && + time_after(jiffies, rth->dst.expires)) { + ip_del_fnhe(nh, fl4->daddr); + fnhe = NULL; + } else { + goto rt_cache; } - prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); } + + if (unlikely(fl4->flowi4_flags & + FLOWI_FLAG_KNOWN_NH && + !(nh->nh_gw && + nh->nh_scope == RT_SCOPE_LINK))) { + do_cache = false; + goto add; + } + prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); rth = rcu_dereference(*prth); + +rt_cache: if (rt_cache_valid(rth)) { dst_hold(&rth->dst); return rth; @@ -2504,7 +2554,6 @@ void ip_rt_multicast_event(struct in_device *in_dev) } #ifdef CONFIG_SYSCTL -static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_gc_elasticity __read_mostly = 8; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index c3852a7ff3c76..f0e8297359680 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -45,10 +45,10 @@ static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; /* Update system visible IP port range */ static void set_local_port_range(struct net *net, int range[2]) { - write_seqlock(&net->ipv4.ip_local_ports.lock); + write_seqlock_bh(&net->ipv4.ip_local_ports.lock); net->ipv4.ip_local_ports.range[0] = range[0]; net->ipv4.ip_local_ports.range[1] = range[1]; - write_sequnlock(&net->ipv4.ip_local_ports.lock); + write_sequnlock_bh(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5ddf781e566fa..baba5ee6cec76 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -160,7 +160,8 @@ * generates them. * Alan Cox : Cache last socket. * Alan Cox : Per route irtt. - * Matt Day : poll()->select() match BSD precisely on error + * Matt Day : poll()->select() match BSD precisely on + *error * Alan Cox : New buffers * Marc Tamsky : Various sk->prot->retransmits and * sk->retransmits misupdating fixed. @@ -168,8 +169,10 @@ * and TCP syn retries gets used now. * Mark Yarvis : In tcp_read_wakeup(), don't send an * ack if state is TCP_CLOSED. - * Alan Cox : Look up device on a retransmit - routes may - * change. Doesn't yet cope with MSS shrink right + * Alan Cox : Look up device on a retransmit - routes + *may + * change. Doesn't yet cope with MSS shrink + *right * but it's a start! * Marc Tamsky : Closing in closing fixes. * Mike Shaver : RFC1122 verifications. @@ -247,41 +250,45 @@ #define pr_fmt(fmt) "TCP: " fmt -#include -#include -#include +#include +#include +#include +#include #include -#include +#include +#include #include #include -#include -#include -#include -#include +#include +#include #include -#include +#include #include -#include -#include +#include +#include +#include +#include +#include #include -#include -#include -#include #include -#include +#include #include #include +#include #include +#include #include #include -#include -#include -#include #include +#include +#include #include +#include "../mptcp/mptcp_rbs_sched.h" +#include "../mptcp/mptcp_rbs_scheduler.h" + int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; int sysctl_tcp_min_tso_segs __read_mostly = 2; @@ -299,7 +306,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem); EXPORT_SYMBOL(sysctl_tcp_rmem); EXPORT_SYMBOL(sysctl_tcp_wmem); -atomic_long_t tcp_memory_allocated; /* Current allocated memory. 
*/ +atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ EXPORT_SYMBOL(tcp_memory_allocated); /* @@ -373,22 +380,22 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max) } const struct tcp_sock_ops tcp_specific = { - .__select_window = __tcp_select_window, - .select_window = tcp_select_window, - .select_initial_window = tcp_select_initial_window, - .select_size = select_size, - .init_buffer_space = tcp_init_buffer_space, - .set_rto = tcp_set_rto, - .should_expand_sndbuf = tcp_should_expand_sndbuf, - .send_fin = tcp_send_fin, - .write_xmit = tcp_write_xmit, - .send_active_reset = tcp_send_active_reset, - .write_wakeup = tcp_write_wakeup, - .prune_ofo_queue = tcp_prune_ofo_queue, - .retransmit_timer = tcp_retransmit_timer, - .time_wait = tcp_time_wait, - .cleanup_rbuf = tcp_cleanup_rbuf, - .cwnd_validate = tcp_cwnd_validate, + .__select_window = __tcp_select_window, + .select_window = tcp_select_window, + .select_initial_window = tcp_select_initial_window, + .select_size = select_size, + .init_buffer_space = tcp_init_buffer_space, + .set_rto = tcp_set_rto, + .should_expand_sndbuf = tcp_should_expand_sndbuf, + .send_fin = tcp_send_fin, + .write_xmit = tcp_write_xmit, + .send_active_reset = tcp_send_active_reset, + .write_wakeup = tcp_write_wakeup, + .prune_ofo_queue = tcp_prune_ofo_queue, + .retransmit_timer = tcp_retransmit_timer, + .time_wait = tcp_time_wait, + .cleanup_rbuf = tcp_cleanup_rbuf, + .cwnd_validate = tcp_cwnd_validate, }; /* Address-family independent initialization for a tcp_sock. @@ -527,8 +534,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) int target = sock_rcvlowat(sk, 0, INT_MAX); if (tp->urg_seq == tp->copied_seq && - !sock_flag(sk, SOCK_URGINLINE) && - tp->urg_data) + !sock_flag(sk, SOCK_URGINLINE) && tp->urg_data) target++; /* Potential race condition. 
If read of tp below will @@ -540,7 +546,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { if (sk_stream_is_writeable(sk)) { mask |= POLLOUT | POLLWRNORM; - } else { /* send SIGIO later */ + } else { /* send SIGIO later */ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); @@ -583,8 +589,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) slow = lock_sock_fast(sk); if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) answ = 0; - else if (sock_flag(sk, SOCK_URGINLINE) || - !tp->urg_data || + else if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data || before(tp->urg_seq, tp->copied_seq) || !before(tp->urg_seq, tp->rcv_nxt)) { @@ -622,7 +627,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) return -ENOIOCTLCMD; } - return put_user(answ, (int __user *)arg); + return put_user(answ, (int __user *) arg); } EXPORT_SYMBOL(tcp_ioctl); @@ -642,16 +647,47 @@ static void skb_entail(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); - skb->csum = 0; - tcb->seq = tcb->end_seq = tp->write_seq; + skb->csum = 0; + tcb->seq = tcb->end_seq = tp->write_seq; tcb->tcp_flags = TCPHDR_ACK; - tcb->sacked = 0; + tcb->sacked = 0; __skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); sk->sk_wmem_queued += skb->truesize; sk_mem_charge(sk, skb->truesize); if (tp->nonagle & TCP_NAGLE_PUSH) tp->nonagle &= ~TCP_NAGLE_PUSH; + + /* RBS specific stuff */ + if (mptcp(tp)) { + /* note: only the meta_sk should be used for this*/ + if (tp->mpcb->meta_sk == (struct sock *) tp) { + struct tcp_sock *meta_tp = tcp_sk(sk); + + /* are we using rbs? */ + if (mptcp_rbs_is_sched_used(meta_tp)) { + struct mptcp_rbs_cb *rbs_cb = + mptcp_rbs_get_cb(meta_tp); + + if (rbs_cb->queue_position == NULL) { + rbs_cb->queue_position = skb; + mptcp_debug("rbs corrects queue " + "position, before NULL, " + "now %p\n", + skb); + } else { + mptcp_debug( + "rbs no need to correct queue " + "position, remains with %p, not " + "switched to %p\n", + rbs_cb->queue_position, skb); + } + } + } else { + printk("avoided bug at the airport 2\n"); + } + } + /* RBS specific stuff END */ } static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) @@ -673,14 +709,13 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, int size_goal) { - return skb->len < size_goal && - sysctl_tcp_autocorking && + return skb->len < size_goal && sysctl_tcp_autocorking && skb != tcp_write_queue_head(sk) && atomic_read(&sk->sk_wmem_alloc) > skb->truesize; } -static void tcp_push(struct sock *sk, int flags, int mss_now, - int nonagle, int size_goal) +static void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle, + int size_goal) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -731,8 +766,7 @@ static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss) { /* Store TCP splice context information in read_descriptor_t. 
*/ read_descriptor_t rd_desc = { - .arg.data = tss, - .count = tss->len, + .arg.data = tss, .count = tss->len, }; return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv); @@ -756,9 +790,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, { struct sock *sk = sock->sk; struct tcp_splice_state tss = { - .pipe = pipe, - .len = len, - .flags = flags, + .pipe = pipe, .len = len, .flags = flags, }; long timeo; ssize_t spliced; @@ -770,7 +802,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, if (mptcp(tcp_sk(sk))) { struct sock *sk_it; mptcp_for_each_sk(tcp_sk(sk)->mpcb, sk_it) - sock_rps_record_flow(sk_it); + sock_rps_record_flow(sk_it); } #endif /* @@ -828,8 +860,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, lock_sock(sk); if (sk->sk_err || sk->sk_state == TCP_CLOSE || - (sk->sk_shutdown & RCV_SHUTDOWN) || - signal_pending(current)) + (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current)) break; } @@ -884,8 +915,8 @@ unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, int large_allowed) size_goal = tp->gso_segs * mss_now; if (unlikely(new_size_goal < size_goal || new_size_goal >= size_goal + mss_now)) { - tp->gso_segs = min_t(u16, new_size_goal / mss_now, - sk->sk_gso_max_segs); + tp->gso_segs = + min_t(u16, new_size_goal / mss_now, sk->sk_gso_max_segs); size_goal = tp->gso_segs * mss_now; } @@ -898,10 +929,12 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) if (mptcp(tcp_sk(sk))) { mss_now = mptcp_current_mss(sk); - *size_goal = mptcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB)); + *size_goal = + mptcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB)); } else { mss_now = tcp_current_mss(sk); - *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB)); + *size_goal = + tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB)); } return mss_now; @@ -921,8 +954,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, * is fully established. */ if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && - !tcp_passive_fastopen(mptcp(tp) && tp->mpcb->master_sk ? - tp->mpcb->master_sk : sk)) { + !tcp_passive_fastopen( + mptcp(tp) && tp->mpcb->master_sk ? 
tp->mpcb->master_sk : sk)) { if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) goto out_err; } @@ -943,8 +976,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, return ret; } - mptcp_for_each_sk(tp->mpcb, sk_it) - sock_rps_record_flow(sk_it); + mptcp_for_each_sk(tp->mpcb, sk_it) sock_rps_record_flow(sk_it); } clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); @@ -962,7 +994,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, bool can_coalesce; if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { -new_segment: + new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; @@ -979,7 +1011,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, i = skb_shinfo(skb)->nr_frags; can_coalesce = skb_can_coalesce(skb, i, page, offset); - if (!can_coalesce && i >= MAX_SKB_FRAGS) { + if (!can_coalesce && i >= sysctl_max_skb_frags) { tcp_mark_push(tp, skb); goto new_segment; } @@ -1024,11 +1056,11 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, tcp_push_one(sk, mss_now); continue; -wait_for_sndbuf: + wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); -wait_for_memory: - tcp_push(sk, flags & ~MSG_MORE, mss_now, - TCP_NAGLE_PUSH, size_goal); + wait_for_memory: + tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH, + size_goal); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; @@ -1048,14 +1080,14 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, return sk_stream_error(sk, flags, err); } -int tcp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) +int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, + int flags) { ssize_t res; /* If MPTCP is enabled, we check it later after establishment */ if (!mptcp(tcp_sk(sk)) && (!(sk->sk_route_caps & NETIF_F_SG) || - !(sk->sk_route_caps & NETIF_F_ALL_CSUM))) + !(sk->sk_route_caps & NETIF_F_ALL_CSUM))) return sock_no_sendpage(sk->sk_socket, page, offset, size, flags); @@ -1108,8 +1140,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, if (tp->fastopen_req) return -EALREADY; /* Another Fast Open is in progress */ - tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request), - sk->sk_allocation); + tp->fastopen_req = + kzalloc(sizeof(struct tcp_fastopen_request), sk->sk_allocation); if (unlikely(!tp->fastopen_req)) return -ENOBUFS; tp->fastopen_req->data = msg; @@ -1150,16 +1182,15 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) * is fully established. */ if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && - !tcp_passive_fastopen(mptcp(tp) && tp->mpcb->master_sk ? - tp->mpcb->master_sk : sk)) { + !tcp_passive_fastopen( + mptcp(tp) && tp->mpcb->master_sk ? tp->mpcb->master_sk : sk)) { if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) goto do_error; } if (mptcp(tp)) { struct sock *sk_it = sk; - mptcp_for_each_sk(tp->mpcb, sk_it) - sock_rps_record_flow(sk_it); + mptcp_for_each_sk(tp->mpcb, sk_it) sock_rps_record_flow(sk_it); } if (unlikely(tp->repair)) { @@ -1204,7 +1235,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) } if (copy <= 0) { -new_segment: + new_segment: /* Allocate new segment. If the interface is SG, * allocate skb fitting to single page. 
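 * Note: with this patch the per-segment frag budget is no longer the
 * compile-time MAX_SKB_FRAGS; the i >= sysctl_max_skb_frags checks in
 * this function and in do_tcp_sendpages() honour the new
 * net.core.max_skb_frags sysctl registered in the sysctl_net_core.c
 * hunk above, e.g.
 *
 *	sysctl -w net.core.max_skb_frags=8
 *
 * caps segments at eight page frags for devices that handle long
 * scatter lists poorly.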
*/ @@ -1226,11 +1257,39 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) * In case of mptcp, hw-csum's will be handled * later in mptcp_write_xmit. */ - if (((mptcp(tp) && !tp->mpcb->dss_csum) || !mptcp(tp)) && + if (((mptcp(tp) && !tp->mpcb->dss_csum) || + !mptcp(tp)) && (mptcp(tp) || sk->sk_route_caps & NETIF_F_ALL_CSUM)) skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); + + /* RBS specific stuff to set skb props*/ + if (mptcp(tp)) { + /* note: only the meta_sk should be used for + * this*/ + if (tp->mpcb->meta_sk == (struct sock *) tp) { + struct tcp_sock *meta_tp = tcp_sk(sk); + + /* are we using rbs? */ + if (mptcp_rbs_is_sched_used(meta_tp)) { + struct mptcp_rbs_cb *rbs_cb = + mptcp_rbs_get_cb(meta_tp); + + mptcp_debug( + "setting skb->mptcp_rbs " + "with %i for %p with size " + "of tcp_skb_cb %lu\n", + TCP_SKB_CB(skb)->mptcp_rbs.user, + skb, + sizeof(struct tcp_skb_cb)); + TCP_SKB_CB(skb)->mptcp_rbs.user = + rbs_cb->skb_prop; + } + } + } + /* RBS specific stuff END */ + copy = size_goal; max = size_goal; @@ -1250,7 +1309,8 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) if (skb_availroom(skb) > 0) { /* We have some space in skb head. Superb! */ copy = min_t(int, copy, skb_availroom(skb)); - err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); + err = + skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); if (err) goto do_fault; } else { @@ -1263,7 +1323,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i == MAX_SKB_FRAGS || !sg) { + if (i == sysctl_max_skb_frags || !sg) { tcp_mark_push(tp, skb); goto new_segment; } @@ -1277,14 +1337,14 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, pfrag->page, - pfrag->offset, - copy); + pfrag->offset, copy); if (err) goto do_error; /* Update the skb. */ if (merge) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); + skb_frag_size_add( + &skb_shinfo(skb)->frags[i - 1], copy); } else { skb_fill_page_desc(skb, i, pfrag->page, pfrag->offset, copy); @@ -1316,12 +1376,12 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) tcp_push_one(sk, mss_now); continue; -wait_for_sndbuf: + wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); -wait_for_memory: + wait_for_memory: if (copied) - tcp_push(sk, flags & ~MSG_MORE, mss_now, - TCP_NAGLE_PUSH, size_goal); + tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH, + size_goal); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; @@ -1368,7 +1428,7 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags) /* No URG data to read. */ if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data || tp->urg_data == TCP_URG_READ) - return -EINVAL; /* Yes this is right ! */ + return -EINVAL; /* Yes this is right ! 
*/ if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE)) return -ENOTCONN; @@ -1412,7 +1472,8 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len) /* XXX -- need to support SO_PEEK_OFF */ - skb_queue_walk(&sk->sk_write_queue, skb) { + skb_queue_walk(&sk->sk_write_queue, skb) + { err = skb_copy_datagram_msg(skb, 0, msg, skb->len); if (err) break; @@ -1437,13 +1498,13 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), - "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", - tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); + "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", tp->copied_seq, + TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); if (inet_csk_ack_scheduled(sk)) { const struct inet_connection_sock *icsk = inet_csk(sk); - /* Delayed ACKs frequently hit locked sockets during bulk - * receive. */ + /* Delayed ACKs frequently hit locked sockets during bulk + * receive. */ if (icsk->icsk_ack.blocked || /* Once-per-two-segments ACK was not sent by tcp_input.c */ tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || @@ -1457,7 +1518,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && !icsk->icsk_ack.pingpong)) && - !atomic_read(&sk->sk_rmem_alloc))) + !atomic_read(&sk->sk_rmem_alloc))) time_to_ack = true; } @@ -1471,12 +1532,13 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) __u32 rcv_window_now = tcp_receive_window(tp); /* Optimize, __tcp_select_window() is not cheap. */ - if (2*rcv_window_now <= tp->window_clamp) { + if (2 * rcv_window_now <= tp->window_clamp) { __u32 new_window = tp->ops->__select_window(sk); /* Send ACK now, if this read freed lots of space * in our buffer. Certainly, new_window is new window. - * We can advertise it now, if it is not less than current one. + * We can advertise it now, if it is not less than + * current one. * "Lots" means "at least twice" here. */ if (new_window && new_window >= 2 * rcv_window_now) @@ -1514,7 +1576,8 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) offset = seq - TCP_SKB_CB(skb)->seq; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) offset--; - if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) { + if (offset < skb->len || + (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) { *off = offset; return skb; } @@ -1627,7 +1690,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, u32 *seq; unsigned long used; int err; - int target; /* Read at least this many bytes */ + int target; /* Read at least this many bytes */ long timeo; struct task_struct *user_recv = NULL; struct sk_buff *skb; @@ -1645,8 +1708,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, #ifdef CONFIG_MPTCP if (mptcp(tp)) { struct sock *sk_it; - mptcp_for_each_sk(tp->mpcb, sk_it) - sock_rps_record_flow(sk_it); + mptcp_for_each_sk(tp->mpcb, sk_it) sock_rps_record_flow(sk_it); } #endif @@ -1686,24 +1748,28 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, do { u32 offset; - /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ + /* Are we at urgent data? Stop if we have read anything or have + * SIGURG pending. */ if (tp->urg_data && tp->urg_seq == *seq) { if (copied) break; if (signal_pending(current)) { - copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; + copied = + timeo ? 
sock_intr_errno(timeo) : -EAGAIN; break; } } /* Next get a buffer. */ - skb_queue_walk(&sk->sk_receive_queue, skb) { + skb_queue_walk(&sk->sk_receive_queue, skb) + { /* Now that we have two receive queues this * shouldn't happen. */ if (WARN(before(*seq, TCP_SKB_CB(skb)->seq), - "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n", + "recvmsg bug: copied %X seq %X rcvnxt %X fl " + "%X\n", *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags)) break; @@ -1715,9 +1781,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, goto found_ok_skb; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; - WARN(!(flags & MSG_PEEK), - "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n", - *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags); + WARN( + !(flags & MSG_PEEK), + "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n", + *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags); } /* Well, if we have backlog, try to process it now yet. */ @@ -1726,10 +1793,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, break; if (copied) { - if (sk->sk_err || - sk->sk_state == TCP_CLOSE || - (sk->sk_shutdown & RCV_SHUTDOWN) || - !timeo || + if (sk->sk_err || sk->sk_state == TCP_CLOSE || + (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || signal_pending(current)) break; } else { @@ -1826,18 +1891,23 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, /* __ Restore normal policy in scheduler __ */ if ((chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); + NET_ADD_STATS_USER( + sock_net(sk), + LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); len -= chunk; copied += chunk; } if (tp->rcv_nxt == tp->copied_seq && !skb_queue_empty(&tp->ucopy.prequeue)) { -do_prequeue: + do_prequeue: tcp_prequeue_process(sk); if ((chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); + NET_ADD_STATS_USER( + sock_net(sk), + LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, + chunk); len -= chunk; copied += chunk; } @@ -1845,9 +1915,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, } if ((flags & MSG_PEEK) && (peek_seq - copied - urg_hole != tp->copied_seq)) { - net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n", - current->comm, - task_pid_nr(current)); + net_dbg_ratelimited( + "TCP(%s:%d): Application bug, race in MSG_PEEK\n", + current->comm, task_pid_nr(current)); peek_seq = tp->copied_seq; } continue; @@ -1892,7 +1962,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, tcp_rcv_space_adjust(sk); -skip_copy: + skip_copy: if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) { tp->urg_data = 0; tcp_fast_path_check(sk); @@ -1923,7 +1993,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, tcp_prequeue_process(sk); if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); + NET_ADD_STATS_USER( + sock_net(sk), + LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } @@ -1975,7 +2047,7 @@ void tcp_set_state(struct sock *sk, int state) if (inet_csk(sk)->icsk_bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) inet_put_port(sk); - /* fall through */ + /* fall through */ default: if (oldstate == TCP_ESTABLISHED) TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); @@ -1987,7 +2059,8 @@ void tcp_set_state(struct sock *sk, int state) sk->sk_state = 
state; #ifdef STATE_TRACE - SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]); + SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], + statename[state]); #endif } EXPORT_SYMBOL_GPL(tcp_set_state); @@ -2000,25 +2073,25 @@ EXPORT_SYMBOL_GPL(tcp_set_state); */ static const unsigned char new_state[16] = { - /* current state: new state: action: */ - [0 /* (Invalid) */] = TCP_CLOSE, - [TCP_ESTABLISHED] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, - [TCP_SYN_SENT] = TCP_CLOSE, - [TCP_SYN_RECV] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, - [TCP_FIN_WAIT1] = TCP_FIN_WAIT1, - [TCP_FIN_WAIT2] = TCP_FIN_WAIT2, - [TCP_TIME_WAIT] = TCP_CLOSE, - [TCP_CLOSE] = TCP_CLOSE, - [TCP_CLOSE_WAIT] = TCP_LAST_ACK | TCP_ACTION_FIN, - [TCP_LAST_ACK] = TCP_LAST_ACK, - [TCP_LISTEN] = TCP_CLOSE, - [TCP_CLOSING] = TCP_CLOSING, - [TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */ + /* current state: new state: action: */ + [0 /* (Invalid) */] = TCP_CLOSE, + [TCP_ESTABLISHED] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, + [TCP_SYN_SENT] = TCP_CLOSE, + [TCP_SYN_RECV] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, + [TCP_FIN_WAIT1] = TCP_FIN_WAIT1, + [TCP_FIN_WAIT2] = TCP_FIN_WAIT2, + [TCP_TIME_WAIT] = TCP_CLOSE, + [TCP_CLOSE] = TCP_CLOSE, + [TCP_CLOSE_WAIT] = TCP_LAST_ACK | TCP_ACTION_FIN, + [TCP_LAST_ACK] = TCP_LAST_ACK, + [TCP_LISTEN] = TCP_CLOSE, + [TCP_CLOSING] = TCP_CLOSING, + [TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */ }; int tcp_close_state(struct sock *sk) { - int next = (int)new_state[sk->sk_state]; + int next = (int) new_state[sk->sk_state]; int ns = next & TCP_STATE_MASK; tcp_set_state(sk, ns); @@ -2041,9 +2114,8 @@ void tcp_shutdown(struct sock *sk, int how) return; /* If we've already sent a FIN, or it's a closed state, skip this. */ - if ((1 << sk->sk_state) & - (TCPF_ESTABLISHED | TCPF_SYN_SENT | - TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) { + if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_SENT | + TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) { /* Clear out any half completed packets. FIN if needed. */ if (tcp_close_state(sk)) tcp_sk(sk)->ops->send_fin(sk); @@ -2061,7 +2133,8 @@ bool tcp_check_oom(struct sock *sk, int shift) if (too_many_orphans) net_info_ratelimited("too many orphaned sockets\n"); if (out_of_socket_memory) - net_info_ratelimited("out of memory -- consider tuning tcp_mem\n"); + net_info_ratelimited( + "out of memory -- consider tuning tcp_mem\n"); return too_many_orphans || out_of_socket_memory; } @@ -2168,7 +2241,6 @@ void tcp_close(struct sock *sk, long timeout) /* It is the last release_sock in its life. It will remove backlog. */ release_sock(sk); - /* Now socket is owned by kernel and we acquire BH lock to finish close. No need to check for user refs. 
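 * For reference, the handoff here is the usual one (surrounding
 * context, not changed by this hunk):
 *
 *	release_sock(sk);         owner drops the lock, backlog runs
 *	local_bh_disable();
 *	bh_lock_sock(sk);         reacquired from (fake) BH context
 *	WARN_ON(sock_owned_by_user(sk));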
*/ @@ -2202,13 +2274,13 @@ void tcp_close(struct sock *sk, long timeout) tcp_set_state(sk, TCP_CLOSE); tp->ops->send_active_reset(sk, GFP_ATOMIC); NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPABORTONLINGER); + LINUX_MIB_TCPABORTONLINGER); } else { const int tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, - tmo - TCP_TIMEWAIT_LEN); + inet_csk_reset_keepalive_timer( + sk, tmo - TCP_TIMEWAIT_LEN); } else { tcp_sk(sk)->ops->time_wait(sk, TCP_FIN_WAIT2, tmo); @@ -2222,7 +2294,7 @@ void tcp_close(struct sock *sk, long timeout) tcp_set_state(sk, TCP_CLOSE); tcp_sk(sk)->ops->send_active_reset(sk, GFP_ATOMIC); NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPABORTONMEMORY); + LINUX_MIB_TCPABORTONMEMORY); } } @@ -2236,7 +2308,7 @@ void tcp_close(struct sock *sk, long timeout) reqsk_fastopen_remove(sk, req, false); inet_csk_destroy_sock(sk); } - /* Otherwise, socket is reprieved until protocol close. */ +/* Otherwise, socket is reprieved until protocol close. */ out: bh_unlock_sock(sk); @@ -2325,11 +2397,12 @@ void tcp_sock_destruct(struct sock *sk) static inline bool tcp_can_repair_sock(const struct sock *sk) { return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) && - ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); } static int tcp_repair_options_est(struct tcp_sock *tp, - struct tcp_repair_opt __user *optbuf, unsigned int len) + struct tcp_repair_opt __user *optbuf, + unsigned int len) { struct tcp_repair_opt opt; @@ -2344,19 +2417,17 @@ static int tcp_repair_options_est(struct tcp_sock *tp, case TCPOPT_MSS: tp->rx_opt.mss_clamp = opt.opt_val; break; - case TCPOPT_WINDOW: - { - u16 snd_wscale = opt.opt_val & 0xFFFF; - u16 rcv_wscale = opt.opt_val >> 16; + case TCPOPT_WINDOW: { + u16 snd_wscale = opt.opt_val & 0xFFFF; + u16 rcv_wscale = opt.opt_val >> 16; - if (snd_wscale > 14 || rcv_wscale > 14) - return -EFBIG; + if (snd_wscale > 14 || rcv_wscale > 14) + return -EFBIG; - tp->rx_opt.snd_wscale = snd_wscale; - tp->rx_opt.rcv_wscale = rcv_wscale; - tp->rx_opt.wscale_ok = 1; - } - break; + tp->rx_opt.snd_wscale = snd_wscale; + tp->rx_opt.rcv_wscale = rcv_wscale; + tp->rx_opt.wscale_ok = 1; + } break; case TCPOPT_SACK_PERM: if (opt.opt_val != 0) return -EINVAL; @@ -2380,8 +2451,8 @@ static int tcp_repair_options_est(struct tcp_sock *tp, /* * Socket option code for TCP. 
 */
-static int do_tcp_setsockopt(struct sock *sk, int level,
-		int optname, char __user *optval, unsigned int optlen)
+static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+			     char __user *optval, unsigned int optlen)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2396,8 +2467,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		if (optlen < 1)
 			return -EINVAL;
 
-		val = strncpy_from_user(name, optval,
-					min_t(long, TCP_CA_NAME_MAX-1, optlen));
+		val = strncpy_from_user(
+		    name, optval, min_t(long, TCP_CA_NAME_MAX - 1, optlen));
 		if (val < 0)
 			return -EFAULT;
 		name[val] = 0;
@@ -2407,6 +2478,56 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		release_sock(sk);
 		return err;
 	}
+#ifdef CONFIG_MPTCP
+	case MPTCP_SCHEDULER: {
+		lock_sock(sk);
+
+		if (mptcp_init_failed || !sysctl_mptcp_enabled ||
+		    !mptcp_rbs_is_sched_used(tp)) {
+			err = -EPERM;
+		} else if (optlen == 0) {
+			if (!mptcp_rbs_scheduler_set(sk, NULL))
+				err = -EINVAL;
+		} else if (optlen >= 256) {
+			err = -EINVAL;
+		} else {
+			char data[256];
+			memset(data, 0, sizeof(data));
+
+			val = strncpy_from_user(data, optval, optlen);
+			if (val < 0)
+				err = -EFAULT;
+			else if (!mptcp_rbs_scheduler_set(sk, data))
+				err = -EINVAL;
+		}
+
+		release_sock(sk);
+		return err;
+	}
+	case MPTCP_SCHEDULER_REG: {
+		lock_sock(sk);
+		if (mptcp_init_failed || !sysctl_mptcp_enabled ||
+		    !mptcp_rbs_is_sched_used(tp)) {
+			err = -EPERM;
+		} else if (optlen != sizeof(struct mptcp_rbs_reg_value)) {
+			err = -EINVAL;
+		} else {
+			struct mptcp_rbs_reg_value reg_value;
+
+			/* copy_from_user() returns the number of bytes left
+			 * uncopied, never a negative value
+			 */
+			val =
+			    copy_from_user(&reg_value, optval,
+					   sizeof(struct mptcp_rbs_reg_value));
+			if (val != 0)
+				err = -EFAULT;
+			else if (!mptcp_rbs_reg_value_set(tp,
+							  &reg_value))
+				err = -EINVAL;
+		}
+
+		release_sock(sk);
+		return err;
+	}
+#endif
 	default:
 		/* fallthru */
 		break;
@@ -2415,7 +2536,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	if (optlen < sizeof(int))
 		return -EINVAL;
 
-	if (get_user(val, (int __user *)optval))
+	if (get_user(val, (int __user *) optval))
 		return -EFAULT;
 
 	lock_sock(sk);
@@ -2442,13 +2563,26 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			 * an explicit push, which overrides even TCP_CORK
 			 * for currently queued segments.
 			 */
-			tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
+			tp->nonagle |= TCP_NAGLE_OFF | TCP_NAGLE_PUSH;
 			tcp_push_pending_frames(sk);
 		} else {
 			tp->nonagle &= ~TCP_NAGLE_OFF;
 		}
 		break;
-
+#ifdef CONFIG_MPTCP
+	case MPTCP_RBS_SKB_PROP: {
+		if (mptcp_init_failed || !sysctl_mptcp_enabled ||
+		    !mptcp_rbs_is_sched_used(tp)) {
+			err = -EPERM;
+		} else if (val < 0) {
+			err = -EINVAL;
+		} else if (val <= 0x1f) { // we only have 5 bits, i.e. 0..31
+			mptcp_rbs_get_cb(tp)->skb_prop = val;
+		} else
+			err = -EINVAL;
+		break;
+	}
+#endif
 	case TCP_THIN_LINEAR_TIMEOUTS:
 		if (val < 0 || val > 1)
 			err = -EINVAL;
@@ -2506,9 +2640,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		if (!tp->repair)
 			err = -EINVAL;
 		else if (sk->sk_state == TCP_ESTABLISHED)
-			err = tcp_repair_options_est(tp,
-					(struct tcp_repair_opt __user *)optval,
-					optlen);
+			err = tcp_repair_options_est(
+			    tp, (struct tcp_repair_opt __user *) optval,
+			    optlen);
 		else
 			err = -EPERM;
 		break;
@@ -2529,7 +2663,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			tp->nonagle |= TCP_NAGLE_CORK;
 		} else {
 			tp->nonagle &= ~TCP_NAGLE_CORK;
-			if (tp->nonagle&TCP_NAGLE_OFF)
+			if (tp->nonagle & TCP_NAGLE_OFF)
 				tp->nonagle |= TCP_NAGLE_PUSH;
 			tcp_push_pending_frames(sk);
 		}
@@ -2588,9 +2722,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		if (mptcp(tp))
 			break;
 		/* Translate value in seconds to number of retransmits */
-		icsk->icsk_accept_queue.rskq_defer_accept =
-			secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
-					TCP_RTO_MAX / HZ);
+		icsk->icsk_accept_queue.rskq_defer_accept = secs_to_retrans(
+		    val, TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
 		break;
 
 	case TCP_WINDOW_CLAMP:
@@ -2601,8 +2734,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			}
 			tp->window_clamp = 0;
 		} else
-			tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
-						SOCK_MIN_RCVBUF / 2 : val;
+			tp->window_clamp = val < SOCK_MIN_RCVBUF / 2
+					       ?
SOCK_MIN_RCVBUF / 2 + : val; break; case TCP_QUICKACK: @@ -2611,7 +2745,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, } else { icsk->icsk_ack.pingpong = 0; if ((1 << sk->sk_state) & - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && + (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && inet_csk_ack_scheduled(sk)) { icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; tp->ops->cleanup_rbuf(sk, 1); @@ -2638,11 +2772,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_FASTOPEN: - if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | - TCPF_LISTEN))) + if (val >= 0 && + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { + tcp_fastopen_init_key_once(true); + err = fastopen_init_queue(sk, val); - else + } else { err = -EINVAL; + } break; case TCP_TIMESTAMP: if (!tp->repair) @@ -2683,8 +2820,8 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, const struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) - return icsk->icsk_af_ops->setsockopt(sk, level, optname, - optval, optlen); + return icsk->icsk_af_ops->setsockopt(sk, level, optname, optval, + optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(tcp_setsockopt); @@ -2694,8 +2831,8 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { if (level != SOL_TCP) - return inet_csk_compat_setsockopt(sk, level, optname, - optval, optlen); + return inet_csk_compat_setsockopt(sk, level, optname, optval, + optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(compat_tcp_setsockopt); @@ -2708,6 +2845,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; unsigned int start; + u64 rate64; u32 rate; memset(info, 0, sizeof(*info)); @@ -2752,7 +2890,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_fackets = tp->fackets_out; info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); - info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); + info->tcpi_last_data_recv = + jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); info->tcpi_pmtu = icsk->icsk_pmtu_cookie; @@ -2764,27 +2903,29 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_advmss = tp->advmss; info->tcpi_reordering = tp->reordering; - info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3; + info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt) >> 3; info->tcpi_rcv_space = tp->rcvq_space.space; info->tcpi_total_retrans = tp->total_retrans; rate = READ_ONCE(sk->sk_pacing_rate); - info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL; + rate64 = rate != ~0U ? rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_pacing_rate); rate = READ_ONCE(sk->sk_max_pacing_rate); - info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; + rate64 = rate != ~0U ? 
rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_max_pacing_rate); do { start = u64_stats_fetch_begin_irq(&tp->syncp); - info->tcpi_bytes_acked = tp->bytes_acked; - info->tcpi_bytes_received = tp->bytes_received; + put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); + put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); } EXPORT_SYMBOL_GPL(tcp_get_info); -static int do_tcp_getsockopt(struct sock *sk, int level, - int optname, char __user *optval, int __user *optlen) +static int do_tcp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -2807,10 +2948,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = tp->rx_opt.mss_clamp; break; case TCP_NODELAY: - val = !!(tp->nonagle&TCP_NAGLE_OFF); + val = !!(tp->nonagle & TCP_NAGLE_OFF); break; case TCP_CORK: - val = !!(tp->nonagle&TCP_NAGLE_CORK); + val = !!(tp->nonagle & TCP_NAGLE_CORK); break; case TCP_KEEPIDLE: val = keepalive_time_when(tp) / HZ; @@ -2822,12 +2963,12 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = keepalive_probes(tp); break; case TCP_SYNCNT: - val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; + val = icsk->icsk_syn_retries ?: sysctl_tcp_syn_retries; break; case TCP_LINGER2: val = tp->linger2; if (val >= 0) - val = (val ? : sysctl_tcp_fin_timeout) / HZ; + val = (val ?: sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, @@ -2933,6 +3074,25 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case MPTCP_ENABLED: val = sock_flag(sk, SOCK_MPTCP) ? 1 : 0; break; + case MPTCP_SCHEDULER: { + const char *str = ""; + struct tcp_sock *tp = tcp_sk(sk); + + if (get_user(len, optlen)) + return -EFAULT; + + if (!mptcp_init_failed && sysctl_mptcp_enabled && + mptcp_rbs_is_sched_used(tp)) + str = mptcp_rbs_scheduler_get(sk)->name; + + len = min_t(unsigned int, len, strlen(str)); + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, str, len)) + return -EFAULT; + return 0; + } #endif default: return -ENOPROTOOPT; @@ -2951,8 +3111,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) - return icsk->icsk_af_ops->getsockopt(sk, level, optname, - optval, optlen); + return icsk->icsk_af_ops->getsockopt(sk, level, optname, optval, + optlen); return do_tcp_getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(tcp_getsockopt); @@ -2962,8 +3122,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (level != SOL_TCP) - return inet_csk_compat_getsockopt(sk, level, optname, - optval, optlen); + return inet_csk_compat_getsockopt(sk, level, optname, optval, + optlen); return do_tcp_getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(compat_tcp_getsockopt); @@ -2978,7 +3138,8 @@ static void __tcp_alloc_md5sig_pool(void) { int cpu; - for_each_possible_cpu(cpu) { + for_each_possible_cpu(cpu) + { if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) { struct crypto_hash *hash; @@ -3009,7 +3170,6 @@ bool tcp_alloc_md5sig_pool(void) } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); - /** * tcp_get_md5sig_pool - get md5sig_pool for this user * @@ -3031,8 +3191,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) } EXPORT_SYMBOL(tcp_get_md5sig_pool); 
-int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, - const struct tcphdr *th) +int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, const struct tcphdr *th) { struct scatterlist sg; struct tcphdr hdr; @@ -3049,15 +3208,15 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, } EXPORT_SYMBOL(tcp_md5_hash_header); -int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, - const struct sk_buff *skb, unsigned int header_len) +int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, const struct sk_buff *skb, + unsigned int header_len) { struct scatterlist sg; const struct tcphdr *tp = tcp_hdr(skb); struct hash_desc *desc = &hp->md5_desc; unsigned int i; - const unsigned int head_data_len = skb_headlen(skb) > header_len ? - skb_headlen(skb) - header_len : 0; + const unsigned int head_data_len = + skb_headlen(skb) > header_len ? skb_headlen(skb) - header_len : 0; const struct skb_shared_info *shi = skb_shinfo(skb); struct sk_buff *frag_iter; @@ -3078,15 +3237,15 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, return 1; } - skb_walk_frags(skb, frag_iter) - if (tcp_md5_hash_skb_data(hp, frag_iter, 0)) - return 1; + skb_walk_frags(skb, frag_iter) if (tcp_md5_hash_skb_data(hp, frag_iter, + 0)) return 1; return 0; } EXPORT_SYMBOL(tcp_md5_hash_skb_data); -int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) +int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, + const struct tcp_md5sig_key *key) { struct scatterlist sg; @@ -3157,48 +3316,35 @@ void __init tcp_init(void) percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); - tcp_hashinfo.bind_bucket_cachep = - kmem_cache_create("tcp_bind_bucket", - sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + tcp_hashinfo.bind_bucket_cachep = kmem_cache_create( + "tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, + SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); /* Size and allocate the main established and bind bucket * hash tables. * * The methodology is similar to that of the buffer cache. */ - tcp_hashinfo.ehash = - alloc_large_system_hash("TCP established", - sizeof(struct inet_ehash_bucket), - thash_entries, - 17, /* one slot per 128 KB of memory */ - 0, - NULL, - &tcp_hashinfo.ehash_mask, - 0, - thash_entries ? 0 : 512 * 1024); + tcp_hashinfo.ehash = alloc_large_system_hash( + "TCP established", sizeof(struct inet_ehash_bucket), thash_entries, + 17, /* one slot per 128 KB of memory */ + 0, NULL, &tcp_hashinfo.ehash_mask, 0, + thash_entries ? 
0 : 512 * 1024);
 	for (i = 0; i <= tcp_hashinfo.ehash_mask; i++)
 		INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i);
 
 	if (inet_ehash_locks_alloc(&tcp_hashinfo))
 		panic("TCP: failed to alloc ehash_locks");
 
-	tcp_hashinfo.bhash =
-		alloc_large_system_hash("TCP bind",
-					sizeof(struct inet_bind_hashbucket),
-					tcp_hashinfo.ehash_mask + 1,
-					17, /* one slot per 128 KB of memory */
-					0,
-					&tcp_hashinfo.bhash_size,
-					NULL,
-					0,
-					64 * 1024);
+	tcp_hashinfo.bhash = alloc_large_system_hash(
+	    "TCP bind", sizeof(struct inet_bind_hashbucket),
+	    tcp_hashinfo.ehash_mask + 1, 17, /* one slot per 128 KB of memory */
+	    0, &tcp_hashinfo.bhash_size, NULL, 0, 64 * 1024);
 	tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
 	for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
 		spin_lock_init(&tcp_hashinfo.bhash[i].lock);
 		INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
 	}
-
 	cnt = tcp_hashinfo.ehash_mask + 1;
 	tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
@@ -3208,12 +3354,12 @@ void __init tcp_init(void)
 	tcp_init_mem();
 	/* Set per-socket limits to no more than 1/128 the pressure threshold */
 	limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
-	max_wshare = min(4UL*1024*1024, limit);
-	max_rshare = min(6UL*1024*1024, limit);
+	max_wshare = min(4UL * 1024 * 1024, limit);
+	max_rshare = min(6UL * 1024 * 1024, limit);
 
 	sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
-	sysctl_tcp_wmem[1] = 16*1024;
-	sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
+	sysctl_tcp_wmem[1] = 16 * 1024;
+	sysctl_tcp_wmem[2] = max(64 * 1024, max_wshare);
 
 	sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
 	sysctl_tcp_rmem[1] = 87380;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index f9f40850329d8..78adffd46b2ab 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -79,8 +79,6 @@ static bool __tcp_fastopen_cookie_gen(const void *path,
 	struct tcp_fastopen_context *ctx;
 	bool ok = false;
 
-	tcp_fastopen_init_key_once(true);
-
 	rcu_read_lock();
 	ctx = rcu_dereference(tcp_fastopen_ctx);
 	if (ctx) {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a46c550789dd5..57de1d3f57186 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3296,6 +3296,16 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
 	tp->bytes_acked += delta;
 	u64_stats_update_end(&tp->syncp);
 	tp->snd_una = ack;
+
+	/*
+	 * rbs statistics: the number of bytes between snd_una and data_ack
+	 * was acknowledged
+	 */
+	if (mptcp(tp) && mptcp_meta_tp(tp)->mpcb->sched_ops->update_stats) {
+		/* note that this is unsigned arithmetic, even a wrap-around is correct */
+		mptcp_meta_tp(tp)->mpcb->sched_ops->update_stats((struct sock *)tp, NULL, delta, 1);
+	}
+	/* rbs statistics end */
 }
 
 /* If we update tp->rcv_nxt, also update tp->bytes_received */
@@ -4485,19 +4495,34 @@ int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
 int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	struct sk_buff *skb;
+	int err = -ENOMEM;
+	int data_len = 0;
 	bool fragstolen;
 
 	if (size == 0)
 		return 0;
 
-	skb = alloc_skb(size, sk->sk_allocation);
+	if (size > PAGE_SIZE) {
+		int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
+
+		data_len = npages << PAGE_SHIFT;
+		size = data_len + (size & ~PAGE_MASK);
+	}
+	skb = alloc_skb_with_frags(size - data_len, data_len,
+				   PAGE_ALLOC_COSTLY_ORDER,
+				   &err, sk->sk_allocation);
 	if (!skb)
 		goto err;
 
+	skb_put(skb, size - data_len);
+	skb->data_len = data_len;
+	skb->len = size;
+
 	if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
 		goto err_free;
 
-	if (memcpy_from_msg(skb_put(skb, size), msg, size))
+	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
+	if (err)
 		goto err_free;
 
 	TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
@@ -4513,7 +4538,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 err_free:
 	kfree_skb(skb);
 err:
-	return -ENOMEM;
+	return err;
+
 }
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
@@ -4522,6 +4548,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	int eaten = -1;
 	bool fragstolen = false;
 
+	//if (mptcp(tp))
+	//	printk("%s afr_ofo for sk %p with skb %p called by %pS\n", __func__, sk, skb, __builtin_return_address(0));
+
 	/* If no data is present, but a data_fin is in the options, we still
 	 * have to call mptcp_queue_skb later on. */
 	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq &&
@@ -4535,6 +4564,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	tp->rx_opt.dsack = 0;
 
+	//if (mptcp(tp))
+	//	printk("%s afr_ofo skb->seq %u compared with tp->rcv_nxt %u\n", __func__, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
+
 	/* Queue data for delivery to the user.
 	 * Packets in sequence go to the receive queue.
 	 * Out of sequence packets to the out_of_order_queue.
@@ -4623,7 +4655,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 		/* Partial packet, seq < rcv_next < end_seq */
-		SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
+		printk("partial packet: rcv_next %X seq %X - %X\n",
 			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
 			   TCP_SKB_CB(skb)->end_seq);
@@ -4638,6 +4670,10 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	}
 
 	tcp_data_queue_ofo(sk, skb);
+
+	if (mptcp(tp)) {
+		sk->sk_data_ready(sk);
+	}
 }
 
 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
@@ -5760,6 +5796,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 	}
 
 	tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+	tp->copied_seq = tp->rcv_nxt;
 	tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
 
 	/* RFC1323: The window in SYN & SYN/ACK segments is
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8cb598f8ae81b..270a079207d9b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -714,7 +714,8 @@ void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
    outside socket context is ugly, certainly. What can I do?
  */
-static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 data_ack,
+static void tcp_v4_send_ack(struct net *net,
+			    struct sk_buff *skb, u32 seq, u32 ack, u32 data_ack,
 			    u32 win, u32 tsval, u32 tsecr, int oif,
 			    struct tcp_md5sig_key *key,
 			    int reply_flags, u8 tos, int mptcp)
@@ -733,7 +734,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 data_ack,
 		];
 	} rep;
 	struct ip_reply_arg arg;
-	struct net *net = dev_net(skb_dst(skb)->dev);
 
 	memset(&rep.th, 0, sizeof(struct tcphdr));
 	memset(&arg, 0, sizeof(arg));
@@ -817,8 +817,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 		mptcp = 1;
 	}
 
-	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
-			data_ack,
+	tcp_v4_send_ack(sock_net(sk), skb,
+			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, data_ack,
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 			tcp_time_stamp + tcptw->tw_ts_offset,
 			tcptw->tw_ts_recent,
@@ -837,8 +837,10 @@ void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
- tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, + u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : + tcp_sk(sk)->snd_nxt; + + tcp_v4_send_ack(sock_net(sk), skb, seq, tcp_rsk(req)->rcv_nxt, 0, req->rcv_wnd, tcp_time_stamp, req->ts_recent, @@ -958,7 +960,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, } md5sig = rcu_dereference_protected(tp->md5sig_info, - sock_owned_by_user(sk)); + sock_owned_by_user(sk) || + lockdep_is_held(&sk->sk_lock.slock)); if (!md5sig) { md5sig = kmalloc(sizeof(*md5sig), gfp); if (!md5sig) @@ -1386,7 +1389,7 @@ struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); if (req) { nsk = tcp_check_req(sk, skb, req, false); - if (!nsk) + if (!nsk || nsk == sk) reqsk_put(req); return nsk; } @@ -1556,7 +1559,7 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) if (likely(sk->sk_rx_dst)) skb_dst_drop(skb); else - skb_dst_force(skb); + skb_dst_force_safe(skb); __skb_queue_tail(&tp->ucopy.prequeue, skb); tp->ucopy.memory += skb->truesize; @@ -1817,8 +1820,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - if (dst) { - dst_hold(dst); + if (dst && dst_hold_safe(dst)) { sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 57216bebaf480..a61aacf756f83 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -183,9 +183,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, if (tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && tcp_tw_remember_stamp(tw)) - inet_twsk_schedule(tw, tw->tw_timeout); + inet_twsk_reschedule(tw, tw->tw_timeout); else - inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); return TCP_TW_ACK; } @@ -223,7 +223,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, return TCP_TW_SUCCESS; } } - inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent = tmp_opt.rcv_tsval; @@ -273,7 +273,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * Do not reschedule in the last case. */ if (paws_reject || th->ack) - inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); return tcp_timewait_check_oow_rate_limit( tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); @@ -353,9 +353,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } while (0); #endif - /* Linkage updates. */ - __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); - /* Get the TIME_WAIT timeout firing. */ if (timeo < rto) timeo = rto; @@ -369,6 +366,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } inet_twsk_schedule(tw, timeo); + /* Linkage updates. */ + __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 324f232d66407..95344b55aa815 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -48,6 +48,9 @@ #include #include + +#include "../mptcp/mptcp_rbs_sched.h" + /* People can turn this off for buggy TCP's found in printers etc. */ int sysctl_tcp_retrans_collapse __read_mostly = 1; @@ -936,7 +939,7 @@ void tcp_wfree(struct sk_buff *skb) * SKB, or a fresh unique copy made by the retransmit engine. 
 */
 int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
-		 gfp_t gfp_mask)
+		     gfp_t gfp_mask)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_sock *inet;
@@ -1078,13 +1081,34 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-
+
 	/* Advance write_seq and place onto the write_queue. */
 	tp->write_seq = TCP_SKB_CB(skb)->end_seq;
 	__skb_header_release(skb);
 	tcp_add_write_queue_tail(sk, skb);
 	sk->sk_wmem_queued += skb->truesize;
 	sk_mem_charge(sk, skb->truesize);
+
+	/* RBS specific stuff */
+	if (mptcp(tp)) {
+		/* note: only the meta_sk should be used for this */
+		if (tp->mpcb->meta_sk == (struct sock *)tp) {
+			struct tcp_sock *meta_tp = tcp_sk(sk);
+
+			/* are we using rbs? */
+			if (mptcp_rbs_is_sched_used(meta_tp)) {
+				struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(meta_tp);
+
+				if (rbs_cb->queue_position == NULL) {
+					rbs_cb->queue_position = skb;
+					mptcp_debug("rbs corrects queue position, before NULL, now %p\n", skb);
+				} else {
+					mptcp_debug("rbs no need to correct queue position, remains with %p, not switched to %p\n", rbs_cb->queue_position, skb);
+				}
+			}
+		}
+	}
+	/* RBS specific stuff END */
 }
 
 /* Initialize TSO segments for a packet. */
@@ -1255,8 +1279,48 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	/* Link BUFF into the send queue. */
 	__skb_header_release(buff);
+	// rbs debug stuff
+	//printk("calling tcp_insert_write_queue_after with skb %p, skb->next %p, skb->prev %p, buff %p, buf->next %p, buf->prev %p for sk %p\n", skb, skb->next, skb->prev, buff, buff->next, buff->prev, sk);
+	if (!skb) {
+		printk("fragment found skb as null\n");
+	}
+
+	if (!skb->next) {
+		printk("fragment found skb->next as null for skb %p\n", skb);
+	}
+
 	tcp_insert_write_queue_after(skb, buff, sk);
+
+	/* RBS specific stuff */
+	/* note: only the meta_sk should be used for this */
+	if (mptcp(tp) && tp->mpcb->meta_sk == (struct sock *)tp) {
+		struct tcp_sock *meta_tp = tcp_sk(sk);
+
+		/* are we using rbs? */
+		if (mptcp_rbs_is_sched_used(meta_tp)) {
+			struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(meta_tp);
+
+			if (!rbs_cb->queue_position) {
+				rbs_cb->queue_position = buff;
+				mptcp_debug("rbs corrects queue position, before NULL, now %p\n", buff);
+			} else {
+				// TODO: semantically this is critical...
+				// if the queue pointer points BEFORE this packet, we do nothing, since we will point at this packet later anyway
+				// if the queue pointer points BEHIND this packet, we pull it back to the front...
+				// possibly this would also be a case for an open action...
+				mptcp_debug("comparing seq numbers for correcting queue new packet %u and current queue %u\n", TCP_SKB_CB(buff)->seq, TCP_SKB_CB(rbs_cb->queue_position)->seq);
+
+				if (before(TCP_SKB_CB(buff)->seq, TCP_SKB_CB(rbs_cb->queue_position)->seq)) {
+					mptcp_debug("rbs corrects queue position, before %p, now %p\n", rbs_cb->queue_position, buff);
+					rbs_cb->queue_position = buff;
+				} else {
+					mptcp_debug("rbs no need to correct queue position, remains with %p, not switched to %p\n", rbs_cb->queue_position, buff);
+				}
+			}
+		}
+	}
+	/* RBS specific stuff END */
+
 	return 0;
 }
 
@@ -2110,7 +2174,18 @@ bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		if (push_one == 2)
 			/* Force out a loss probe pkt. */
 			cwnd_quota = 1;
-		else
+		else if (mptcp(tp) &&
+			 /* is a subflow */
+			 tp->mpcb->meta_sk != (struct sock *)tp &&
+			 /* uses RBS */
+			 mptcp_rbs_is_sched_used((struct tcp_sock *)tp->mpcb->meta_sk) &&
+			 ignoreSbfCwndConfig) {
+			/*
+			 * RBS does not need a cwnd check for subflows
+			 */
+			mptcp_debug("%s overruled congestion control for meta_sk %p, setting cwnd_quota = 1\n", __func__, tp->mpcb->meta_sk);
+			cwnd_quota = 1;
+		} else
 			break;
 	}
@@ -2154,6 +2229,11 @@ bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
 		limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
 
+/*		if (mptcp(tp)) {
+			printk("AFR_TSQ checks with limit %d and sysvalue %d and truesize %d against value sk_wmem atomic %d\n",
+			       limit, sysctl_tcp_limit_output_bytes, 2 * skb->truesize, atomic_read(&sk->sk_wmem_alloc));
+		}*/
+
 		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
 			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
 			/* It is possible TX completion already happened
@@ -2939,6 +3019,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	skb_reserve(skb, MAX_TCP_HEADER);
 	tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
 			     TCPHDR_ACK | TCPHDR_RST);
+	skb_mstamp_get(&skb->skb_mstamp);
 	/* Send it off. */
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
@@ -3218,7 +3299,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_fastopen_request *fo = tp->fastopen_req;
-	int syn_loss = 0, space, err = 0, copied;
+	int syn_loss = 0, space, err = 0;
 	unsigned long last_syn_loss = 0;
 	struct sk_buff *syn_data;
@@ -3256,17 +3337,18 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 		goto fallback;
 	syn_data->ip_summed = CHECKSUM_PARTIAL;
 	memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
-	copied = copy_from_iter(skb_put(syn_data, space), space,
-				&fo->data->msg_iter);
-	if (unlikely(!copied)) {
-		kfree_skb(syn_data);
-		goto fallback;
-	}
-	if (copied != space) {
-		skb_trim(syn_data, copied);
-		space = copied;
+	if (space) {
+		int copied = copy_from_iter(skb_put(syn_data, space), space,
+					    &fo->data->msg_iter);
+		if (unlikely(!copied)) {
+			kfree_skb(syn_data);
+			goto fallback;
+		}
+		if (copied != space) {
+			skb_trim(syn_data, copied);
+			space = copied;
+		}
 	}
-
 	/* No more data pending in inet_wait_for_connect() */
 	if (space == fo->size)
 		fo->data = NULL;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index ad1ab4aed6609..a484ef9a36ebf 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -185,6 +185,18 @@ int tcp_write_timeout(struct sock *sk)
 #endif /* CONFIG_MPTCP */
 	} else {
 		if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
+			/* Some middle-boxes may black-hole Fast Open _after_
+			 * the handshake. Therefore we conservatively disable
+			 * Fast Open on this path on recurring timeouts with
+			 * few or zero bytes acked after Fast Open.
+ */ + if (tp->syn_data_acked && + tp->bytes_acked <= tp->rx_opt.mss_clamp) { + tcp_fastopen_cache_set(sk, 0, NULL, true, 0); + if (icsk->icsk_retransmits == sysctl_tcp_retries1) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVEFAIL); + } /* Black hole detection */ tcp_mtu_probing(icsk, sk); diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 17d35662930d0..3e6a472e6b883 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -219,7 +219,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) yeah->fast_count = 0; yeah->reno_count = max(yeah->reno_count>>1, 2U); - return tp->snd_cwnd - reduction; + return max_t(int, tp->snd_cwnd - reduction, 2); } static struct tcp_congestion_ops tcp_yeah __read_mostly = { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 83aa604f9273c..a390174b96de6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -963,8 +963,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc, sk->sk_family == AF_INET6); - if (err) + if (unlikely(err)) { + kfree(ipc.opt); return err; + } if (ipc.opt) free = 1; connected = 0; @@ -1995,12 +1997,19 @@ void udp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_efree; - dst = sk->sk_rx_dst; + dst = READ_ONCE(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); - if (dst) - skb_dst_set_noref(skb, dst); + if (dst) { + /* DST_NOCACHE can not be used without taking a reference */ + if (dst->flags & DST_NOCACHE) { + if (likely(atomic_inc_not_zero(&dst->__refcnt))) + skb_dst_set(skb, dst); + } else { + skb_dst_set_noref(skb, dst); + } + } } int udp_rcv(struct sk_buff *skb) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index bff69746e05f0..78526087126de 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -230,7 +230,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xfrm_dst_ifdown(dst, dev); } -static struct dst_ops xfrm4_dst_ops = { +static struct dst_ops xfrm4_dst_ops_template = { .family = AF_INET, .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, @@ -244,7 +244,7 @@ static struct dst_ops xfrm4_dst_ops = { static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .family = AF_INET, - .dst_ops = &xfrm4_dst_ops, + .dst_ops = &xfrm4_dst_ops_template, .dst_lookup = xfrm4_dst_lookup, .get_saddr = xfrm4_get_saddr, .decode_session = _decode_session4, @@ -266,7 +266,7 @@ static struct ctl_table xfrm4_policy_table[] = { { } }; -static int __net_init xfrm4_net_init(struct net *net) +static int __net_init xfrm4_net_sysctl_init(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; @@ -294,7 +294,7 @@ static int __net_init xfrm4_net_init(struct net *net) return -ENOMEM; } -static void __net_exit xfrm4_net_exit(struct net *net) +static void __net_exit xfrm4_net_sysctl_exit(struct net *net) { struct ctl_table *table; @@ -306,12 +306,44 @@ static void __net_exit xfrm4_net_exit(struct net *net) if (!net_eq(net, &init_net)) kfree(table); } +#else /* CONFIG_SYSCTL */ +static int inline xfrm4_net_sysctl_init(struct net *net) +{ + return 0; +} + +static void inline xfrm4_net_sysctl_exit(struct net *net) +{ +} +#endif + +static int __net_init xfrm4_net_init(struct net *net) +{ + int ret; + + memcpy(&net->xfrm.xfrm4_dst_ops, &xfrm4_dst_ops_template, + sizeof(xfrm4_dst_ops_template)); + ret = dst_entries_init(&net->xfrm.xfrm4_dst_ops); + if (ret) + return ret; + + ret = xfrm4_net_sysctl_init(net); + if (ret) + 
dst_entries_destroy(&net->xfrm.xfrm4_dst_ops); + + return ret; +} + +static void __net_exit xfrm4_net_exit(struct net *net) +{ + xfrm4_net_sysctl_exit(net); + dst_entries_destroy(&net->xfrm.xfrm4_dst_ops); +} static struct pernet_operations __net_initdata xfrm4_net_ops = { .init = xfrm4_net_init, .exit = xfrm4_net_exit, }; -#endif static void __init xfrm4_policy_init(void) { @@ -320,13 +352,9 @@ static void __init xfrm4_policy_init(void) void __init xfrm4_init(void) { - dst_entries_init(&xfrm4_dst_ops); - xfrm4_state_init(); xfrm4_policy_init(); xfrm4_protocol_init(); -#ifdef CONFIG_SYSCTL register_pernet_subsys(&xfrm4_net_ops); -#endif } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8f338657dfcdf..3eaba4f006c8f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -195,6 +195,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, .accept_ra_from_local = 0, + .accept_ra_min_hop_limit= 1, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -236,6 +237,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, .accept_ra_from_local = 0, + .accept_ra_min_hop_limit= 1, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -343,6 +345,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) setup_timer(&ndev->rs_timer, addrconf_rs_timer, (unsigned long)ndev); memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); + + if (ndev->cnf.stable_secret.initialized) + ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + else + ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64; + ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); @@ -411,6 +419,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) if (err) { ipv6_mc_destroy_dev(ndev); del_timer(&ndev->regen_timer); + snmp6_unregister_dev(ndev); goto err_release; } /* protected by rtnl_lock */ @@ -560,7 +569,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, if (err < 0) goto errout; - err = EINVAL; + err = -EINVAL; if (!tb[NETCONFA_IFINDEX]) goto errout; @@ -2384,7 +2393,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (in6_dev->cnf.optimistic_dad && !net->ipv6.devconf_all->forwarding && sllao) - addr_flags = IFA_F_OPTIMISTIC; + addr_flags |= IFA_F_OPTIMISTIC; #endif /* Do not allow to create too much of autoconfigured @@ -3415,6 +3424,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; + bool notify = false; addrconf_join_solict(dev, &ifp->addr); @@ -3460,7 +3470,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) /* Because optimistic nodes can use this address, * notify listeners. If DAD fails, RTM_DELADDR is sent. 
*/ - ipv6_ifa_notify(RTM_NEWADDR, ifp); + notify = true; } } @@ -3468,6 +3478,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) out: spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); + if (notify) + ipv6_ifa_notify(RTM_NEWADDR, ifp); } static void addrconf_dad_start(struct inet6_ifaddr *ifp) @@ -4559,6 +4571,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr; + array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit; array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; #ifdef CONFIG_IPV6_ROUTER_PREF array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref; @@ -5261,13 +5274,10 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, goto out; } - if (!write) { - err = snprintf(str, sizeof(str), "%pI6", - &secret->secret); - if (err >= sizeof(str)) { - err = -EIO; - goto out; - } + err = snprintf(str, sizeof(str), "%pI6", &secret->secret); + if (err >= sizeof(str)) { + err = -EIO; + goto out; } err = proc_dostring(&lctl, write, buffer, lenp, ppos); @@ -5454,6 +5464,13 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "accept_ra_min_hop_limit", + .data = &ipv6_devconf.accept_ra_min_hop_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "accept_ra_pinfo", .data = &ipv6_devconf.accept_ra_pinfo, diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 882124ebb438b..a8f6986dcbe5e 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -552,7 +552,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh) rcu_read_lock(); p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); - if (p && ip6addrlbl_hold(p)) + if (p && !ip6addrlbl_hold(p)) p = NULL; lseq = ip6addrlbl_table.seq; rcu_read_unlock(); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 09d2c6b8b4e14..8c4dd4a7d8f82 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -108,6 +108,9 @@ int inet6_create(struct net *net, struct socket *sock, int protocol, int kern) int try_loading_module = 0; int err; + if (protocol < 0 || protocol >= IPPROTO_MAX) + return -EINVAL; + /* Look for the requested type/protocol pair. 
*/ lookup_protocol: err = -ESOCKTNOSUPPORT; @@ -424,9 +427,11 @@ void inet6_destroy_sock(struct sock *sk) /* Free tx options */ - opt = xchg(&np->opt, NULL); - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } } EXPORT_SYMBOL_GPL(inet6_destroy_sock); @@ -655,7 +660,10 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), + &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 762a58c772b81..8e6cb3f143268 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -40,7 +40,7 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a) return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0); } -int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); @@ -56,7 +56,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (usin->sin6_family == AF_INET) { if (__ipv6_only_sock(sk)) return -EAFNOSUPPORT; - err = ip4_datagram_connect(sk, uaddr, addr_len); + err = __ip4_datagram_connect(sk, uaddr, addr_len); goto ipv4_connected; } @@ -98,9 +98,9 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sin.sin_addr.s_addr = daddr->s6_addr32[3]; sin.sin_port = usin->sin6_port; - err = ip4_datagram_connect(sk, - (struct sockaddr *) &sin, - sizeof(sin)); + err = __ip4_datagram_connect(sk, + (struct sockaddr *) &sin, + sizeof(sin)); ipv4_connected: if (err) @@ -162,13 +162,18 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; + if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) fl6.flowi6_oif = np->mcast_oif; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - opt = flowlabel ? flowlabel->opt : np->opt; + rcu_read_lock(); + opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt); final_p = fl6_update_dst(&fl6, opt, &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); err = 0; @@ -204,6 +209,16 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) fl6_sock_release(flowlabel); return err; } + +int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip6_datagram_connect(sk, uaddr, addr_len); + release_sock(sk); + return res; +} EXPORT_SYMBOL_GPL(ip6_datagram_connect); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr, @@ -325,6 +340,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) kfree_skb(skb); } +/* For some errors we have valid addr_offset even with zero payload and + * zero port. Also, addr_offset should be supported if port is set. 
+ */ +static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr) +{ + return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6 || + serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port; +} + /* IPv6 supports cmsg on all origins aside from SO_EE_ORIGIN_LOCAL. * * At one point, excluding local errors was a quick test to identify icmp/icmp6 @@ -389,7 +414,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin && serr->port) { + if (sin && ipv6_datagram_support_addr(serr)) { const unsigned char *nh = skb_network_header(skb); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index a7bbbe45570b2..adbd6958c3984 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -727,6 +727,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) *((char **)&opt2->dst1opt) += dif; if (opt2->srcrt) *((char **)&opt2->srcrt) += dif; + atomic_set(&opt2->refcnt, 1); } return opt2; } @@ -790,7 +791,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, return ERR_PTR(-ENOBUFS); memset(opt2, 0, tot_len); - + atomic_set(&opt2->refcnt, 1); opt2->tot_len = tot_len; p = (char *)(opt2 + 1); diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c index 447a7fbd1bb6f..f5e2ba1c18bf8 100644 --- a/net/ipv6/exthdrs_offload.c +++ b/net/ipv6/exthdrs_offload.c @@ -36,6 +36,6 @@ int __init ipv6_exthdrs_offload_init(void) return ret; out_rt: - inet_del_offload(&rthdr_offload, IPPROTO_ROUTING); + inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING); goto out; } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 9003b9f7dfb31..6ef1845b35f60 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -77,7 +77,9 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = IPPROTO_TCP; fl6->daddr = ireq->ir_v6_rmt_addr; - final_p = fl6_update_dst(fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); fl6->saddr = ireq->ir_v6_loc_addr; fl6->flowi6_oif = ireq->ir_iif; fl6->flowi6_mark = ireq->ir_mark; @@ -207,7 +209,9 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->fl6_dport = inet->inet_dport; security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); - final_p = fl6_update_dst(fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (!dst) { @@ -240,7 +244,8 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused /* Restore final destination back after routing done */ fl6.daddr = sk->sk_v6_daddr; - res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); + res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); rcu_read_unlock(); return res; } diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index d491125011c4d..db939e4ac68a8 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -540,12 +540,13 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) } spin_lock_bh(&ip6_sk_fl_lock); for (sflp = &np->ipv6_fl_list; - (sfl = rcu_dereference(*sflp)) != NULL; + (sfl = rcu_dereference_protected(*sflp, + lockdep_is_held(&ip6_sk_fl_lock))) != NULL; sflp = &sfl->next) { if (sfl->fl->label == freq.flr_label) { if 
(freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) np->flow_label &= ~IPV6_FLOWLABEL_MASK; - *sflp = rcu_dereference(sfl->next); + *sflp = sfl->next; spin_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); kfree_rcu(sfl, rcu); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index a38d3ac0f18f6..76be7d311cc4e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -361,6 +361,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); ip6gre_tunnel_unlink(ign, t); + ip6_tnl_dst_reset(t); dev_put(dev); } @@ -1552,13 +1553,11 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], return -EEXIST; } else { t = nt; - - ip6gre_tunnel_unlink(ign, t); - ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); - ip6gre_tunnel_link(ign, t); - netdev_state_change(dev); } + ip6gre_tunnel_unlink(ign, t); + ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); + ip6gre_tunnel_link(ign, t); return 0; } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index f2e464eba5efd..57990c929cd81 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -331,10 +331,10 @@ int ip6_mc_input(struct sk_buff *skb) if (offset < 0) goto out; - if (!ipv6_is_mld(skb, nexthdr, offset)) - goto out; + if (ipv6_is_mld(skb, nexthdr, offset)) + deliver = true; - deliver = true; + goto out; } /* unknown RA - process it normally */ } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index e893cd18612fc..08b62047c67f3 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -292,8 +292,6 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, - .gro_complete = ipv6_gro_complete, }, }; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index bc09cb97b8401..06bf4010d3ed7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -376,6 +376,9 @@ int ip6_forward(struct sk_buff *skb) if (skb->pkt_type != PACKET_HOST) goto drop; + if (unlikely(skb->sk)) + goto drop; + if (skb_warn_if_lro(skb)) goto drop; @@ -885,6 +888,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct rt6_info *rt; #endif int err; + int flags = 0; /* The correct way to handle this would be to do * ip6_route_get_saddr, and then ip6_route_output; however, @@ -916,10 +920,13 @@ static int ip6_dst_lookup_tail(struct sock *sk, dst_release(*dst); *dst = NULL; } + + if (fl6->flowi6_oif) + flags |= RT6_LOOKUP_F_IFACE; } if (!*dst) - *dst = ip6_route_output(net, sk, fl6); + *dst = ip6_route_output_flags(net, sk, fl6, flags); err = (*dst)->error; if (err) @@ -1329,7 +1336,7 @@ static int __ip6_append_data(struct sock *sk, (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && - (sk->sk_type == SOCK_DGRAM)) { + (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags, rt); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 74ceb73c1c9a0..a7aef4b52d659 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -118,7 +118,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, int cmd); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); -static void mroute_clean_tables(struct mr6_table *mrt); +static void mroute_clean_tables(struct mr6_table *mrt, bool all); static void ipmr_expire_process(unsigned 
long arg); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES @@ -335,7 +335,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) static void ip6mr_free_table(struct mr6_table *mrt) { del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, true); kfree(mrt); } @@ -550,7 +550,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) if (it->cache == &mrt->mfc6_unres_queue) spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == mrt->mfc6_cache_array) + else if (it->cache == &mrt->mfc6_cache_array[it->ct]) read_unlock(&mrt_lock); } @@ -1543,7 +1543,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr6_table *mrt) +static void mroute_clean_tables(struct mr6_table *mrt, bool all) { int i; LIST_HEAD(list); @@ -1553,8 +1553,9 @@ static void mroute_clean_tables(struct mr6_table *mrt) * Shut down all active vif entries */ for (i = 0; i < mrt->maxvif; i++) { - if (!(mrt->vif6_table[i].flags & VIFF_STATIC)) - mif6_delete(mrt, i, &list); + if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC)) + continue; + mif6_delete(mrt, i, &list); } unregister_netdevice_many(&list); @@ -1563,7 +1564,7 @@ static void mroute_clean_tables(struct mr6_table *mrt) */ for (i = 0; i < MFC6_LINES; i++) { list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) { - if (c->mfc_flags & MFC_STATIC) + if (!all && (c->mfc_flags & MFC_STATIC)) continue; write_lock_bh(&mrt_lock); list_del(&c->list); @@ -1626,7 +1627,7 @@ int ip6mr_sk_done(struct sock *sk) net->ipv6.devconf_all); write_unlock_bh(&mrt_lock); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, false); err = 0; break; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index b3cd721aaf905..c23941b3a2b96 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -113,7 +113,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } - opt = xchg(&inet6_sk(sk)->opt, opt); + opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt, + opt); sk_dst_reset(sk); return opt; @@ -238,9 +239,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } - opt = xchg(&np->opt, NULL); - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, + NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } pktopt = xchg(&np->pktoptions, NULL); kfree_skb(pktopt); @@ -415,7 +419,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) break; - opt = ipv6_renew_options(sk, np->opt, optname, + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = ipv6_renew_options(sk, opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); if (IS_ERR(opt)) { @@ -444,8 +449,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; opt = ipv6_update_options(sk, opt); sticky_done: - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } @@ -498,6 +505,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; memset(opt, 0, sizeof(*opt)); + atomic_set(&opt->refcnt, 1); opt->tot_len = sizeof(*opt) + optlen; retv = -EFAULT; if 
(copy_from_user(opt+1, optval, optlen)) @@ -514,8 +522,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; opt = ipv6_update_options(sk, opt); done: - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } case IPV6_UNICAST_HOPS: @@ -1122,10 +1132,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, case IPV6_RTHDR: case IPV6_DSTOPTS: { + struct ipv6_txoptions *opt; lock_sock(sk); - len = ipv6_getsockopt_sticky(sk, np->opt, - optname, optval, len); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len); release_sock(sk); /* check if ipv6_getsockopt_sticky() returns err code */ if (len < 0) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 083b2927fc67a..41e3b5ee8d0b6 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1651,7 +1651,6 @@ static void mld_sendpack(struct sk_buff *skb) if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); } else { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); } @@ -2014,7 +2013,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len); } else IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 96f153c0846b7..abb0bdda759a5 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1225,18 +1225,16 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (rt) rt6_set_expires(rt, jiffies + (HZ * lifetime)); - if (ra_msg->icmph.icmp6_hop_limit) { - /* Only set hop_limit on the interface if it is higher than - * the current hop_limit. - */ - if (in6_dev->cnf.hop_limit < ra_msg->icmph.icmp6_hop_limit) { + if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && + ra_msg->icmph.icmp6_hop_limit) { + if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + if (rt) + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, + ra_msg->icmph.icmp6_hop_limit); } else { - ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than current\n"); + ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n"); } - if (rt) - dst_metric_set(&rt->dst, RTAX_HOPLIMIT, - ra_msg->icmph.icmp6_hop_limit); } skip_defrtr: diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6f187c8d8a1bd..d235ed7f47ab5 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -190,7 +190,7 @@ static void nf_ct_frag6_expire(unsigned long data) /* Creation primitives. 
*/ static inline struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, struct in6_addr *src, - struct in6_addr *dst, u8 ecn) + struct in6_addr *dst, int iif, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -200,6 +200,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, arg.user = user; arg.src = src; arg.dst = dst; + arg.iif = iif; arg.ecn = ecn; local_bh_disable(); @@ -603,7 +604,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) fhdr = (struct frag_hdr *)skb_transport_header(clone); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, - ip6_frag_ecn(hdr)); + skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8072bd4139b75..2c639aee12cb6 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -731,6 +731,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { + struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; @@ -837,8 +838,10 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } - if (!opt) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -901,6 +904,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) dst_release(dst); out: fl6_sock_release(flowlabel); + txopt_put(opt_to_free); return err < 0 ? err : len; do_confirm: dst_confirm(dst); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 8ffa2c8cce774..9d1f6a28b2841 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -108,7 +108,10 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a) return fq->id == arg->id && fq->user == arg->user && ipv6_addr_equal(&fq->saddr, arg->src) && - ipv6_addr_equal(&fq->daddr, arg->dst); + ipv6_addr_equal(&fq->daddr, arg->dst) && + (arg->iif == fq->iif || + !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST | + IPV6_ADDR_LINKLOCAL))); } EXPORT_SYMBOL(ip6_frag_match); @@ -180,7 +183,7 @@ static void ip6_frag_expire(unsigned long data) static struct frag_queue * fq_find(struct net *net, __be32 id, const struct in6_addr *src, - const struct in6_addr *dst, u8 ecn) + const struct in6_addr *dst, int iif, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -190,6 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, arg.user = IP6_DEFRAG_LOCAL_DELIVER; arg.src = src; arg.dst = dst; + arg.iif = iif; arg.ecn = ecn; hash = inet6_hash_frag(id, src, dst); @@ -551,7 +555,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) } fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, - ip6_frag_ecn(hdr)); + skb->dev ? 
skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq) { int ret; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c73ae5039e46d..fe70bd6a75162 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1030,11 +1030,9 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); } -struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, - struct flowi6 *fl6) +struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, + struct flowi6 *fl6, int flags) { - int flags = 0; - fl6->flowi6_iif = LOOPBACK_IFINDEX; if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) @@ -1047,7 +1045,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); } -EXPORT_SYMBOL(ip6_route_output); +EXPORT_SYMBOL_GPL(ip6_route_output_flags); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) { @@ -1515,7 +1513,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc, return -EINVAL; } -int ip6_route_add(struct fib6_config *cfg) +int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret) { int err; struct net *net = cfg->fc_nlinfo.nl_net; @@ -1523,7 +1521,6 @@ int ip6_route_add(struct fib6_config *cfg) struct net_device *dev = NULL; struct inet6_dev *idev = NULL; struct fib6_table *table; - struct mx6_config mxc = { .mx = NULL, }; int addr_type; if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) @@ -1719,6 +1716,32 @@ int ip6_route_add(struct fib6_config *cfg) cfg->fc_nlinfo.nl_net = dev_net(dev); + *rt_ret = rt; + + return 0; +out: + if (dev) + dev_put(dev); + if (idev) + in6_dev_put(idev); + if (rt) + dst_free(&rt->dst); + + *rt_ret = NULL; + + return err; +} + +int ip6_route_add(struct fib6_config *cfg) +{ + struct mx6_config mxc = { .mx = NULL, }; + struct rt6_info *rt = NULL; + int err; + + err = ip6_route_info_create(cfg, &rt); + if (err) + goto out; + err = ip6_convert_metrics(&mxc, cfg); if (err) goto out; @@ -1726,14 +1749,12 @@ int ip6_route_add(struct fib6_config *cfg) err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc); kfree(mxc.mx); + return err; out: - if (dev) - dev_put(dev); - if (idev) - in6_dev_put(idev); if (rt) dst_free(&rt->dst); + return err; } @@ -2496,19 +2517,78 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } -static int ip6_route_multipath(struct fib6_config *cfg, int add) +struct rt6_nh { + struct rt6_info *rt6_info; + struct fib6_config r_cfg; + struct mx6_config mxc; + struct list_head next; +}; + +static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) +{ + struct rt6_nh *nh; + + list_for_each_entry(nh, rt6_nh_list, next) { + pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n", + &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway, + nh->r_cfg.fc_ifindex); + } +} + +static int ip6_route_info_append(struct list_head *rt6_nh_list, + struct rt6_info *rt, struct fib6_config *r_cfg) +{ + struct rt6_nh *nh; + struct rt6_info *rtnh; + int err = -EEXIST; + + list_for_each_entry(nh, rt6_nh_list, next) { + /* check if rt6_info already exists */ + rtnh = nh->rt6_info; + + if (rtnh->dst.dev == rt->dst.dev && + rtnh->rt6i_idev == rt->rt6i_idev && + ipv6_addr_equal(&rtnh->rt6i_gateway, + &rt->rt6i_gateway)) + return err; + } + + nh = kzalloc(sizeof(*nh), GFP_KERNEL); + if (!nh) + return -ENOMEM; + nh->rt6_info = rt; + err = 
ip6_convert_metrics(&nh->mxc, r_cfg); + if (err) { + kfree(nh); + return err; + } + memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); + list_add_tail(&nh->next, rt6_nh_list); + + return 0; +} + +static int ip6_route_multipath_add(struct fib6_config *cfg) { struct fib6_config r_cfg; struct rtnexthop *rtnh; + struct rt6_info *rt; + struct rt6_nh *err_nh; + struct rt6_nh *nh, *nh_safe; int remaining; int attrlen; - int err = 0, last_err = 0; + int err = 1; + int nhn = 0; + int replace = (cfg->fc_nlinfo.nlh && + (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); + LIST_HEAD(rt6_nh_list); remaining = cfg->fc_mp_len; -beginning: rtnh = (struct rtnexthop *)cfg->fc_mp; - /* Parse a Multipath Entry */ + /* Parse a Multipath Entry and build a list (rt6_nh_list) of + * rt6_info structs per nexthop + */ while (rtnh_ok(rtnh, remaining)) { memcpy(&r_cfg, cfg, sizeof(*cfg)); if (rtnh->rtnh_ifindex) @@ -2524,22 +2604,32 @@ static int ip6_route_multipath(struct fib6_config *cfg, int add) r_cfg.fc_flags |= RTF_GATEWAY; } } - err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg); + + err = ip6_route_info_create(&r_cfg, &rt); + if (err) + goto cleanup; + + err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); if (err) { - last_err = err; - /* If we are trying to remove a route, do not stop the - * loop when ip6_route_del() fails (because next hop is - * already gone), we should try to remove all next hops. - */ - if (add) { - /* If add fails, we should try to delete all - * next hops that have been already added. - */ - add = 0; - remaining = cfg->fc_mp_len - remaining; - goto beginning; - } + dst_free(&rt->dst); + goto cleanup; + } + + rtnh = rtnh_next(rtnh, &remaining); + } + + err_nh = NULL; + list_for_each_entry(nh, &rt6_nh_list, next) { + err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc); + /* nh->rt6_info is used or freed at this point, reset to NULL*/ + nh->rt6_info = NULL; + if (err) { + if (replace && nhn) + ip6_print_replace_route_err(&rt6_nh_list); + err_nh = nh; + goto add_errout; } + /* Because each route is added like a single route we remove * these flags after the first nexthop: if there is a collision, * we have already failed to add the first nexthop: @@ -2549,6 +2639,63 @@ static int ip6_route_multipath(struct fib6_config *cfg, int add) */ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_REPLACE); + nhn++; + } + + goto cleanup; + +add_errout: + /* Delete routes that were already added */ + list_for_each_entry(nh, &rt6_nh_list, next) { + if (err_nh == nh) + break; + ip6_route_del(&nh->r_cfg); + } + +cleanup: + list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { + if (nh->rt6_info) + dst_free(&nh->rt6_info->dst); + if (nh->mxc.mx) + kfree(nh->mxc.mx); + list_del(&nh->next); + kfree(nh); + } + + return err; +} + +static int ip6_route_multipath_del(struct fib6_config *cfg) +{ + struct fib6_config r_cfg; + struct rtnexthop *rtnh; + int remaining; + int attrlen; + int err = 1, last_err = 0; + + remaining = cfg->fc_mp_len; + rtnh = (struct rtnexthop *)cfg->fc_mp; + + /* Parse a Multipath Entry */ + while (rtnh_ok(rtnh, remaining)) { + memcpy(&r_cfg, cfg, sizeof(*cfg)); + if (rtnh->rtnh_ifindex) + r_cfg.fc_ifindex = rtnh->rtnh_ifindex; + + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + if (nla) { + nla_memcpy(&r_cfg.fc_gateway, nla, 16); + r_cfg.fc_flags |= RTF_GATEWAY; + } + } + err = ip6_route_del(&r_cfg); + if (err) + last_err = err; + rtnh = rtnh_next(rtnh, 
&remaining); } @@ -2565,7 +2712,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) return err; if (cfg.fc_mp) - return ip6_route_multipath(&cfg, 0); + return ip6_route_multipath_del(&cfg); else return ip6_route_del(&cfg); } @@ -2580,7 +2727,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) return err; if (cfg.fc_mp) - return ip6_route_multipath(&cfg, 1); + return ip6_route_multipath_add(&cfg); else return ip6_route_add(&cfg); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index ac35a28599be5..85c4b2fff504d 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1394,34 +1394,20 @@ static int ipip6_tunnel_init(struct net_device *dev) return 0; } -static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) +static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); - tunnel->dev = dev; - tunnel->net = dev_net(dev); - iph->version = 4; iph->protocol = IPPROTO_IPV6; iph->ihl = 5; iph->ttl = 64; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - - tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); - if (!tunnel->dst_cache) { - free_percpu(dev->tstats); - return -ENOMEM; - } - dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); - return 0; } static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1831,23 +1817,19 @@ static int __net_init sit_init_net(struct net *net) */ sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; - err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); - if (err) - goto err_dev_free; - - ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); err = register_netdev(sitn->fb_tunnel_dev); if (err) goto err_reg_dev; + ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); + ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); + t = netdev_priv(sitn->fb_tunnel_dev); strcpy(t->parms.name, sitn->fb_tunnel_dev->name); return 0; err_reg_dev: - dev_put(sitn->fb_tunnel_dev); -err_dev_free: ipip6_dev_free(sitn->fb_tunnel_dev); err_alloc_dev: return err; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index a50d6ae442529..faea999470aa7 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -278,7 +278,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = ireq->ir_v6_rmt_addr; - final_p = fl6_update_dst(&fl6, np->opt, &final); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = ireq->ir_mark; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9ba7277a80636..04e1b2359e471 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -87,10 +87,9 @@ void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - if (dst) { + if (dst && dst_hold_safe(dst)) { const struct rt6_info *rt = (const struct rt6_info *)dst; - dst_hold(dst); sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; if (rt->rt6i_node) @@ -115,6 +114,7 @@ int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct rt6_info *rt; struct flowi6 fl6; struct dst_entry *dst; @@ -241,7 
+241,8 @@ int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -270,9 +271,9 @@ int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tcp_fetch_timewait_stamp(sk, dst); icsk->icsk_ext_hdr_len = 0; - if (np->opt) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + + opt->opt_nflen; tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); @@ -477,7 +478,10 @@ int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); skb_set_queue_mapping(skb, queue_mapping); - err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); + rcu_read_lock(); + err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), + np->tclass); + rcu_read_unlock(); err = net_xmit_eval(err); if (!tcp_rsk(req)->snt_synack && !err) tcp_rsk(req)->snt_synack = tcp_time_stamp; @@ -983,7 +987,7 @@ struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); if (req) { nsk = tcp_check_req(sk, skb, req, false); - if (!nsk || nsk == sk) reqsk_put(req); return nsk; } @@ -1037,6 +1041,7 @@ struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct inet_request_sock *ireq; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; + struct ipv6_txoptions *opt; struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; @@ -1188,13 +1193,15 @@ struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, but we do one more thing there: reattach optmem to newsk.
*/ - if (np->opt) - newnp->opt = ipv6_dup_options(newsk, np->opt); - + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); + } inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; tcp_ca_openreq_child(newsk, dst); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e51fc3eee6dbd..7333f3575fc54 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1107,6 +1107,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; + struct ipv6_txoptions *opt_to_free = NULL; struct ip6_flowlabel *flowlabel = NULL; struct flowi6 fl6; struct dst_entry *dst; @@ -1260,8 +1261,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) opt = NULL; connected = 0; } - if (!opt) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -1370,6 +1373,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) out: dst_release(dst); fl6_sock_release(flowlabel); + txopt_put(opt_to_free); if (!err) return len; /* diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 901ef6f8addc0..5266ad2d64196 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -24,7 +24,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) struct ipv6hdr *inner_iph = ipipv6_hdr(skb); if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph))) - IP6_ECN_set_ce(inner_iph); + IP6_ECN_set_ce(skb, inner_iph); } /* Add encapsulation header. 
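The hunks above for net/ipv6/ipv6_sockglue.c, raw.c, syncookies.c and tcp_ipv6.c (and the l2tp_ip6.c and udp.c hunks further down) convert np->opt to RCU plus reference counting. The txopt_get()/txopt_put() helpers they call are not part of the hunks shown here; they belong to the same series (in the upstream version of this change they live in include/net/ipv6.h, next to the atomic refcnt field that one of the do_ipv6_setsockopt() hunks initializes with atomic_set(&opt->refcnt, 1)). A minimal sketch of the assumed semantics:

	/* Sketch only -- the real definitions ship elsewhere in this series. */
	static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
	{
		struct ipv6_txoptions *opt;

		rcu_read_lock();
		opt = rcu_dereference(np->opt);
		if (opt && !atomic_inc_not_zero(&opt->refcnt))
			opt = NULL;	/* lost the race against the final txopt_put() */
		rcu_read_unlock();
		return opt;
	}

	static inline void txopt_put(struct ipv6_txoptions *opt)
	{
		if (opt && atomic_dec_and_test(&opt->refcnt))
			kfree_rcu(opt, rcu);	/* freed after the RCU grace period */
	}

Every successful txopt_get() must be paired with a txopt_put(); that is exactly what the opt_to_free pattern in rawv6_sendmsg(), udpv6_sendmsg() and l2tp_ip6_sendmsg() implements.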
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index f337a908a76a1..4fb94f6ee15b4 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -289,7 +289,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xfrm_dst_ifdown(dst, dev); } -static struct dst_ops xfrm6_dst_ops = { +static struct dst_ops xfrm6_dst_ops_template = { .family = AF_INET6, .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, @@ -303,7 +303,7 @@ static struct dst_ops xfrm6_dst_ops = { static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .family = AF_INET6, - .dst_ops = &xfrm6_dst_ops, + .dst_ops = &xfrm6_dst_ops_template, .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, @@ -336,7 +336,7 @@ static struct ctl_table xfrm6_policy_table[] = { { } }; -static int __net_init xfrm6_net_init(struct net *net) +static int __net_init xfrm6_net_sysctl_init(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; @@ -364,7 +364,7 @@ static int __net_init xfrm6_net_init(struct net *net) return -ENOMEM; } -static void __net_exit xfrm6_net_exit(struct net *net) +static void __net_exit xfrm6_net_sysctl_exit(struct net *net) { struct ctl_table *table; @@ -376,24 +376,52 @@ static void __net_exit xfrm6_net_exit(struct net *net) if (!net_eq(net, &init_net)) kfree(table); } +#else /* CONFIG_SYSCTL */ +static int inline xfrm6_net_sysctl_init(struct net *net) +{ + return 0; +} + +static void inline xfrm6_net_sysctl_exit(struct net *net) +{ +} +#endif + +static int __net_init xfrm6_net_init(struct net *net) +{ + int ret; + + memcpy(&net->xfrm.xfrm6_dst_ops, &xfrm6_dst_ops_template, + sizeof(xfrm6_dst_ops_template)); + ret = dst_entries_init(&net->xfrm.xfrm6_dst_ops); + if (ret) + return ret; + + ret = xfrm6_net_sysctl_init(net); + if (ret) + dst_entries_destroy(&net->xfrm.xfrm6_dst_ops); + + return ret; +} + +static void __net_exit xfrm6_net_exit(struct net *net) +{ + xfrm6_net_sysctl_exit(net); + dst_entries_destroy(&net->xfrm.xfrm6_dst_ops); +} static struct pernet_operations xfrm6_net_ops = { .init = xfrm6_net_init, .exit = xfrm6_net_exit, }; -#endif int __init xfrm6_init(void) { int ret; - dst_entries_init(&xfrm6_dst_ops); - ret = xfrm6_policy_init(); - if (ret) { - dst_entries_destroy(&xfrm6_dst_ops); + if (ret) goto out; - } ret = xfrm6_state_init(); if (ret) goto out_policy; @@ -402,9 +430,7 @@ int __init xfrm6_init(void) if (ret) goto out_state; -#ifdef CONFIG_SYSCTL register_pernet_subsys(&xfrm6_net_ops); -#endif out: return ret; out_state: @@ -416,11 +442,8 @@ int __init xfrm6_init(void) void xfrm6_fini(void) { -#ifdef CONFIG_SYSCTL unregister_pernet_subsys(&xfrm6_net_ops); -#endif xfrm6_protocol_fini(); xfrm6_policy_fini(); xfrm6_state_fini(); - dst_entries_destroy(&xfrm6_dst_ops); } diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ee0ea25c8e7aa..9a1edcde4ba54 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1086,6 +1086,9 @@ static int irda_create(struct net *net, struct socket *sock, int protocol, struct sock *sk; struct irda_sock *self; + if (protocol < 0 || protocol > SK_PROTOCOL_MAX) + return -EINVAL; + if (net != &init_net) return -EAFNOSUPPORT; diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index a26c401ef4a44..43964594aa12d 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -1839,7 +1839,7 @@ static void *irlmp_seq_hb_idx(struct irlmp_iter_state *iter, loff_t *off) for (element = hashbin_get_first(iter->hashbin); element != NULL; element = 
hashbin_get_next(iter->hashbin)) { - if (!off || *off-- == 0) { + if (!off || (*off)-- == 0) { /* NB: hashbin left locked */ return element; } diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6daa52a18d40c..123f6f9f854c6 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -709,6 +709,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, if (!addr || addr->sa_family != AF_IUCV) return -EINVAL; + if (addr_len < sizeof(struct sockaddr_iucv)) + return -EINVAL; + lock_sock(sk); if (sk->sk_state != IUCV_OPEN) { err = -EBADFD; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index a29a504492af6..e3db498f02330 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1319,7 +1319,7 @@ static void l2tp_tunnel_del_work(struct work_struct *work) tunnel = container_of(work, struct l2tp_tunnel, del_work); sk = l2tp_tunnel_sock_lookup(tunnel); if (!sk) - return; + goto out; sock = sk->sk_socket; @@ -1340,6 +1340,8 @@ static void l2tp_tunnel_del_work(struct work_struct *work) } l2tp_tunnel_sock_put(sk); +out: + l2tp_tunnel_dec_refcount(tunnel); } /* Create a socket for the tunnel, if one isn't set up by @@ -1639,8 +1641,13 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { + l2tp_tunnel_inc_refcount(tunnel); l2tp_tunnel_closeall(tunnel); - return (false == queue_work(l2tp_wq, &tunnel->del_work)); + if (false == queue_work(l2tp_wq, &tunnel->del_work)) { + l2tp_tunnel_dec_refcount(tunnel); + return 1; + } + return 0; } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index d1ded3777815e..0ce9da948ad7f 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -486,6 +486,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; @@ -575,8 +576,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) opt = NULL; } - if (opt == NULL) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -631,6 +634,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) dst_release(dst); out: fl6_sock_release(flowlabel); + txopt_put(opt_to_free); return err < 0 ? 
err : len; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 9e13c2ff87897..fe92a08b3cd55 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -124,8 +124,13 @@ static int l2tp_tunnel_notify(struct genl_family *family, ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, tunnel, cmd); - if (ret >= 0) - return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + if (ret >= 0) { + ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + /* We don't care if no one is listening */ + if (ret == -ESRCH) + ret = 0; + return ret; + } nlmsg_free(msg); @@ -147,8 +152,13 @@ static int l2tp_session_notify(struct genl_family *family, ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, session, cmd); - if (ret >= 0) - return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + if (ret >= 0) { + ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + /* We don't care if no one is listening */ + if (ret == -ESRCH) + ret = 0; + return ret; + } nlmsg_free(msg); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ff347a0eebd4f..f06d42267306e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3356,6 +3356,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, /* Update CSA counters */ if (sdata->vif.csa_active && (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_MESH_POINT || sdata->vif.type == NL80211_IFTYPE_ADHOC) && params->n_csa_offsets) { int i; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 29236e832e444..c09c0131bfa22 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -723,6 +723,7 @@ void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) debugfs_remove_recursive(sdata->vif.debugfs_dir); sdata->vif.debugfs_dir = NULL; + sdata->debugfs.subdir_stations = NULL; } void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index bfef1b2150504..41adfc898a18e 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -146,6 +146,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, csa_settings->chandef.chan->center_freq); presp->csa_counter_offsets[0] = (pos - presp->head); *pos++ = csa_settings->count; + presp->csa_current_counter = csa_settings->count; } /* put the remaining rates in WLAN_EID_EXT_SUPP_RATES */ @@ -1726,7 +1727,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) if (sdata->vif.type != NL80211_IFTYPE_ADHOC) continue; sdata->u.ibss.last_scan_completed = jiffies; - ieee80211_queue_work(&local->hw, &sdata->work); } mutex_unlock(&local->iflist_mtx); } diff --git a/net/mac80211/key.c b/net/mac80211/key.c index a907f2d5c12d8..81e9785f38bc2 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -66,12 +66,15 @@ update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) if (sdata->vif.type != NL80211_IFTYPE_AP) return; - mutex_lock(&sdata->local->mtx); + /* crypto_tx_tailroom_needed_cnt is protected by this */ + assert_key_lock(sdata->local); + + rcu_read_lock(); - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + list_for_each_entry_rcu(vlan, &sdata->u.ap.vlans, u.vlan.list) vlan->crypto_tx_tailroom_needed_cnt += delta; - mutex_unlock(&sdata->local->mtx); + rcu_read_unlock(); } static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) @@ -95,6 +98,8 @@ static void 
increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) * http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net */ + assert_key_lock(sdata->local); + update_vlan_tailroom_need_count(sdata, 1); if (!sdata->crypto_tx_tailroom_needed_cnt++) { @@ -109,6 +114,8 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) { + assert_key_lock(sdata->local); + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt < delta); update_vlan_tailroom_need_count(sdata, -delta); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index df3051d96afff..e86daed83c6fb 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -249,6 +249,7 @@ static void ieee80211_restart_work(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, restart_work); + struct ieee80211_sub_if_data *sdata; /* wait for scan work complete */ flush_workqueue(local->workqueue); @@ -257,6 +258,8 @@ static void ieee80211_restart_work(struct work_struct *work) "%s called with hardware scan in progress\n", __func__); rtnl_lock(); + list_for_each_entry(sdata, &local->interfaces, list) + flush_delayed_work(&sdata->dec_tailroom_needed_wk); ieee80211_scan_cancel(local); ieee80211_reconfig(local); rtnl_unlock(); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index d4684242e78bf..afcc67a157fd8 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -680,6 +680,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) *pos++ = 0x0; *pos++ = ieee80211_frequency_to_channel( csa->settings.chandef.chan->center_freq); + bcn->csa_current_counter = csa->settings.count; bcn->csa_counter_offsets[0] = hdr_len + 6; *pos++ = csa->settings.count; *pos++ = WLAN_EID_CHAN_SWITCH_PARAM; @@ -1298,17 +1299,6 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) sdata_unlock(sdata); } -void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) -{ - struct ieee80211_sub_if_data *sdata; - - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) - if (ieee80211_vif_is_mesh(&sdata->vif) && - ieee80211_sdata_running(sdata)) - ieee80211_queue_work(&local->hw, &sdata->work); - rcu_read_unlock(); -} void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 50c8473cf9dc5..472bdc73e950b 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -358,14 +358,10 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP; } -void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local); - void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata); void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata); void ieee80211s_stop(void); #else -static inline void -ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {} static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) { return false; } static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 26053bf2faa8f..8448258299928 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3340,7 +3340,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (ifmgd->rssi_min_thold != ifmgd->rssi_max_thold && ifmgd->count_beacon_signal >= 
IEEE80211_SIGNAL_AVE_MIN_COUNT) { - int sig = ifmgd->ave_beacon_signal; + int sig = ifmgd->ave_beacon_signal / 16; int last_sig = ifmgd->last_ave_beacon_signal; struct ieee80211_event event = { .type = RSSI_EVENT, @@ -4002,8 +4002,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) if (!(flags & IEEE80211_HW_CONNECTION_MONITOR)) ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.monitor_work); - /* and do all the other regular work too */ - ieee80211_queue_work(&sdata->local->hw, &sdata->work); } } @@ -4946,6 +4944,25 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, return 0; } + if (ifmgd->assoc_data && + ether_addr_equal(ifmgd->assoc_data->bss->bssid, req->bssid)) { + sdata_info(sdata, + "aborting association with %pM by local choice (Reason: %u=%s)\n", + req->bssid, req->reason_code, + ieee80211_get_reason_code_string(req->reason_code)); + + drv_mgd_prepare_tx(sdata->local, sdata); + ieee80211_send_deauth_disassoc(sdata, req->bssid, + IEEE80211_STYPE_DEAUTH, + req->reason_code, tx, + frame_buf); + ieee80211_destroy_assoc_data(sdata, false); + ieee80211_report_disconnect(sdata, frame_buf, + sizeof(frame_buf), true, + req->reason_code); + return 0; + } + if (ifmgd->associated && ether_addr_equal(ifmgd->associated->bssid, req->bssid)) { sdata_info(sdata, diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 247552a7f6c2f..3ece7d1034c81 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -92,14 +92,15 @@ int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma) static inline void minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list) { - int j = MAX_THR_RATES; - struct minstrel_rate_stats *tmp_mrs = &mi->r[j - 1].stats; + int j; + struct minstrel_rate_stats *tmp_mrs; struct minstrel_rate_stats *cur_mrs = &mi->r[i].stats; - while (j > 0 && (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) > - minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma))) { - j--; + for (j = MAX_THR_RATES; j > 0; --j) { tmp_mrs = &mi->r[tp_list[j - 1]].stats; + if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) <= + minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma)) + break; } if (j < MAX_THR_RATES - 1) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 7bb6a9383f58e..ee9351affa5bd 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -310,6 +310,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) bool was_scanning = local->scanning; struct cfg80211_scan_request *scan_req; struct ieee80211_sub_if_data *scan_sdata; + struct ieee80211_sub_if_data *sdata; lockdep_assert_held(&local->mtx); @@ -369,7 +370,16 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); - ieee80211_mesh_notify_scan_completed(local); + + /* Requeue all the work that might have been ignored while + * the scan was in progress; if there was none this will + * just be a no-op for the particular interface. 
+ */ + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (ieee80211_sdata_running(sdata)) + ieee80211_queue_work(&sdata->local->hw, &sdata->work); + } + if (was_scanning) ieee80211_start_next_roc(local); } diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 4c2e7690226a8..ab19f3c2104dc 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -33,11 +33,11 @@ __field(u32, chan_width) \ __field(u32, center_freq1) \ __field(u32, center_freq2) -#define CHANDEF_ASSIGN(c) \ - __entry->control_freq = (c)->chan ? (c)->chan->center_freq : 0; \ - __entry->chan_width = (c)->width; \ - __entry->center_freq1 = (c)->center_freq1; \ - __entry->center_freq2 = (c)->center_freq2; +#define CHANDEF_ASSIGN(c) \ + __entry->control_freq = (c) ? ((c)->chan ? (c)->chan->center_freq : 0) : 0; \ + __entry->chan_width = (c) ? (c)->width : 0; \ + __entry->center_freq1 = (c) ? (c)->center_freq1 : 0; \ + __entry->center_freq2 = (c) ? (c)->center_freq2 : 0; #define CHANDEF_PR_FMT " control:%d MHz width:%d center: %d/%d MHz" #define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ __entry->center_freq1, __entry->center_freq2 diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 667111ee6a20f..5787f15a3a120 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -301,9 +301,6 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (tx->sdata->vif.type == NL80211_IFTYPE_WDS) return TX_CONTINUE; - if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT) - return TX_CONTINUE; - if (tx->flags & IEEE80211_TX_PS_BUFFERED) return TX_CONTINUE; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b864ebc6ab8fb..67fec9ba97fce 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2984,6 +2984,13 @@ ieee80211_extend_noa_desc(struct ieee80211_noa_data *data, u32 tsf, int i) if (end > 0) return false; + /* One shot NOA */ + if (data->count[i] == 1) + return false; + + if (data->desc[i].interval == 0) + return false; + /* End time is in the past, check for repetitions */ skip = DIV_ROUND_UP(-end, data->desc[i].interval); if (data->count[i] < 255) { diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig index 13cf4d51ce1f9..9ba2526d33ef5 100644 --- a/net/mptcp/Kconfig +++ b/net/mptcp/Kconfig @@ -64,7 +64,7 @@ endif config DEFAULT_MPTCP_PM string default "default" if DEFAULT_DUMMY - default "fullmesh" if DEFAULT_FULLMESH + default "fullmesh" if DEFAULT_FULLMESH default "ndiffports" if DEFAULT_NDIFFPORTS default "binder" if DEFAULT_BINDER default "default" @@ -92,6 +92,30 @@ config MPTCP_REDUNDANT This scheduler sends all packets redundantly over all subflows to decrease latency and jitter at the cost of lower throughput.
+config MPTCP_RBS + tristate "MPTCP Rule-based Scheduler" + depends on (MPTCP=y) + ---help--- + This scheduler takes its decisions by executing user-provided + scheduling rules instead of a fixed built-in policy. + +config MPTCP_RBSMEASURE + tristate "Measurements" + depends on (MPTCP_RBS=y) + ---help--- + Collect runtime measurements about rule execution in the + rule-based scheduler. + +config MPTCP_RBSOPT + tristate "Optimizations" + depends on (MPTCP_RBS=y) + ---help--- + Apply optimization passes to scheduling rules before they + are executed. + +config MPTCP_RBSEBPF + tristate "eBPF Code Generator" + depends on (MPTCP_RBSOPT=y) && (BPF_JIT=y) + ---help--- + Compile optimized scheduling rules to eBPF so that they can + be JIT-compiled (requires BPF_JIT). + choice prompt "Default MPTCP Scheduler" default DEFAULT diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index 10a98ba7d8c31..c89470377ac3c 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -17,6 +17,6 @@ obj-$(CONFIG_MPTCP_NDIFFPORTS) += mptcp_ndiffports.o obj-$(CONFIG_MPTCP_BINDER) += mptcp_binder.o obj-$(CONFIG_MPTCP_ROUNDROBIN) += mptcp_rr.o obj-$(CONFIG_MPTCP_REDUNDANT) += mptcp_redundant.o +obj-$(CONFIG_MPTCP_RBS) += mptcp_rbs_sched.o mptcp_rbs_parser.o mptcp_rbs_queue.o mptcp_rbs_lexer.o mptcp_rbs_user.o mptcp_rbs_cfg.o mptcp_rbs_smt.o mptcp_rbs_value.o mptcp_rbs_var.o mptcp_rbs_exec.o mptcp_rbs_ctx.o mptcp_rbs_type.o mptcp_rbs_scheduler.o mptcp_rbs_optimizer.o mptcp_rbs_optimizer_bm.o mptcp_rbs_optimizer_cf.o mptcp_rbs_optimizer_cve.o mptcp_rbs_optimizer_dce.o mptcp_rbs_optimizer_ebpf.o mptcp_rbs_optimizer_ebpf_disasm.o mptcp_rbs_optimizer_ebpf_regalloc.o mptcp_rbs_optimizer_ebpf_lse.o mptcp_rbs_optimizer_lu.o mptcp_rbs_optimizer_vi.o mptcp_rbs_action.o mptcp-$(subst m,y,$(CONFIG_IPV6)) += mptcp_ipv6.o - diff --git a/net/mptcp/mptcp_ctrl.c b/net/mptcp/mptcp_ctrl.c index 3a41b7b3fd070..4fac6dcf748df 100644 --- a/net/mptcp/mptcp_ctrl.c +++ b/net/mptcp/mptcp_ctrl.c @@ -57,6 +57,8 @@ #include #include #include +#include +#include static struct kmem_cache *mptcp_sock_cache __read_mostly; static struct kmem_cache *mptcp_cb_cache __read_mostly; @@ -114,6 +116,90 @@ static int proc_mptcp_scheduler(struct ctl_table *ctl, int write, return ret; } + +int proc_mptcp_scheduler_select_parse(char *str, __be32 *ip, __be16 *port, char **name) { + unsigned long l; + unsigned int val; + int i; + + /* parse "a.b.c.d:port name"; IPv4 parsing inspired by in_aton() */ + l = 0; + for (i = 0; i < 4; i++) { + l <<= 8; + if (*str != '\0') { + val = 0; + while (*str != '\0' && *str != '.' 
&& *str != '\n' && *str != ':') { + val *= 10; + val += *str - '0'; + str++; + } + l |= val; + if (*str != '\0') + str++; + } + } + *ip = htonl(l); + + val = 0; + while (*str != '\0' && *str != ' ') { + val *= 10; + val += *str - '0'; + str++; + } + *port = htons(val); /* inet ports are big-endian __be16 */ + + if (*str != ' ') + return 1; /* parse error: expected "a.b.c.d:port name" */ + str++; + *name = str; + + return 0; +} + +static int proc_mptcp_scheduler_select(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + char val[MPTCP_SCHED_NAME_MAX + 25]; /* room for "a.b.c.d:port " plus the name */ + struct ctl_table tbl = { + .data = val, + .maxlen = sizeof(val), + }; + int ret = 0; + + if (write) { + __be32 ip; + __be16 port; + char *name; + long till_time_s; + struct timespec ts; + getnstimeofday(&ts); + + till_time_s = ts.tv_sec + 60 * 5; /* keep the override for 5 minutes */ + + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + + if (ret == 0) { + ret = proc_mptcp_scheduler_select_parse(val, &ip, &port, &name); + + if (ret == 0) { + mptcp_debug("afr: add for ip %pI4 and port %u scheduler %s at jiffy %lu\n", + &ip, ntohs(port), name, jiffies); + ret = mptcp_set_default_scheduler_for_tuple(name, ip, port, till_time_s); + } else { + mptcp_debug("afr error parse\n"); + } + } else { + mptcp_debug("afr error dostring\n"); + } + } + + return ret; +} + static struct ctl_table mptcp_table[] = { { .procname = "mptcp_enabled", @@ -164,6 +250,12 @@ static struct ctl_table mptcp_table[] = { .maxlen = MPTCP_SCHED_NAME_MAX, .proc_handler = proc_mptcp_scheduler, }, + { + .procname = "mptcp_schedselect", + .mode = 0644, + .maxlen = MPTCP_SCHED_NAME_MAX+25, + .proc_handler = proc_mptcp_scheduler_select, + }, { } }; @@ -685,6 +777,9 @@ void mptcp_destroy_sock(struct sock *sk) if (is_meta_sk(sk)) { struct sock *sk_it, *tmpsk; + if (tcp_sk(sk)->mpcb->sched_ops->release) + tcp_sk(sk)->mpcb->sched_ops->release(sk); + __skb_queue_purge(&tcp_sk(sk)->mpcb->reinject_queue); mptcp_purge_ofo_queue(tcp_sk(sk)); @@ -1239,6 +1334,8 @@ int mptcp_add_sock(struct sock *meta_sk, struct sock *sk, u8 loc_id, u8 rem_id, return -EPERM; } + tp->mptcp->sbf_id = ++mpcb->last_sbf_id; + INIT_HLIST_NODE(&tp->mptcp->cb_list); tp->mptcp->tp = tp; @@ -2448,10 +2545,11 @@ EXPORT_SYMBOL(mptcp_wq); static int mptcp_pm_seq_show(struct seq_file *seq, void *v) { struct tcp_sock *meta_tp; + struct sock *sk; const struct net *net = seq->private; int i, n = 0; - seq_printf(seq, " sl loc_tok rem_tok v6 local_address remote_address st ns tx_queue rx_queue inode"); + seq_printf(seq, " sl loc_tok rem_tok v6 local_address remote_address st ns tx_queue rx_queue inode scheduler meta p"); seq_putc(seq, '\n'); for (i = 0; i < MPTCP_HASH_SIZE; i++) { @@ -2493,13 +2591,44 @@ static int mptcp_pm_seq_show(struct seq_file *seq, void *v) ntohs(isk->inet_dport)); #endif } - seq_printf(seq, " %02X %02X %08X:%08X %lu", + seq_printf(seq, " %02X %02X %08X:%08X %lu %s %p", meta_sk->sk_state, mpcb->cnt_subflows, meta_tp->write_seq - meta_tp->snd_una, max_t(int, meta_tp->rcv_nxt - meta_tp->copied_seq, 0), - sock_i_ino(meta_sk)); + sock_i_ino(meta_sk), mpcb->sched_ops->name, meta_sk); seq_putc(seq, '\n'); + +#if 1 + // added more stats per subflow... 
maybe we should move this to a new file + seq_printf(seq, " snd rcv srtt mdev packets_out retrans_out snd_cwnd\n"); + + for ((sk) = (struct sock *)(mpcb)->connection_list; sk; sk = (struct sock *)tcp_sk(sk)->mptcp->next) { + struct tcp_sock *tp = tcp_sk(sk); + struct inet_sock *isk_tmp = inet_sk(sk); + + seq_printf(seq, "%15llu%15llu%15u%15u%15u%15u%15u %p %pI4:%u %pI4:%u\n", + tp->mptcp->bytes_snd, + tp->mptcp->bytes_rcv, + tp->srtt_us, tp->mdev_us, + tp->packets_out, + tp->retrans_out, + tp->snd_cwnd, + tp, + &isk_tmp->inet_rcv_saddr, + ntohs(isk_tmp->inet_sport), + &isk_tmp->inet_daddr, + ntohs(isk_tmp->inet_dport)); + } +#endif } rcu_read_unlock_bh();
+ */ + +#include +#include +#include +#include +#include +#include +#if IS_ENABLED(CONFIG_IPV6) +#include +#include +#endif +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct kmem_cache *mptcp_sock_cache __read_mostly; +static struct kmem_cache *mptcp_cb_cache __read_mostly; +static struct kmem_cache *mptcp_tw_cache __read_mostly; + +int sysctl_mptcp_enabled __read_mostly = 1; +int sysctl_mptcp_version __read_mostly = 0; +static int min_mptcp_version; +static int max_mptcp_version = 1; +int sysctl_mptcp_checksum __read_mostly = 1; +int sysctl_mptcp_debug __read_mostly; +EXPORT_SYMBOL(sysctl_mptcp_debug); +int sysctl_mptcp_syn_retries __read_mostly = 3; + +bool mptcp_init_failed __read_mostly; + +struct static_key mptcp_static_key = STATIC_KEY_INIT_FALSE; +EXPORT_SYMBOL(mptcp_static_key); + +static int proc_mptcp_path_manager(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + char val[MPTCP_PM_NAME_MAX]; + struct ctl_table tbl = { + .data = val, + .maxlen = MPTCP_PM_NAME_MAX, + }; + int ret; + + mptcp_get_default_path_manager(val); + + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) + ret = mptcp_set_default_path_manager(val); + return ret; +} + +static int proc_mptcp_scheduler(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + char val[MPTCP_SCHED_NAME_MAX]; + struct ctl_table tbl = { + .data = val, + .maxlen = MPTCP_SCHED_NAME_MAX, + }; + int ret; + + mptcp_get_default_scheduler(val); + + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) + ret = mptcp_set_default_scheduler(val); + return ret; +} + + +int proc_mptcp_scheduler_select_parse(char* str, __be32* ip, __be16* port, char** name) { + unsigned long l; + unsigned int val; + int i; + //inspired by in_aton("10.0.0.2"); + l = 0; + for (i = 0; i < 4; i++) { + l <<= 8; + if (*str != '\0') { + val = 0; + while (*str != '\0' && *str != '.' 
&& *str != '\n' && *str != ':') { + val *= 10; + val += *str - '0'; + str++; + } + l |= val; + if (*str != '\0') + str++; + } + } + *ip = htonl(l); + + val = 0; + while (*str != '\0' && *str != ' ') { + val *= 10; + val += *str - '0'; + str++; + } + *port = val; + + if (*str != ' ') + return 1; // error + str++; + *name = str; + + return 0; +} + +static int proc_mptcp_scheduler_select(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + const int MAX_SIZE = MPTCP_SCHED_NAME_MAX+25; // ip and port + char val[MAX_SIZE]; + struct ctl_table tbl = { + .data = val, + .maxlen = MAX_SIZE, + }; + int ret; + + if(write) { + __be32 ip; + __be16 port; + char* name; + long till_time_s; + struct timespec ts; + getnstimeofday(&ts); + + till_time_s = ts.tv_sec + 60 * 5; // 5 minutes + + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + + if (ret == 0) { + ret = proc_mptcp_scheduler_select_parse(val, &ip, &port, &name); + + if(ret == 0) { + mptcp_debug("afr: add for ip %i.%i.%i.%i and port %i scheduler %s at jiffy %llu\n", ip & 0x000000FF, + (ip & 0x0000FF00)>>8, + (ip & 0x00FF0000)>>16, + (ip & 0xFF000000)>>24, + port, name, jiffies); + ret = mptcp_set_default_scheduler_for_tuple(name, ip, port, till_time_s); + } else { + mptcp_debug("afr error parse\n"); + } + } else { + mptcp_debug("afr error dostring\n"); + } + } + + return ret; +} + +static struct ctl_table mptcp_table[] = { + { + .procname = "mptcp_enabled", + .data = &sysctl_mptcp_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "mptcp_version", + .data = &sysctl_mptcp_version, + .mode = 0644, + .maxlen = sizeof(int), + .proc_handler = &proc_dointvec_minmax, + .extra1 = &min_mptcp_version, + .extra2 = &max_mptcp_version, + }, + { + .procname = "mptcp_checksum", + .data = &sysctl_mptcp_checksum, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "mptcp_debug", + .data = &sysctl_mptcp_debug, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "mptcp_syn_retries", + .data = &sysctl_mptcp_syn_retries, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "mptcp_path_manager", + .mode = 0644, + .maxlen = MPTCP_PM_NAME_MAX, + .proc_handler = proc_mptcp_path_manager, + }, + { + .procname = "mptcp_scheduler", + .mode = 0644, + .maxlen = MPTCP_SCHED_NAME_MAX, + .proc_handler = proc_mptcp_scheduler, + }, + { + .procname = "mptcp_schedselect", + .mode = 0644, + .maxlen = MPTCP_SCHED_NAME_MAX+25, + .proc_handler = proc_mptcp_scheduler_select, + }, + { } +}; + +static inline u32 mptcp_hash_tk(u32 token) +{ + return token % MPTCP_HASH_SIZE; +} + +struct hlist_nulls_head tk_hashtable[MPTCP_HASH_SIZE]; +EXPORT_SYMBOL(tk_hashtable); + +/* This second hashtable is needed to retrieve request socks + * created as a result of a join request. While the SYN contains + * the token, the final ack does not, so we need a separate hashtable + * to retrieve the mpcb. 
+ */ +struct hlist_nulls_head mptcp_reqsk_htb[MPTCP_HASH_SIZE]; +spinlock_t mptcp_reqsk_hlock; /* hashtable protection */ + +/* The following hash table is used to avoid collision of token */ +static struct hlist_nulls_head mptcp_reqsk_tk_htb[MPTCP_HASH_SIZE]; +spinlock_t mptcp_tk_hashlock; /* hashtable protection */ + +static bool mptcp_reqsk_find_tk(const u32 token) +{ + const u32 hash = mptcp_hash_tk(token); + const struct mptcp_request_sock *mtreqsk; + const struct hlist_nulls_node *node; + +begin: + hlist_nulls_for_each_entry_rcu(mtreqsk, node, + &mptcp_reqsk_tk_htb[hash], hash_entry) { + if (token == mtreqsk->mptcp_loc_token) + return true; + } + /* A request-socket is destroyed by RCU. So, it might have been recycled + * and put into another hash-table list. So, after the lookup we may + * end up in a different list. So, we may need to restart. + * + * See also the comment in __inet_lookup_established. + */ + if (get_nulls_value(node) != hash) + goto begin; + return false; +} + +static void mptcp_reqsk_insert_tk(struct request_sock *reqsk, const u32 token) +{ + u32 hash = mptcp_hash_tk(token); + + hlist_nulls_add_head_rcu(&mptcp_rsk(reqsk)->hash_entry, + &mptcp_reqsk_tk_htb[hash]); +} + +static void mptcp_reqsk_remove_tk(const struct request_sock *reqsk) +{ + rcu_read_lock(); + spin_lock(&mptcp_tk_hashlock); + hlist_nulls_del_init_rcu(&mptcp_rsk(reqsk)->hash_entry); + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock(); +} + +void mptcp_reqsk_destructor(struct request_sock *req) +{ + if (!mptcp_rsk(req)->is_sub) { + if (in_softirq()) { + mptcp_reqsk_remove_tk(req); + } else { + rcu_read_lock_bh(); + spin_lock(&mptcp_tk_hashlock); + hlist_nulls_del_init_rcu(&mptcp_rsk(req)->hash_entry); + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock_bh(); + } + } else { + mptcp_hash_request_remove(req); + } +} + +static void __mptcp_hash_insert(struct tcp_sock *meta_tp, const u32 token) +{ + u32 hash = mptcp_hash_tk(token); + hlist_nulls_add_head_rcu(&meta_tp->tk_table, &tk_hashtable[hash]); + meta_tp->inside_tk_table = 1; +} + +static bool mptcp_find_token(u32 token) +{ + const u32 hash = mptcp_hash_tk(token); + const struct tcp_sock *meta_tp; + const struct hlist_nulls_node *node; + +begin: + hlist_nulls_for_each_entry_rcu(meta_tp, node, &tk_hashtable[hash], tk_table) { + if (token == meta_tp->mptcp_loc_token) + return true; + } + /* A TCP-socket is destroyed by RCU. So, it might have been recycled + * and put into another hash-table list. So, after the lookup we may + * end up in a different list. So, we may need to restart. + * + * See also the comment in __inet_lookup_established. + */ + if (get_nulls_value(node) != hash) + goto begin; + return false; +} + +static void mptcp_set_key_reqsk(struct request_sock *req, + const struct sk_buff *skb, + u32 seed) +{ + const struct inet_request_sock *ireq = inet_rsk(req); + struct mptcp_request_sock *mtreq = mptcp_rsk(req); + + if (skb->protocol == htons(ETH_P_IP)) { + mtreq->mptcp_loc_key = mptcp_v4_get_key(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + htons(ireq->ir_num), + ireq->ir_rmt_port, + seed); +#if IS_ENABLED(CONFIG_IPV6) + } else { + mtreq->mptcp_loc_key = mptcp_v6_get_key(ipv6_hdr(skb)->saddr.s6_addr32, + ipv6_hdr(skb)->daddr.s6_addr32, + htons(ireq->ir_num), + ireq->ir_rmt_port, + seed); +#endif + } + + mptcp_key_sha1(mtreq->mptcp_loc_key, &mtreq->mptcp_loc_token, NULL); +} + +/* New MPTCP-connection request, prepare a new token for the meta-socket that + * will be created in mptcp_check_req_master(), and store the received token. 
+ */ +static void mptcp_reqsk_new_mptcp(struct request_sock *req, + struct sock *sk, + const struct mptcp_options_received *mopt, + const struct sk_buff *skb) +{ + struct mptcp_request_sock *mtreq = mptcp_rsk(req); + struct tcp_sock *tp = tcp_sk(sk); + + inet_rsk(req)->saw_mpc = 1; + /* MPTCP version agreement */ + if (mopt->mptcp_ver >= tp->mptcp_ver) + mtreq->mptcp_ver = tp->mptcp_ver; + else + mtreq->mptcp_ver = mopt->mptcp_ver; + + rcu_read_lock(); + spin_lock(&mptcp_tk_hashlock); + do { + mptcp_set_key_reqsk(req, skb, mptcp_seed++); + } while (mptcp_reqsk_find_tk(mtreq->mptcp_loc_token) || + mptcp_find_token(mtreq->mptcp_loc_token)); + mptcp_reqsk_insert_tk(req, mtreq->mptcp_loc_token); + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock(); + mtreq->mptcp_rem_key = mopt->mptcp_sender_key; +} + +static int mptcp_reqsk_new_cookie(struct request_sock *req, + const struct mptcp_options_received *mopt, + const struct sk_buff *skb) +{ + struct mptcp_request_sock *mtreq = mptcp_rsk(req); + + rcu_read_lock(); + spin_lock(&mptcp_tk_hashlock); + + mptcp_set_key_reqsk(req, skb, tcp_rsk(req)->snt_isn); + + if (mptcp_reqsk_find_tk(mtreq->mptcp_loc_token) || + mptcp_find_token(mtreq->mptcp_loc_token)) { + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock(); + return false; + } + + inet_rsk(req)->saw_mpc = 1; + + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock(); + + mtreq->mptcp_rem_key = mopt->mptcp_sender_key; + + return true; +} + +static void mptcp_set_key_sk(const struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + const struct inet_sock *isk = inet_sk(sk); + + if (sk->sk_family == AF_INET) + tp->mptcp_loc_key = mptcp_v4_get_key(isk->inet_saddr, + isk->inet_daddr, + isk->inet_sport, + isk->inet_dport, + mptcp_seed++); +#if IS_ENABLED(CONFIG_IPV6) + else + tp->mptcp_loc_key = mptcp_v6_get_key(inet6_sk(sk)->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, + isk->inet_sport, + isk->inet_dport, + mptcp_seed++); +#endif + + mptcp_key_sha1(tp->mptcp_loc_key, + &tp->mptcp_loc_token, NULL); +} + +#ifdef HAVE_JUMP_LABEL +/* We are not allowed to call static_key_slow_dec() from irq context + * If mptcp_enable/disable_static_key() is called from irq context, + * defer the static_key_slow_dec() calls. 
+ */ +static atomic_t mptcp_enable_deferred; +#endif + +void mptcp_enable_static_key(void) +{ +#ifdef HAVE_JUMP_LABEL + int deferred; + + if (in_interrupt()) { + atomic_inc(&mptcp_enable_deferred); + return; + } + + deferred = atomic_xchg(&mptcp_enable_deferred, 0); + + if (deferred > 0) { + while (deferred--) + static_key_slow_inc(&mptcp_static_key); + } else if (deferred < 0) { + /* Do exactly one dec less than necessary */ + while (++deferred) + static_key_slow_dec(&mptcp_static_key); + return; + } +#endif + static_key_slow_inc(&mptcp_static_key); + WARN_ON(atomic_read(&mptcp_static_key.enabled) == 0); +} + +void mptcp_disable_static_key(void) +{ +#ifdef HAVE_JUMP_LABEL + int deferred; + + if (in_interrupt()) { + atomic_dec(&mptcp_enable_deferred); + return; + } + + deferred = atomic_xchg(&mptcp_enable_deferred, 0); + + if (deferred > 0) { + /* Do exactly one inc less than necessary */ + while (--deferred) + static_key_slow_inc(&mptcp_static_key); + return; + } else if (deferred < 0) { + while (deferred++) + static_key_slow_dec(&mptcp_static_key); + } +#endif + static_key_slow_dec(&mptcp_static_key); +} + +void mptcp_enable_sock(struct sock *sk) +{ + if (!sock_flag(sk, SOCK_MPTCP)) { + sock_set_flag(sk, SOCK_MPTCP); + tcp_sk(sk)->mptcp_ver = sysctl_mptcp_version; + + /* Necessary here, because MPTCP can be enabled/disabled through + * a setsockopt. + */ + if (sk->sk_family == AF_INET) + inet_csk(sk)->icsk_af_ops = &mptcp_v4_specific; +#if IS_ENABLED(CONFIG_IPV6) + else if (mptcp_v6_is_v4_mapped(sk)) + inet_csk(sk)->icsk_af_ops = &mptcp_v6_mapped; + else + inet_csk(sk)->icsk_af_ops = &mptcp_v6_specific; +#endif + + mptcp_enable_static_key(); + } +} + +void mptcp_disable_sock(struct sock *sk) +{ + if (sock_flag(sk, SOCK_MPTCP)) { + sock_reset_flag(sk, SOCK_MPTCP); + + /* Necessary here, because MPTCP can be enabled/disabled through + * a setsockopt. + */ + if (sk->sk_family == AF_INET) + inet_csk(sk)->icsk_af_ops = &ipv4_specific; +#if IS_ENABLED(CONFIG_IPV6) + else if (mptcp_v6_is_v4_mapped(sk)) + inet_csk(sk)->icsk_af_ops = &ipv6_mapped; + else + inet_csk(sk)->icsk_af_ops = &ipv6_specific; +#endif + + mptcp_disable_static_key(); + } +} + +void mptcp_connect_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + rcu_read_lock_bh(); + spin_lock(&mptcp_tk_hashlock); + do { + mptcp_set_key_sk(sk); + } while (mptcp_reqsk_find_tk(tp->mptcp_loc_token) || + mptcp_find_token(tp->mptcp_loc_token)); + + __mptcp_hash_insert(tp, tp->mptcp_loc_token); + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock_bh(); + + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE); +} + +/** + * This function increments the refcount of the mpcb struct. + * It is the responsibility of the caller to decrement when releasing + * the structure. + */ +struct sock *mptcp_hash_find(const struct net *net, const u32 token) +{ + const u32 hash = mptcp_hash_tk(token); + const struct tcp_sock *meta_tp; + struct sock *meta_sk = NULL; + const struct hlist_nulls_node *node; + + rcu_read_lock(); +begin: + hlist_nulls_for_each_entry_rcu(meta_tp, node, &tk_hashtable[hash], + tk_table) { + meta_sk = (struct sock *)meta_tp; + if (token == meta_tp->mptcp_loc_token && + net_eq(net, sock_net(meta_sk))) { + if (unlikely(!atomic_inc_not_zero(&meta_sk->sk_refcnt))) + goto out; + if (unlikely(token != meta_tp->mptcp_loc_token || + !net_eq(net, sock_net(meta_sk)))) { + sock_gen_put(meta_sk); + goto begin; + } + goto found; + } + } + /* A TCP-socket is destroyed by RCU. 
So, it might have been recycled + * and put into another hash-table list. So, after the lookup we may + * end up in a different list. So, we may need to restart. + * + * See also the comment in __inet_lookup_established. + */ + if (get_nulls_value(node) != hash) + goto begin; +out: + meta_sk = NULL; +found: + rcu_read_unlock(); + return meta_sk; +} + +void mptcp_hash_remove_bh(struct tcp_sock *meta_tp) +{ + /* remove from the token hashtable */ + rcu_read_lock_bh(); + spin_lock(&mptcp_tk_hashlock); + hlist_nulls_del_init_rcu(&meta_tp->tk_table); + meta_tp->inside_tk_table = 0; + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock_bh(); +} + +void mptcp_hash_remove(struct tcp_sock *meta_tp) +{ + rcu_read_lock(); + spin_lock(&mptcp_tk_hashlock); + hlist_nulls_del_init_rcu(&meta_tp->tk_table); + meta_tp->inside_tk_table = 0; + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock(); +} + +struct sock *mptcp_select_ack_sock(const struct sock *meta_sk) +{ + const struct tcp_sock *meta_tp = tcp_sk(meta_sk); + struct sock *sk, *rttsk = NULL, *lastsk = NULL; + u32 min_time = 0, last_active = 0; + + mptcp_for_each_sk(meta_tp->mpcb, sk) { + struct tcp_sock *tp = tcp_sk(sk); + u32 elapsed; + + if (!mptcp_sk_can_send_ack(sk) || tp->pf) + continue; + + elapsed = keepalive_time_elapsed(tp); + + /* We take the one with the lowest RTT within a reasonable + * (meta-RTO)-timeframe + */ + if (elapsed < inet_csk(meta_sk)->icsk_rto) { + if (!min_time || tp->srtt_us < min_time) { + min_time = tp->srtt_us; + rttsk = sk; + } + continue; + } + + /* Otherwise, we just take the most recent active */ + if (!rttsk && (!last_active || elapsed < last_active)) { + last_active = elapsed; + lastsk = sk; + } + } + + if (rttsk) + return rttsk; + + return lastsk; +} +EXPORT_SYMBOL(mptcp_select_ack_sock); + +static void mptcp_sock_def_error_report(struct sock *sk) +{ + const struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb; + + if (!sock_flag(sk, SOCK_DEAD)) + mptcp_sub_close(sk, 0); + + if (mpcb->infinite_mapping_rcv || mpcb->infinite_mapping_snd || + mpcb->send_infinite_mapping) { + struct sock *meta_sk = mptcp_meta_sk(sk); + + meta_sk->sk_err = sk->sk_err; + meta_sk->sk_err_soft = sk->sk_err_soft; + + if (!sock_flag(meta_sk, SOCK_DEAD)) + meta_sk->sk_error_report(meta_sk); + + tcp_done(meta_sk); + } + + sk->sk_err = 0; + return; +} + +static void mptcp_mpcb_put(struct mptcp_cb *mpcb) +{ + if (atomic_dec_and_test(&mpcb->mpcb_refcnt)) { + mptcp_cleanup_path_manager(mpcb); + mptcp_cleanup_scheduler(mpcb); + kmem_cache_free(mptcp_cb_cache, mpcb); + } +} + +void mptcp_sock_destruct(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (!is_meta_sk(sk) && !tp->was_meta_sk) { + BUG_ON(!hlist_unhashed(&tp->mptcp->cb_list)); + + kmem_cache_free(mptcp_sock_cache, tp->mptcp); + tp->mptcp = NULL; + + /* Taken when mpcb pointer was set */ + sock_put(mptcp_meta_sk(sk)); + mptcp_mpcb_put(tp->mpcb); + } else { + struct mptcp_cb *mpcb = tp->mpcb; + struct mptcp_tw *mptw; + + /* The mpcb is disappearing - we can make the final + * update to the rcv_nxt of the time-wait-sock and remove + * its reference to the mpcb. 
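+		 * Every tw-sock on the list still holds a reference on the
+		 * mpcb (taken in mptcp_init_tw_sock()); it is dropped via
+		 * mptcp_mpcb_put() below.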
+ */ + spin_lock_bh(&mpcb->tw_lock); + list_for_each_entry_rcu(mptw, &mpcb->tw_list, list) { + list_del_rcu(&mptw->list); + mptw->in_list = 0; + mptcp_mpcb_put(mpcb); + rcu_assign_pointer(mptw->mpcb, NULL); + } + spin_unlock_bh(&mpcb->tw_lock); + + mptcp_mpcb_put(mpcb); + + mptcp_debug("%s destroying meta-sk\n", __func__); + } + + WARN_ON(!static_key_false(&mptcp_static_key)); + + /* Must be called here, because this will decrement the jump-label. */ + inet_sock_destruct(sk); +} + +void mptcp_destroy_sock(struct sock *sk) +{ + if (is_meta_sk(sk)) { + struct sock *sk_it, *tmpsk; + + if (tcp_sk(sk)->mpcb->sched_ops->release) + tcp_sk(sk)->mpcb->sched_ops->release(sk); + + __skb_queue_purge(&tcp_sk(sk)->mpcb->reinject_queue); + mptcp_purge_ofo_queue(tcp_sk(sk)); + + /* We have to close all remaining subflows. Normally, they + * should all be about to get closed. But, if the kernel is + * forcing a closure (e.g., tcp_write_err), the subflows might + * not have been closed properly (as we are waiting for the + * DATA_ACK of the DATA_FIN). + */ + mptcp_for_each_sk_safe(tcp_sk(sk)->mpcb, sk_it, tmpsk) { + /* Already did call tcp_close - waiting for graceful + * closure, or if we are retransmitting fast-close on + * the subflow. The reset (or timeout) will kill the + * subflow.. + */ + if (tcp_sk(sk_it)->closing || + tcp_sk(sk_it)->send_mp_fclose) + continue; + + /* Allow the delayed work first to prevent time-wait state */ + if (delayed_work_pending(&tcp_sk(sk_it)->mptcp->work)) + continue; + + mptcp_sub_close(sk_it, 0); + } + } else { + mptcp_del_sock(sk); + } +} + +static void mptcp_set_state(struct sock *sk) +{ + struct sock *meta_sk = mptcp_meta_sk(sk); + + /* Meta is not yet established - wake up the application */ + if ((1 << meta_sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV) && + sk->sk_state == TCP_ESTABLISHED) { + tcp_set_state(meta_sk, TCP_ESTABLISHED); + + if (!sock_flag(meta_sk, SOCK_DEAD)) { + meta_sk->sk_state_change(meta_sk); + sk_wake_async(meta_sk, SOCK_WAKE_IO, POLL_OUT); + } + } + + if (sk->sk_state == TCP_ESTABLISHED) { + tcp_sk(sk)->mptcp->establish_increased = 1; + tcp_sk(sk)->mpcb->cnt_established++; + } +} + +static void mptcp_assign_congestion_control(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *meta_icsk = inet_csk(mptcp_meta_sk(sk)); + const struct tcp_congestion_ops *ca = meta_icsk->icsk_ca_ops; + + /* Congestion control is the same as meta. Thus, it has been + * try_module_get'd by tcp_assign_congestion_control. + */ + if (icsk->icsk_ca_ops == ca) + return; + + /* Use the same congestion control as set on the meta-sk */ + if (!try_module_get(ca->owner)) { + /* This should never happen. The congestion control is linked + * to the meta-socket (through tcp_assign_congestion_control) + * who "holds" the refcnt on the module. + */ + WARN(1, "Could not get the congestion control!"); + return; + } + icsk->icsk_ca_ops = ca; + + /* Clear out private data before diag gets it and + * the ca has not been initialized. 
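+	 * (icsk_ca_priv may still hold data of the previously assigned
+	 * congestion control at this point.)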
+ */ + if (ca->get_info) + memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + + return; +} + +u32 mptcp_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; +u32 mptcp_seed = 0; + +void mptcp_key_sha1(u64 key, u32 *token, u64 *idsn) +{ + u32 workspace[SHA_WORKSPACE_WORDS]; + u32 mptcp_hashed_key[SHA_DIGEST_WORDS]; + u8 input[64]; + int i; + + memset(workspace, 0, sizeof(workspace)); + + /* Initialize input with appropriate padding */ + memset(&input[9], 0, sizeof(input) - 10); /* -10, because the last byte + * is explicitly set too + */ + memcpy(input, &key, sizeof(key)); /* Copy key to the msg beginning */ + input[8] = 0x80; /* Padding: First bit after message = 1 */ + input[63] = 0x40; /* Padding: Length of the message = 64 bits */ + + sha_init(mptcp_hashed_key); + sha_transform(mptcp_hashed_key, input, workspace); + + for (i = 0; i < 5; i++) + mptcp_hashed_key[i] = cpu_to_be32(mptcp_hashed_key[i]); + + if (token) + *token = mptcp_hashed_key[0]; + if (idsn) + *idsn = *((u64 *)&mptcp_hashed_key[3]); +} + +void mptcp_hmac_sha1(u8 *key_1, u8 *key_2, u32 *hash_out, int arg_num, ...) +{ + u32 workspace[SHA_WORKSPACE_WORDS]; + u8 input[128]; /* 2 512-bit blocks */ + int i; + int index; + int length; + u8 *msg; + va_list list; + + memset(workspace, 0, sizeof(workspace)); + + /* Generate key xored with ipad */ + memset(input, 0x36, 64); + for (i = 0; i < 8; i++) + input[i] ^= key_1[i]; + for (i = 0; i < 8; i++) + input[i + 8] ^= key_2[i]; + + va_start(list, arg_num); + index = 64; + for (i = 0; i < arg_num; i++) { + length = va_arg(list, int); + msg = va_arg(list, u8 *); + BUG_ON(index + length > 125); /* Message is too long */ + memcpy(&input[index], msg, length); + index += length; + } + va_end(list); + + input[index] = 0x80; /* Padding: First bit after message = 1 */ + memset(&input[index + 1], 0, (126 - index)); + + /* Padding: Length of the message = 512 + message length (bits) */ + input[126] = 0x02; + input[127] = ((index - 64) * 8); /* Message length (bits) */ + + sha_init(hash_out); + sha_transform(hash_out, input, workspace); + memset(workspace, 0, sizeof(workspace)); + + sha_transform(hash_out, &input[64], workspace); + memset(workspace, 0, sizeof(workspace)); + + for (i = 0; i < 5; i++) + hash_out[i] = cpu_to_be32(hash_out[i]); + + /* Prepare second part of hmac */ + memset(input, 0x5C, 64); + for (i = 0; i < 8; i++) + input[i] ^= key_1[i]; + for (i = 0; i < 8; i++) + input[i + 8] ^= key_2[i]; + + memcpy(&input[64], hash_out, 20); + input[84] = 0x80; + memset(&input[85], 0, 41); + + /* Padding: Length of the message = 512 + 160 bits */ + input[126] = 0x02; + input[127] = 0xA0; + + sha_init(hash_out); + sha_transform(hash_out, input, workspace); + memset(workspace, 0, sizeof(workspace)); + + sha_transform(hash_out, &input[64], workspace); + + for (i = 0; i < 5; i++) + hash_out[i] = cpu_to_be32(hash_out[i]); +} +EXPORT_SYMBOL(mptcp_hmac_sha1); + +static void mptcp_mpcb_inherit_sockopts(struct sock *meta_sk, struct sock *master_sk) +{ + /* Socket-options handled by sk_clone_lock while creating the meta-sk. + * ====== + * SO_SNDBUF, SO_SNDBUFFORCE, SO_RCVBUF, SO_RCVBUFFORCE, SO_RCVLOWAT, + * SO_RCVTIMEO, SO_SNDTIMEO, SO_ATTACH_FILTER, SO_DETACH_FILTER, + * TCP_NODELAY, TCP_CORK + * + * Socket-options handled in this function here + * ====== + * TCP_DEFER_ACCEPT + * SO_KEEPALIVE + * + * Socket-options on the todo-list + * ====== + * SO_BINDTODEVICE - should probably prevent creation of new subsocks + * across other devices. - what about the api-draft? 
+ * SO_DEBUG
+ * SO_REUSEADDR - probably we don't care about this
+ * SO_DONTROUTE, SO_BROADCAST
+ * SO_OOBINLINE
+ * SO_LINGER
+ * SO_TIMESTAMP* - I don't think this is of concern for a SOCK_STREAM
+ * SO_PASSSEC - I don't think this is of concern for a SOCK_STREAM
+ * SO_RXQ_OVFL
+ * TCP_COOKIE_TRANSACTIONS
+ * TCP_MAXSEG
+ * TCP_THIN_* - Handled by sk_clone_lock, but we need to support this
+ *		 in mptcp_meta_retransmit_timer. AND we need to check
+ *		 what to do about the subsockets.
+ * TCP_LINGER2
+ * TCP_WINDOW_CLAMP
+ * TCP_USER_TIMEOUT
+ * TCP_MD5SIG
+ *
+ * Socket-options of no concern for the meta-socket (but for the subsocket)
+ * ======
+ * SO_PRIORITY
+ * SO_MARK
+ * TCP_CONGESTION
+ * TCP_SYNCNT
+ * TCP_QUICKACK
+ */
+
+	/* DEFER_ACCEPT should not be set on the meta, as we want to accept new subflows directly */
+	inet_csk(meta_sk)->icsk_accept_queue.rskq_defer_accept = 0;
+
+	/* Keepalives are handled entirely at the MPTCP-layer */
+	if (sock_flag(meta_sk, SOCK_KEEPOPEN)) {
+		inet_csk_reset_keepalive_timer(meta_sk,
+					       keepalive_time_when(tcp_sk(meta_sk)));
+		sock_reset_flag(master_sk, SOCK_KEEPOPEN);
+		inet_csk_delete_keepalive_timer(master_sk);
+	}
+
+	/* Do not propagate subflow-errors up to the MPTCP-layer */
+	inet_sk(master_sk)->recverr = 0;
+}
+
+static void mptcp_sub_inherit_sockopts(const struct sock *meta_sk, struct sock *sub_sk)
+{
+	/* IP_TOS also goes to the subflow. */
+	if (inet_sk(sub_sk)->tos != inet_sk(meta_sk)->tos) {
+		inet_sk(sub_sk)->tos = inet_sk(meta_sk)->tos;
+		sub_sk->sk_priority = meta_sk->sk_priority;
+		sk_dst_reset(sub_sk);
+	}
+
+	/* Inherit SO_REUSEADDR */
+	sub_sk->sk_reuse = meta_sk->sk_reuse;
+
+	/* Inherit snd/rcv-buffer locks */
+	sub_sk->sk_userlocks = meta_sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+
+	/* Nagle/Cork is forced off on the subflows. It is handled at the meta-layer */
+	tcp_sk(sub_sk)->nonagle = TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
+
+	/* Keepalives are handled entirely at the MPTCP-layer */
+	if (sock_flag(sub_sk, SOCK_KEEPOPEN)) {
+		sock_reset_flag(sub_sk, SOCK_KEEPOPEN);
+		inet_csk_delete_keepalive_timer(sub_sk);
+	}
+
+	/* Do not propagate subflow-errors up to the MPTCP-layer */
+	inet_sk(sub_sk)->recverr = 0;
+}
+
+int mptcp_backlog_rcv(struct sock *meta_sk, struct sk_buff *skb)
+{
+	/* skb->sk may be NULL if we receive a packet immediately after the
+	 * SYN/ACK + MP_CAPABLE.
+	 */
+	struct sock *sk = skb->sk ? 
skb->sk : meta_sk;
+	int ret = 0;
+
+	skb->sk = NULL;
+
+	if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	if (sk->sk_family == AF_INET)
+		ret = tcp_v4_do_rcv(sk, skb);
+#if IS_ENABLED(CONFIG_IPV6)
+	else
+		ret = tcp_v6_do_rcv(sk, skb);
+#endif
+
+	sock_put(sk);
+	return ret;
+}
+
+struct lock_class_key meta_key;
+struct lock_class_key meta_slock_key;
+
+static const struct tcp_sock_ops mptcp_meta_specific = {
+	.__select_window		= __mptcp_select_window,
+	.select_window			= mptcp_select_window,
+	.select_initial_window		= mptcp_select_initial_window,
+	.select_size			= mptcp_select_size,
+	.init_buffer_space		= mptcp_init_buffer_space,
+	.set_rto			= mptcp_tcp_set_rto,
+	.should_expand_sndbuf		= mptcp_should_expand_sndbuf,
+	.send_fin			= mptcp_send_fin,
+	.write_xmit			= mptcp_write_xmit,
+	.send_active_reset		= mptcp_send_active_reset,
+	.write_wakeup			= mptcp_write_wakeup,
+	.prune_ofo_queue		= mptcp_prune_ofo_queue,
+	.retransmit_timer		= mptcp_meta_retransmit_timer,
+	.time_wait			= mptcp_time_wait,
+	.cleanup_rbuf			= mptcp_cleanup_rbuf,
+};
+
+static const struct tcp_sock_ops mptcp_sub_specific = {
+	.__select_window		= __mptcp_select_window,
+	.select_window			= mptcp_select_window,
+	.select_initial_window		= mptcp_select_initial_window,
+	.select_size			= mptcp_select_size,
+	.init_buffer_space		= mptcp_init_buffer_space,
+	.set_rto			= mptcp_tcp_set_rto,
+	.should_expand_sndbuf		= mptcp_should_expand_sndbuf,
+	.send_fin			= tcp_send_fin,
+	.write_xmit			= tcp_write_xmit,
+	.send_active_reset		= tcp_send_active_reset,
+	.write_wakeup			= tcp_write_wakeup,
+	.prune_ofo_queue		= tcp_prune_ofo_queue,
+	.retransmit_timer		= mptcp_sub_retransmit_timer,
+	.time_wait			= tcp_time_wait,
+	.cleanup_rbuf			= tcp_cleanup_rbuf,
+};
+
+static int mptcp_alloc_mpcb(struct sock *meta_sk, __u64 remote_key,
+			    __u8 mptcp_ver, u32 window)
+{
+	struct mptcp_cb *mpcb;
+	struct sock *master_sk;
+	struct inet_connection_sock *meta_icsk = inet_csk(meta_sk);
+	struct tcp_sock *master_tp, *meta_tp = tcp_sk(meta_sk);
+	u64 idsn;
+
+	dst_release(meta_sk->sk_rx_dst);
+	meta_sk->sk_rx_dst = NULL;
+	/* This flag tells sock_lock_init to
+	 * reclassify the lock-class of the master socket.
+	 */
+	meta_tp->is_master_sk = 1;
+	master_sk = sk_clone_lock(meta_sk, GFP_ATOMIC | __GFP_ZERO);
+	meta_tp->is_master_sk = 0;
+	if (!master_sk)
+		return -ENOBUFS;
+
+	master_tp = tcp_sk(master_sk);
+
+	mpcb = kmem_cache_zalloc(mptcp_cb_cache, GFP_ATOMIC);
+	if (!mpcb) {
+		/* sk_free (and __sk_free) requires wmem_alloc to be 1.
+		 * All the rest is set to 0 thanks to __GFP_ZERO above. 
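+		 * sk_free() drops wmem_alloc and only really frees the
+		 * socket once it reaches zero.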
+ */ + atomic_set(&master_sk->sk_wmem_alloc, 1); + sk_free(master_sk); + return -ENOBUFS; + } + +#if IS_ENABLED(CONFIG_IPV6) + if (meta_icsk->icsk_af_ops == &mptcp_v6_mapped) { + struct ipv6_pinfo *newnp, *np = inet6_sk(meta_sk); + + inet_sk(master_sk)->pinet6 = &((struct tcp6_sock *)master_sk)->inet6; + + newnp = inet6_sk(master_sk); + memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; + newnp->opt = NULL; + newnp->pktoptions = NULL; + (void)xchg(&newnp->rxpmtu, NULL); + } else if (meta_sk->sk_family == AF_INET6) { + struct ipv6_pinfo *newnp, *np = inet6_sk(meta_sk); + + inet_sk(master_sk)->pinet6 = &((struct tcp6_sock *)master_sk)->inet6; + + newnp = inet6_sk(master_sk); + memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + + newnp->hop_limit = -1; + newnp->mcast_hops = IPV6_DEFAULT_MCASTHOPS; + newnp->mc_loop = 1; + newnp->pmtudisc = IPV6_PMTUDISC_WANT; + master_sk->sk_ipv6only = sock_net(master_sk)->ipv6.sysctl.bindv6only; + } +#endif + + meta_tp->mptcp = NULL; + + /* Store the mptcp version agreed on initial handshake */ + mpcb->mptcp_ver = mptcp_ver; + + /* Store the keys and generate the peer's token */ + mpcb->mptcp_loc_key = meta_tp->mptcp_loc_key; + mpcb->mptcp_loc_token = meta_tp->mptcp_loc_token; + + /* Generate Initial data-sequence-numbers */ + mptcp_key_sha1(mpcb->mptcp_loc_key, NULL, &idsn); + idsn = ntohll(idsn) + 1; + mpcb->snd_high_order[0] = idsn >> 32; + mpcb->snd_high_order[1] = mpcb->snd_high_order[0] - 1; + + meta_tp->write_seq = (u32)idsn; + meta_tp->snd_sml = meta_tp->write_seq; + meta_tp->snd_una = meta_tp->write_seq; + meta_tp->snd_nxt = meta_tp->write_seq; + meta_tp->pushed_seq = meta_tp->write_seq; + meta_tp->snd_up = meta_tp->write_seq; + + mpcb->mptcp_rem_key = remote_key; + mptcp_key_sha1(mpcb->mptcp_rem_key, &mpcb->mptcp_rem_token, &idsn); + idsn = ntohll(idsn) + 1; + mpcb->rcv_high_order[0] = idsn >> 32; + mpcb->rcv_high_order[1] = mpcb->rcv_high_order[0] + 1; + meta_tp->copied_seq = (u32) idsn; + meta_tp->rcv_nxt = (u32) idsn; + meta_tp->rcv_wup = (u32) idsn; + + meta_tp->snd_wl1 = meta_tp->rcv_nxt - 1; + meta_tp->snd_wnd = window; + meta_tp->retrans_stamp = 0; /* Set in tcp_connect() */ + + meta_tp->packets_out = 0; + meta_icsk->icsk_probes_out = 0; + + /* Set mptcp-pointers */ + master_tp->mpcb = mpcb; + master_tp->meta_sk = meta_sk; + meta_tp->mpcb = mpcb; + meta_tp->meta_sk = meta_sk; + mpcb->meta_sk = meta_sk; + mpcb->master_sk = master_sk; + + meta_tp->was_meta_sk = 0; + + /* Initialize the queues */ + skb_queue_head_init(&mpcb->reinject_queue); + skb_queue_head_init(&master_tp->out_of_order_queue); + tcp_prequeue_init(master_tp); + INIT_LIST_HEAD(&master_tp->tsq_node); + + master_tp->tsq_flags = 0; + + mutex_init(&mpcb->mpcb_mutex); + + /* Init the accept_queue structure, we support a queue of 32 pending + * connections, it does not need to be huge, since we only store here + * pending subflow creations. 
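+	 * (These are the request-socks of new subflows that are still
+	 * waiting for the final ACK of their handshake.)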
+ */ + if (reqsk_queue_alloc(&meta_icsk->icsk_accept_queue, 32, GFP_ATOMIC)) { + inet_put_port(master_sk); + kmem_cache_free(mptcp_cb_cache, mpcb); + sk_free(master_sk); + return -ENOMEM; + } + + if (!sock_flag(meta_sk, SOCK_MPTCP)) { + mptcp_enable_static_key(); + sock_set_flag(meta_sk, SOCK_MPTCP); + } + + /* Redefine function-pointers as the meta-sk is now fully ready */ + meta_tp->mpc = 1; + meta_tp->ops = &mptcp_meta_specific; + + meta_sk->sk_backlog_rcv = mptcp_backlog_rcv; + meta_sk->sk_destruct = mptcp_sock_destruct; + + /* Meta-level retransmit timer */ + meta_icsk->icsk_rto *= 2; /* Double of initial - rto */ + + tcp_init_xmit_timers(master_sk); + /* Has been set for sending out the SYN */ + inet_csk_clear_xmit_timer(meta_sk, ICSK_TIME_RETRANS); + + if (!meta_tp->inside_tk_table) { + /* Adding the meta_tp in the token hashtable - coming from server-side */ + rcu_read_lock(); + spin_lock(&mptcp_tk_hashlock); + + __mptcp_hash_insert(meta_tp, mpcb->mptcp_loc_token); + + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock(); + } + master_tp->inside_tk_table = 0; + + /* Init time-wait stuff */ + INIT_LIST_HEAD(&mpcb->tw_list); + spin_lock_init(&mpcb->tw_lock); + + INIT_HLIST_HEAD(&mpcb->callback_list); + + mptcp_mpcb_inherit_sockopts(meta_sk, master_sk); + + mpcb->orig_sk_rcvbuf = meta_sk->sk_rcvbuf; + mpcb->orig_sk_sndbuf = meta_sk->sk_sndbuf; + mpcb->orig_window_clamp = meta_tp->window_clamp; + + /* The meta is directly linked - set refcnt to 1 */ + atomic_set(&mpcb->mpcb_refcnt, 1); + + mptcp_init_path_manager(mpcb); + mptcp_init_scheduler(mpcb); + + if (!try_module_get(inet_csk(master_sk)->icsk_ca_ops->owner)) + tcp_assign_congestion_control(master_sk); + + + mptcp_debug("%s: created mpcb with token %#x\n", + __func__, mpcb->mptcp_loc_token); + + return 0; +} + +void mptcp_fallback_meta_sk(struct sock *meta_sk) +{ + kfree(inet_csk(meta_sk)->icsk_accept_queue.listen_opt); + kmem_cache_free(mptcp_cb_cache, tcp_sk(meta_sk)->mpcb); +} + +int mptcp_add_sock(struct sock *meta_sk, struct sock *sk, u8 loc_id, u8 rem_id, + gfp_t flags) +{ + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; + struct tcp_sock *tp = tcp_sk(sk); + + tp->mptcp = kmem_cache_zalloc(mptcp_sock_cache, flags); + if (!tp->mptcp) + return -ENOMEM; + + tp->mptcp->path_index = mptcp_set_new_pathindex(mpcb); + /* No more space for more subflows? */ + if (!tp->mptcp->path_index) { + kmem_cache_free(mptcp_sock_cache, tp->mptcp); + return -EPERM; + } + + tp->mptcp->sbf_id = ++mpcb->last_sbf_id; + + INIT_HLIST_NODE(&tp->mptcp->cb_list); + + tp->mptcp->tp = tp; + tp->mpcb = mpcb; + tp->meta_sk = meta_sk; + + if (!sock_flag(sk, SOCK_MPTCP)) { + mptcp_enable_static_key(); + sock_set_flag(sk, SOCK_MPTCP); + } + + tp->mpc = 1; + tp->ops = &mptcp_sub_specific; + + tp->mptcp->loc_id = loc_id; + tp->mptcp->rem_id = rem_id; + if (mpcb->sched_ops->init) + mpcb->sched_ops->init(sk); + + /* The corresponding sock_put is in mptcp_sock_destruct(). It cannot be + * included in mptcp_del_sock(), because the mpcb must remain alive + * until the last subsocket is completely destroyed. 
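+	 * (mptcp_del_sock() merely detaches the subflow; the final
+	 * destruction may happen much later.)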
+ */ + sock_hold(meta_sk); + atomic_inc(&mpcb->mpcb_refcnt); + + tp->mptcp->next = mpcb->connection_list; + mpcb->connection_list = tp; + tp->mptcp->attached = 1; + + mpcb->cnt_subflows++; + atomic_add(atomic_read(&((struct sock *)tp)->sk_rmem_alloc), + &meta_sk->sk_rmem_alloc); + + mptcp_sub_inherit_sockopts(meta_sk, sk); + INIT_DELAYED_WORK(&tp->mptcp->work, mptcp_sub_close_wq); + + /* Properly inherit CC from the meta-socket */ + mptcp_assign_congestion_control(sk); + + /* As we successfully allocated the mptcp_tcp_sock, we have to + * change the function-pointers here (for sk_destruct to work correctly) + */ + sk->sk_error_report = mptcp_sock_def_error_report; + sk->sk_data_ready = mptcp_data_ready; + sk->sk_write_space = mptcp_write_space; + sk->sk_state_change = mptcp_set_state; + sk->sk_destruct = mptcp_sock_destruct; + + if (sk->sk_family == AF_INET) + mptcp_debug("%s: token %#x pi %d, src_addr:%pI4:%d dst_addr:%pI4:%d, cnt_subflows now %d\n", + __func__ , mpcb->mptcp_loc_token, + tp->mptcp->path_index, + &((struct inet_sock *)tp)->inet_saddr, + ntohs(((struct inet_sock *)tp)->inet_sport), + &((struct inet_sock *)tp)->inet_daddr, + ntohs(((struct inet_sock *)tp)->inet_dport), + mpcb->cnt_subflows); +#if IS_ENABLED(CONFIG_IPV6) + else + mptcp_debug("%s: token %#x pi %d, src_addr:%pI6:%d dst_addr:%pI6:%d, cnt_subflows now %d\n", + __func__ , mpcb->mptcp_loc_token, + tp->mptcp->path_index, &inet6_sk(sk)->saddr, + ntohs(((struct inet_sock *)tp)->inet_sport), + &sk->sk_v6_daddr, + ntohs(((struct inet_sock *)tp)->inet_dport), + mpcb->cnt_subflows); +#endif + + return 0; +} + +void mptcp_del_sock(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk), *tp_prev; + struct mptcp_cb *mpcb; + + if (!tp->mptcp || !tp->mptcp->attached) + return; + + mpcb = tp->mpcb; + tp_prev = mpcb->connection_list; + + if (mpcb->sched_ops->release) + mpcb->sched_ops->release(sk); + + mptcp_debug("%s: Removing subsock tok %#x pi:%d state %d is_meta? %d\n", + __func__, mpcb->mptcp_loc_token, tp->mptcp->path_index, + sk->sk_state, is_meta_sk(sk)); + + if (tp_prev == tp) { + mpcb->connection_list = tp->mptcp->next; + } else { + for (; tp_prev && tp_prev->mptcp->next; tp_prev = tp_prev->mptcp->next) { + if (tp_prev->mptcp->next == tp) { + tp_prev->mptcp->next = tp->mptcp->next; + break; + } + } + } + mpcb->cnt_subflows--; + if (tp->mptcp->establish_increased) + mpcb->cnt_established--; + + tp->mptcp->next = NULL; + tp->mptcp->attached = 0; + mpcb->path_index_bits &= ~(1 << tp->mptcp->path_index); + + if (!skb_queue_empty(&sk->sk_write_queue)) + mptcp_reinject_data(sk, 0); + + if (is_master_tp(tp)) + mpcb->master_sk = NULL; + else if (tp->mptcp->pre_established) + sk_stop_timer(sk, &tp->mptcp->mptcp_ack_timer); + + rcu_assign_pointer(inet_sk(sk)->inet_opt, NULL); +} + +/* Updates the MPTCP-session based on path-manager information (e.g., addresses, + * low-prio flows,...). + */ +void mptcp_update_metasocket(struct sock *sk, const struct sock *meta_sk) +{ + if (tcp_sk(sk)->mpcb->pm_ops->new_session) + tcp_sk(sk)->mpcb->pm_ops->new_session(meta_sk); +} + +/* Clean up the receive buffer for full frames taken by the user, + * then send an ACK if necessary. COPIED is the number of bytes + * tcp_recvmsg has given to the user so far, it speeds up the + * calculation of whether or not we must ACK for the sake of + * a window update. 
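+ * This mirrors tcp_cleanup_rbuf(), but iterates over all subflows that
+ * are currently able to send an ACK.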
+ */ +void mptcp_cleanup_rbuf(struct sock *meta_sk, int copied) +{ + struct tcp_sock *meta_tp = tcp_sk(meta_sk); + struct sock *sk; + __u32 rcv_window_now = 0; + + if (copied > 0 && !(meta_sk->sk_shutdown & RCV_SHUTDOWN)) { + rcv_window_now = tcp_receive_window(meta_tp); + + if (2 * rcv_window_now > meta_tp->window_clamp) + rcv_window_now = 0; + } + + mptcp_for_each_sk(meta_tp->mpcb, sk) { + struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (!mptcp_sk_can_send_ack(sk)) + continue; + + if (!inet_csk_ack_scheduled(sk)) + goto second_part; + /* Delayed ACKs frequently hit locked sockets during bulk + * receive. + */ + if (icsk->icsk_ack.blocked || + /* Once-per-two-segments ACK was not sent by tcp_input.c */ + tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || + /* If this read emptied read buffer, we send ACK, if + * connection is not bidirectional, user drained + * receive buffer and there was a small segment + * in queue. + */ + (copied > 0 && + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && + !icsk->icsk_ack.pingpong)) && + !atomic_read(&meta_sk->sk_rmem_alloc))) { + tcp_send_ack(sk); + continue; + } + +second_part: + /* This here is the second part of tcp_cleanup_rbuf */ + if (rcv_window_now) { + __u32 new_window = tp->ops->__select_window(sk); + + /* Send ACK now, if this read freed lots of space + * in our buffer. Certainly, new_window is new window. + * We can advertise it now, if it is not less than + * current one. + * "Lots" means "at least twice" here. + */ + if (new_window && new_window >= 2 * rcv_window_now) + tcp_send_ack(sk); + } + } +} + +static int mptcp_sub_send_fin(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb = tcp_write_queue_tail(sk); + int mss_now; + + /* Optimization, tack on the FIN if we have a queue of + * unsent frames. But be careful about outgoing SACKS + * and IP options. + */ + mss_now = tcp_current_mss(sk); + + if (tcp_send_head(sk) != NULL) { + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; + TCP_SKB_CB(skb)->end_seq++; + tp->write_seq++; + } else { + skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_ATOMIC); + if (!skb) + return 1; + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, MAX_TCP_HEADER); + /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ + tcp_init_nondata_skb(skb, tp->write_seq, + TCPHDR_ACK | TCPHDR_FIN); + tcp_queue_skb(sk, skb); + } + __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); + + return 0; +} + +void mptcp_sub_close_wq(struct work_struct *work) +{ + struct tcp_sock *tp = container_of(work, struct mptcp_tcp_sock, work.work)->tp; + struct sock *sk = (struct sock *)tp; + struct sock *meta_sk = mptcp_meta_sk(sk); + + mutex_lock(&tp->mpcb->mpcb_mutex); + lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING); + + if (sock_flag(sk, SOCK_DEAD)) + goto exit; + + /* We come from tcp_disconnect. 
We are sure that meta_sk is set. */
+	if (!mptcp(tp)) {
+		tp->closing = 1;
+		tcp_close(sk, 0);
+		goto exit;
+	}
+
+	if (meta_sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE) {
+		tp->closing = 1;
+		tcp_close(sk, 0);
+	} else if (tcp_close_state(sk)) {
+		sk->sk_shutdown |= SEND_SHUTDOWN;
+		tcp_send_fin(sk);
+	}
+
+exit:
+	release_sock(meta_sk);
+	mutex_unlock(&tp->mpcb->mpcb_mutex);
+	sock_put(sk);
+}
+
+void mptcp_sub_close(struct sock *sk, unsigned long delay)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct delayed_work *work = &tcp_sk(sk)->mptcp->work;
+
+	/* We are already closing - e.g., call from sock_def_error_report upon
+	 * tcp_disconnect in tcp_close.
+	 */
+	if (tp->closing)
+		return;
+
+	/* Work already scheduled? */
+	if (work_pending(&work->work)) {
+		/* Work present - who will be first? */
+		if (jiffies + delay > work->timer.expires)
+			return;
+
+		/* Try canceling - if it fails, work will be executed soon */
+		if (!cancel_delayed_work(work))
+			return;
+		sock_put(sk);
+	}
+
+	if (!delay) {
+		unsigned char old_state = sk->sk_state;
+
+		/* If we are in user-context we can directly do the closing
+		 * procedure. No need to schedule a work-queue.
+		 */
+		if (!in_softirq()) {
+			if (sock_flag(sk, SOCK_DEAD))
+				return;
+
+			if (!mptcp(tp)) {
+				tp->closing = 1;
+				tcp_close(sk, 0);
+				return;
+			}
+
+			if (mptcp_meta_sk(sk)->sk_shutdown == SHUTDOWN_MASK ||
+			    sk->sk_state == TCP_CLOSE) {
+				tp->closing = 1;
+				tcp_close(sk, 0);
+			} else if (tcp_close_state(sk)) {
+				sk->sk_shutdown |= SEND_SHUTDOWN;
+				tcp_send_fin(sk);
+			}
+
+			return;
+		}
+
+		/* We send the FIN directly, because it may take quite some
+		 * time until the work-queue gets scheduled...
+		 *
+		 * If mptcp_sub_send_fin returns 1, it failed and thus we reset
+		 * the old state so that tcp_close will finally send the fin
+		 * in user-context.
+		 */
+		if (!sk->sk_err && old_state != TCP_CLOSE &&
+		    tcp_close_state(sk) && mptcp_sub_send_fin(sk)) {
+			if (old_state == TCP_ESTABLISHED)
+				TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
+			sk->sk_state = old_state;
+		}
+	}
+
+	sock_hold(sk);
+	queue_delayed_work(mptcp_wq, work, delay);
+}
+
+void mptcp_sub_force_close(struct sock *sk)
+{
+	/* The tcp_done below may have freed the socket, if it is already
+	 * dead. Thus, we are not allowed to access it afterwards. That's why
+	 * we have to store the dead-state in this local variable.
+	 */
+	int sock_is_dead = sock_flag(sk, SOCK_DEAD);
+
+	tcp_sk(sk)->mp_killed = 1;
+
+	if (sk->sk_state != TCP_CLOSE)
+		tcp_done(sk);
+
+	if (!sock_is_dead)
+		mptcp_sub_close(sk, 0);
+}
+EXPORT_SYMBOL(mptcp_sub_force_close);
+
+/* Update the mpcb send buffer, based on the contributions
+ * of each subflow
+ */
+void mptcp_update_sndbuf(const struct tcp_sock *tp)
+{
+	struct sock *meta_sk = tp->meta_sk, *sk;
+	int new_sndbuf = 0, old_sndbuf = meta_sk->sk_sndbuf;
+
+	mptcp_for_each_sk(tp->mpcb, sk) {
+		if (!mptcp_sk_can_send(sk))
+			continue;
+
+		new_sndbuf += sk->sk_sndbuf;
+
+		if (new_sndbuf > sysctl_tcp_wmem[2] || new_sndbuf < 0) {
+			new_sndbuf = sysctl_tcp_wmem[2];
+			break;
+		}
+	}
+	meta_sk->sk_sndbuf = max(min(new_sndbuf, sysctl_tcp_wmem[2]), meta_sk->sk_sndbuf);
+
+	/* The subflow's call to sk_write_space in tcp_new_space ends up in
+	 * mptcp_write_space.
+	 * It has nothing to do with waking up the application.
+	 * So, we do it here. 
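+	 * (But only if the send buffer actually changed, to avoid
+	 * spurious wakeups.)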
+ */ + if (old_sndbuf != meta_sk->sk_sndbuf) + meta_sk->sk_write_space(meta_sk); +} + +void mptcp_close(struct sock *meta_sk, long timeout) +{ + struct tcp_sock *meta_tp = tcp_sk(meta_sk); + struct sock *sk_it, *tmpsk; + struct mptcp_cb *mpcb = meta_tp->mpcb; + struct sk_buff *skb; + int data_was_unread = 0; + int state; + + mptcp_debug("%s: Close of meta_sk with tok %#x\n", + __func__, mpcb->mptcp_loc_token); + + mutex_lock(&mpcb->mpcb_mutex); + lock_sock(meta_sk); + + if (meta_tp->inside_tk_table) { + /* Detach the mpcb from the token hashtable */ + mptcp_hash_remove_bh(meta_tp); + reqsk_queue_destroy(&inet_csk(meta_sk)->icsk_accept_queue); + } + + meta_sk->sk_shutdown = SHUTDOWN_MASK; + /* We need to flush the recv. buffs. We do this only on the + * descriptor close, not protocol-sourced closes, because the + * reader process may not have drained the data yet! + */ + while ((skb = __skb_dequeue(&meta_sk->sk_receive_queue)) != NULL) { + u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; + + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + len--; + data_was_unread += len; + __kfree_skb(skb); + } + + sk_mem_reclaim(meta_sk); + + /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */ + if (meta_sk->sk_state == TCP_CLOSE) { + mptcp_for_each_sk_safe(mpcb, sk_it, tmpsk) { + if (tcp_sk(sk_it)->send_mp_fclose) + continue; + mptcp_sub_close(sk_it, 0); + } + goto adjudge_to_death; + } + + if (data_was_unread) { + /* Unread data was tossed, zap the connection. */ + NET_INC_STATS_USER(sock_net(meta_sk), LINUX_MIB_TCPABORTONCLOSE); + tcp_set_state(meta_sk, TCP_CLOSE); + tcp_sk(meta_sk)->ops->send_active_reset(meta_sk, + meta_sk->sk_allocation); + } else if (sock_flag(meta_sk, SOCK_LINGER) && !meta_sk->sk_lingertime) { + /* Check zero linger _after_ checking for unread data. */ + meta_sk->sk_prot->disconnect(meta_sk, 0); + NET_INC_STATS_USER(sock_net(meta_sk), LINUX_MIB_TCPABORTONDATA); + } else if (tcp_close_state(meta_sk)) { + mptcp_send_fin(meta_sk); + } else if (meta_tp->snd_una == meta_tp->write_seq) { + /* The DATA_FIN has been sent and acknowledged + * (e.g., by sk_shutdown). Close all the other subflows + */ + mptcp_for_each_sk_safe(mpcb, sk_it, tmpsk) { + unsigned long delay = 0; + /* If we are the passive closer, don't trigger + * subflow-fin until the subflow has been finned + * by the peer. - thus we add a delay + */ + if (mpcb->passive_close && + sk_it->sk_state == TCP_ESTABLISHED) + delay = inet_csk(sk_it)->icsk_rto << 3; + + mptcp_sub_close(sk_it, delay); + } + } + + sk_stream_wait_close(meta_sk, timeout); + +adjudge_to_death: + state = meta_sk->sk_state; + sock_hold(meta_sk); + sock_orphan(meta_sk); + + /* socket will be freed after mptcp_close - we have to prevent + * access from the subflows. + */ + mptcp_for_each_sk(mpcb, sk_it) { + /* Similar to sock_orphan, but we don't set it DEAD, because + * the callbacks are still set and must be called. + */ + write_lock_bh(&sk_it->sk_callback_lock); + sk_set_socket(sk_it, NULL); + sk_it->sk_wq = NULL; + write_unlock_bh(&sk_it->sk_callback_lock); + } + + /* It is the last release_sock in its life. It will remove backlog. */ + release_sock(meta_sk); + + /* Now socket is owned by kernel and we acquire BH lock + * to finish close. No need to check for user refs. + */ + local_bh_disable(); + bh_lock_sock(meta_sk); + WARN_ON(sock_owned_by_user(meta_sk)); + + percpu_counter_inc(meta_sk->sk_prot->orphan_count); + + /* Have we already been destroyed by a softirq or backlog? 
*/
+	if (state != TCP_CLOSE && meta_sk->sk_state == TCP_CLOSE)
+		goto out;
+
+	/* This is a (useful) BSD violation of the RFC. There is a
+	 * problem with TCP as specified in that the other end could
+	 * keep a socket open forever with no application left at this end.
+	 * We use a 3 minute timeout (about the same as BSD) then kill
+	 * our end. If they send after that then tough - BUT: long enough
+	 * that we won't make the old 4*rto = almost no time - whoops
+	 * reset mistake.
+	 *
+	 * Nope, it was not a mistake. It is really desired behaviour
+	 * e.g. on http servers, when such sockets are useless, but
+	 * consume significant resources. Let's do it with special
+	 * linger2 option. --ANK
+	 */
+
+	if (meta_sk->sk_state == TCP_FIN_WAIT2) {
+		if (meta_tp->linger2 < 0) {
+			tcp_set_state(meta_sk, TCP_CLOSE);
+			meta_tp->ops->send_active_reset(meta_sk, GFP_ATOMIC);
+			NET_INC_STATS_BH(sock_net(meta_sk),
+					 LINUX_MIB_TCPABORTONLINGER);
+		} else {
+			const int tmo = tcp_fin_time(meta_sk);
+
+			if (tmo > TCP_TIMEWAIT_LEN) {
+				inet_csk_reset_keepalive_timer(meta_sk,
+							       tmo - TCP_TIMEWAIT_LEN);
+			} else {
+				meta_tp->ops->time_wait(meta_sk, TCP_FIN_WAIT2,
+							tmo);
+				goto out;
+			}
+		}
+	}
+	if (meta_sk->sk_state != TCP_CLOSE) {
+		sk_mem_reclaim(meta_sk);
+		if (tcp_too_many_orphans(meta_sk, 0)) {
+			if (net_ratelimit())
+				pr_info("MPTCP: too many orphaned sockets\n");
+			tcp_set_state(meta_sk, TCP_CLOSE);
+			meta_tp->ops->send_active_reset(meta_sk, GFP_ATOMIC);
+			NET_INC_STATS_BH(sock_net(meta_sk),
+					 LINUX_MIB_TCPABORTONMEMORY);
+		}
+	}
+
+	if (meta_sk->sk_state == TCP_CLOSE)
+		inet_csk_destroy_sock(meta_sk);
+	/* Otherwise, socket is reprieved until protocol close. */
+
+out:
+	bh_unlock_sock(meta_sk);
+	local_bh_enable();
+	mutex_unlock(&mpcb->mpcb_mutex);
+	sock_put(meta_sk); /* Taken by sock_hold */
+}
+
+void mptcp_disconnect(struct sock *sk)
+{
+	struct sock *subsk, *tmpsk;
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	__skb_queue_purge(&tp->mpcb->reinject_queue);
+
+	if (tp->inside_tk_table) {
+		mptcp_hash_remove_bh(tp);
+		reqsk_queue_destroy(&inet_csk(tp->meta_sk)->icsk_accept_queue);
+	}
+
+	local_bh_disable();
+	mptcp_for_each_sk_safe(tp->mpcb, subsk, tmpsk) {
+		/* The socket will get removed from the subsocket-list
+		 * and made non-mptcp by setting mpc to 0.
+		 *
+		 * This is necessary, because tcp_disconnect assumes
+		 * that the connection is completely dead afterwards.
+		 * Thus we need to do a mptcp_del_sock. Due to this call
+		 * we have to make it non-mptcp.
+		 *
+		 * We have to lock the socket, because we set mpc to 0.
+		 * An incoming packet would take the subsocket's lock
+		 * and go on into the receive-path.
+		 * This would be a race.
+		 */
+
+		bh_lock_sock(subsk);
+		mptcp_del_sock(subsk);
+		tcp_sk(subsk)->mpc = 0;
+		tcp_sk(subsk)->ops = &tcp_specific;
+		mptcp_sub_force_close(subsk);
+		bh_unlock_sock(subsk);
+	}
+	local_bh_enable();
+
+	tp->was_meta_sk = 1;
+	tp->mpc = 0;
+	tp->ops = &tcp_specific;
+}
+
+/* Returns 1 if we should enable MPTCP for that socket. 
*/ +int mptcp_doit(struct sock *sk) +{ + /* Don't do mptcp over loopback */ + if (sk->sk_family == AF_INET && + (ipv4_is_loopback(inet_sk(sk)->inet_daddr) || + ipv4_is_loopback(inet_sk(sk)->inet_saddr))) + return 0; +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6 && + (ipv6_addr_loopback(&sk->sk_v6_daddr) || + ipv6_addr_loopback(&inet6_sk(sk)->saddr))) + return 0; +#endif + if (mptcp_v6_is_v4_mapped(sk) && + ipv4_is_loopback(inet_sk(sk)->inet_saddr)) + return 0; + +#ifdef CONFIG_TCP_MD5SIG + /* If TCP_MD5SIG is enabled, do not do MPTCP - there is no Option-Space */ + if (tcp_sk(sk)->af_specific->md5_lookup(sk, sk)) + return 0; +#endif + + return 1; +} + +int mptcp_create_master_sk(struct sock *meta_sk, __u64 remote_key, + __u8 mptcp_ver, u32 window) +{ + struct tcp_sock *master_tp; + struct sock *master_sk; + + if (mptcp_alloc_mpcb(meta_sk, remote_key, mptcp_ver, window)) + goto err_alloc_mpcb; + + master_sk = tcp_sk(meta_sk)->mpcb->master_sk; + master_tp = tcp_sk(master_sk); + + if (mptcp_add_sock(meta_sk, master_sk, 0, 0, GFP_ATOMIC)) + goto err_add_sock; + + if (__inet_inherit_port(meta_sk, master_sk) < 0) + goto err_add_sock; + + meta_sk->sk_prot->unhash(meta_sk); + + if (master_sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(master_sk)) + __inet_hash_nolisten(master_sk, NULL); +#if IS_ENABLED(CONFIG_IPV6) + else + __inet_hash(master_sk, NULL); +#endif + + master_tp->mptcp->init_rcv_wnd = master_tp->rcv_wnd; + + return 0; + +err_add_sock: + mptcp_fallback_meta_sk(meta_sk); + + inet_csk_prepare_forced_close(master_sk); + tcp_done(master_sk); + inet_csk_prepare_forced_close(meta_sk); + tcp_done(meta_sk); + +err_alloc_mpcb: + return -ENOBUFS; +} + +static int __mptcp_check_req_master(struct sock *child, + struct request_sock *req) +{ + struct tcp_sock *child_tp = tcp_sk(child); + struct sock *meta_sk = child; + struct mptcp_cb *mpcb; + struct mptcp_request_sock *mtreq; + + /* Never contained an MP_CAPABLE */ + if (!inet_rsk(req)->mptcp_rqsk) + return 1; + + if (!inet_rsk(req)->saw_mpc) { + /* Fallback to regular TCP, because we saw one SYN without + * MP_CAPABLE. In tcp_check_req we continue the regular path. + * But, the socket has been added to the reqsk_tk_htb, so we + * must still remove it. + */ + MPTCP_INC_STATS_BH(sock_net(meta_sk), MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); + mptcp_reqsk_remove_tk(req); + return 1; + } + + MPTCP_INC_STATS_BH(sock_net(meta_sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); + + /* Just set this values to pass them to mptcp_alloc_mpcb */ + mtreq = mptcp_rsk(req); + child_tp->mptcp_loc_key = mtreq->mptcp_loc_key; + child_tp->mptcp_loc_token = mtreq->mptcp_loc_token; + + if (mptcp_create_master_sk(meta_sk, mtreq->mptcp_rem_key, + mtreq->mptcp_ver, child_tp->snd_wnd)) + return -ENOBUFS; + + child = tcp_sk(child)->mpcb->master_sk; + child_tp = tcp_sk(child); + mpcb = child_tp->mpcb; + + child_tp->mptcp->snt_isn = tcp_rsk(req)->snt_isn; + child_tp->mptcp->rcv_isn = tcp_rsk(req)->rcv_isn; + + mpcb->dss_csum = mtreq->dss_csum; + mpcb->server_side = 1; + + /* Will be moved to ESTABLISHED by tcp_rcv_state_process() */ + mptcp_update_metasocket(child, meta_sk); + + /* Needs to be done here additionally, because when accepting a + * new connection we pass by __reqsk_free and not reqsk_free. + */ + mptcp_reqsk_remove_tk(req); + + /* Hold when creating the meta-sk in tcp_vX_syn_recv_sock. 
*/ + sock_put(meta_sk); + + return 0; +} + +int mptcp_check_req_fastopen(struct sock *child, struct request_sock *req) +{ + struct sock *meta_sk = child, *master_sk; + struct sk_buff *skb; + u32 new_mapping; + int ret; + + ret = __mptcp_check_req_master(child, req); + if (ret) + return ret; + + master_sk = tcp_sk(meta_sk)->mpcb->master_sk; + + /* We need to rewind copied_seq as it is set to IDSN + 1 and as we have + * pre-MPTCP data in the receive queue. + */ + tcp_sk(meta_sk)->copied_seq -= tcp_sk(master_sk)->rcv_nxt - + tcp_rsk(req)->rcv_isn - 1; + + /* Map subflow sequence number to data sequence numbers. We need to map + * these data to [IDSN - len - 1, IDSN[. + */ + new_mapping = tcp_sk(meta_sk)->copied_seq - tcp_rsk(req)->rcv_isn - 1; + + /* There should be only one skb: the SYN + data. */ + skb_queue_walk(&meta_sk->sk_receive_queue, skb) { + TCP_SKB_CB(skb)->seq += new_mapping; + TCP_SKB_CB(skb)->end_seq += new_mapping; + } + + /* With fastopen we change the semantics of the relative subflow + * sequence numbers to deal with middleboxes that could add/remove + * multiple bytes in the SYN. We chose to start counting at rcv_nxt - 1 + * instead of the regular TCP ISN. + */ + tcp_sk(master_sk)->mptcp->rcv_isn = tcp_sk(master_sk)->rcv_nxt - 1; + + /* We need to update copied_seq of the master_sk to account for the + * already moved data to the meta receive queue. + */ + tcp_sk(master_sk)->copied_seq = tcp_sk(master_sk)->rcv_nxt; + + /* Handled by the master_sk */ + tcp_sk(meta_sk)->fastopen_rsk = NULL; + + return 0; +} + +int mptcp_check_req_master(struct sock *sk, struct sock *child, + struct request_sock *req, + int drop) +{ + struct sock *meta_sk = child; + int ret; + + ret = __mptcp_check_req_master(child, req); + if (ret) + return ret; + + /* drop indicates that we come from tcp_check_req and thus need to + * handle the request-socket fully. + */ + if (drop) { + inet_csk_reqsk_queue_drop(sk, req); + } else { + /* Thus, we come from syn-cookies */ + atomic_set(&req->rsk_refcnt, 1); + } + inet_csk_reqsk_queue_add(sk, req, meta_sk); + + return 0; +} + +struct sock *mptcp_check_req_child(struct sock *meta_sk, struct sock *child, + struct request_sock *req, + const struct mptcp_options_received *mopt) +{ + struct tcp_sock *child_tp = tcp_sk(child); + struct mptcp_request_sock *mtreq = mptcp_rsk(req); + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; + u8 hash_mac_check[20]; + + child_tp->inside_tk_table = 0; + + if (!mopt->join_ack) { + MPTCP_INC_STATS_BH(sock_net(meta_sk), MPTCP_MIB_JOINACKFAIL); + goto teardown; + } + + mptcp_hmac_sha1((u8 *)&mpcb->mptcp_rem_key, + (u8 *)&mpcb->mptcp_loc_key, + (u32 *)hash_mac_check, 2, + 4, (u8 *)&mtreq->mptcp_rem_nonce, + 4, (u8 *)&mtreq->mptcp_loc_nonce); + + if (memcmp(hash_mac_check, (char *)&mopt->mptcp_recv_mac, 20)) { + MPTCP_INC_STATS_BH(sock_net(meta_sk), MPTCP_MIB_JOINACKMAC); + goto teardown; + } + + /* Point it to the same struct socket and wq as the meta_sk */ + sk_set_socket(child, meta_sk->sk_socket); + child->sk_wq = meta_sk->sk_wq; + + if (mptcp_add_sock(meta_sk, child, mtreq->loc_id, mtreq->rem_id, GFP_ATOMIC)) { + /* Has been inherited, but now child_tp->mptcp is NULL */ + child_tp->mpc = 0; + child_tp->ops = &tcp_specific; + + /* TODO when we support acking the third ack for new subflows, + * we should silently discard this third ack, by returning NULL. + * + * Maybe, at the retransmission we will have enough memory to + * fully add the socket to the meta-sk. 
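+		 * For now, the subflow falls back to regular TCP below and
+		 * the request is torn down.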
+ */ + goto teardown; + } + + /* The child is a clone of the meta socket, we must now reset + * some of the fields + */ + child_tp->mptcp->rcv_low_prio = mtreq->rcv_low_prio; + + /* We should allow proper increase of the snd/rcv-buffers. Thus, we + * use the original values instead of the bloated up ones from the + * clone. + */ + child->sk_sndbuf = mpcb->orig_sk_sndbuf; + child->sk_rcvbuf = mpcb->orig_sk_rcvbuf; + + child_tp->mptcp->slave_sk = 1; + child_tp->mptcp->snt_isn = tcp_rsk(req)->snt_isn; + child_tp->mptcp->rcv_isn = tcp_rsk(req)->rcv_isn; + child_tp->mptcp->init_rcv_wnd = req->rcv_wnd; + + child_tp->tsq_flags = 0; + + /* Subflows do not use the accept queue, as they + * are attached immediately to the mpcb. + */ + inet_csk_reqsk_queue_drop(meta_sk, req); + + /* The refcnt is initialized to 2, because regular TCP will put him + * in the socket's listener queue. However, we do not have a listener-queue. + * So, we need to make sure that this request-sock indeed gets destroyed. + */ + reqsk_put(req); + + MPTCP_INC_STATS_BH(sock_net(meta_sk), MPTCP_MIB_JOINACKRX); + return child; + +teardown: + /* Drop this request - sock creation failed. */ + inet_csk_reqsk_queue_drop(meta_sk, req); + reqsk_put(req); + inet_csk_prepare_forced_close(child); + tcp_done(child); + return meta_sk; +} + +int mptcp_init_tw_sock(struct sock *sk, struct tcp_timewait_sock *tw) +{ + struct mptcp_tw *mptw; + struct tcp_sock *tp = tcp_sk(sk); + struct mptcp_cb *mpcb = tp->mpcb; + + /* A subsocket in tw can only receive data. So, if we are in + * infinite-receive, then we should not reply with a data-ack or act + * upon general MPTCP-signaling. We prevent this by simply not creating + * the mptcp_tw_sock. + */ + if (mpcb->infinite_mapping_rcv) { + tw->mptcp_tw = NULL; + return 0; + } + + /* Alloc MPTCP-tw-sock */ + mptw = kmem_cache_alloc(mptcp_tw_cache, GFP_ATOMIC); + if (!mptw) { + tw->mptcp_tw = NULL; + return -ENOBUFS; + } + + atomic_inc(&mpcb->mpcb_refcnt); + + tw->mptcp_tw = mptw; + mptw->loc_key = mpcb->mptcp_loc_key; + mptw->meta_tw = mpcb->in_time_wait; + if (mptw->meta_tw) { + mptw->rcv_nxt = mptcp_get_rcv_nxt_64(mptcp_meta_tp(tp)); + if (mpcb->mptw_state != TCP_TIME_WAIT) + mptw->rcv_nxt++; + } + rcu_assign_pointer(mptw->mpcb, mpcb); + + spin_lock(&mpcb->tw_lock); + list_add_rcu(&mptw->list, &tp->mpcb->tw_list); + mptw->in_list = 1; + spin_unlock(&mpcb->tw_lock); + + return 0; +} + +void mptcp_twsk_destructor(struct tcp_timewait_sock *tw) +{ + struct mptcp_cb *mpcb; + + rcu_read_lock(); + mpcb = rcu_dereference(tw->mptcp_tw->mpcb); + + /* If we are still holding a ref to the mpcb, we have to remove ourself + * from the list and drop the ref properly. + */ + if (mpcb && atomic_inc_not_zero(&mpcb->mpcb_refcnt)) { + spin_lock(&mpcb->tw_lock); + if (tw->mptcp_tw->in_list) { + list_del_rcu(&tw->mptcp_tw->list); + tw->mptcp_tw->in_list = 0; + } + spin_unlock(&mpcb->tw_lock); + + /* Twice, because we increased it above */ + mptcp_mpcb_put(mpcb); + mptcp_mpcb_put(mpcb); + } + + rcu_read_unlock(); + + kmem_cache_free(mptcp_tw_cache, tw->mptcp_tw); +} + +/* Updates the rcv_nxt of the time-wait-socks and allows them to ack a + * data-fin. 
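+ * Called when the meta-sk enters time-wait; tcp_timewait_state_process()
+ * later consumes the rcv_nxt stored here.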
+ */ +void mptcp_time_wait(struct sock *sk, int state, int timeo) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct mptcp_tw *mptw; + + /* Used for sockets that go into tw after the meta + * (see mptcp_init_tw_sock()) + */ + tp->mpcb->in_time_wait = 1; + tp->mpcb->mptw_state = state; + + /* Update the time-wait-sock's information */ + rcu_read_lock_bh(); + list_for_each_entry_rcu(mptw, &tp->mpcb->tw_list, list) { + mptw->meta_tw = 1; + mptw->rcv_nxt = mptcp_get_rcv_nxt_64(tp); + + /* We want to ack a DATA_FIN, but are yet in FIN_WAIT_2 - + * pretend as if the DATA_FIN has already reached us, that way + * the checks in tcp_timewait_state_process will be good as the + * DATA_FIN comes in. + */ + if (state != TCP_TIME_WAIT) + mptw->rcv_nxt++; + } + rcu_read_unlock_bh(); + + tcp_done(sk); +} + +void mptcp_tsq_flags(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sock *meta_sk = mptcp_meta_sk(sk); + + /* It will be handled as a regular deferred-call */ + if (is_meta_sk(sk)) + return; + + if (hlist_unhashed(&tp->mptcp->cb_list)) { + hlist_add_head(&tp->mptcp->cb_list, &tp->mpcb->callback_list); + /* We need to hold it here, as the sock_hold is not assured + * by the release_sock as it is done in regular TCP. + * + * The subsocket may get inet_csk_destroy'd while it is inside + * the callback_list. + */ + sock_hold(sk); + } + + if (!test_and_set_bit(MPTCP_SUB_DEFERRED, &tcp_sk(meta_sk)->tsq_flags)) + sock_hold(meta_sk); +} + +void mptcp_tsq_sub_deferred(struct sock *meta_sk) +{ + struct tcp_sock *meta_tp = tcp_sk(meta_sk); + struct mptcp_tcp_sock *mptcp; + struct hlist_node *tmp; + + BUG_ON(!is_meta_sk(meta_sk) && !meta_tp->was_meta_sk); + + __sock_put(meta_sk); + hlist_for_each_entry_safe(mptcp, tmp, &meta_tp->mpcb->callback_list, cb_list) { + struct tcp_sock *tp = mptcp->tp; + struct sock *sk = (struct sock *)tp; + + hlist_del_init(&mptcp->cb_list); + sk->sk_prot->release_cb(sk); + /* Final sock_put (cfr. 
mptcp_tsq_flags */ + sock_put(sk); + } +} + +void mptcp_join_reqsk_init(struct mptcp_cb *mpcb, const struct request_sock *req, + struct sk_buff *skb) +{ + struct mptcp_request_sock *mtreq = mptcp_rsk(req); + struct mptcp_options_received mopt; + u8 mptcp_hash_mac[20]; + + mptcp_init_mp_opt(&mopt); + tcp_parse_mptcp_options(skb, &mopt); + + mtreq->mptcp_mpcb = mpcb; + mtreq->is_sub = 1; + inet_rsk(req)->mptcp_rqsk = 1; + + mtreq->mptcp_rem_nonce = mopt.mptcp_recv_nonce; + + mptcp_hmac_sha1((u8 *)&mpcb->mptcp_loc_key, + (u8 *)&mpcb->mptcp_rem_key, + (u32 *)mptcp_hash_mac, 2, + 4, (u8 *)&mtreq->mptcp_loc_nonce, + 4, (u8 *)&mtreq->mptcp_rem_nonce); + mtreq->mptcp_hash_tmac = *(u64 *)mptcp_hash_mac; + + mtreq->rem_id = mopt.rem_id; + mtreq->rcv_low_prio = mopt.low_prio; + inet_rsk(req)->saw_mpc = 1; + + MPTCP_INC_STATS_BH(sock_net(mpcb->meta_sk), MPTCP_MIB_JOINSYNRX); +} + +void mptcp_reqsk_init(struct request_sock *req, struct sock *sk, + const struct sk_buff *skb, bool want_cookie) +{ + struct mptcp_options_received mopt; + struct mptcp_request_sock *mtreq = mptcp_rsk(req); + + mptcp_init_mp_opt(&mopt); + tcp_parse_mptcp_options(skb, &mopt); + + mtreq->dss_csum = mopt.dss_csum; + + if (want_cookie) { + if (!mptcp_reqsk_new_cookie(req, &mopt, skb)) + /* No key available - back to regular TCP */ + inet_rsk(req)->mptcp_rqsk = 0; + return; + } + + mptcp_reqsk_new_mptcp(req, sk, &mopt, skb); +} + +void mptcp_cookies_reqsk_init(struct request_sock *req, + struct mptcp_options_received *mopt, + struct sk_buff *skb) +{ + struct mptcp_request_sock *mtreq = mptcp_rsk(req); + + /* Absolutely need to always initialize this. */ + mtreq->hash_entry.pprev = NULL; + + mtreq->mptcp_rem_key = mopt->mptcp_sender_key; + mtreq->mptcp_loc_key = mopt->mptcp_receiver_key; + + /* Generate the token */ + mptcp_key_sha1(mtreq->mptcp_loc_key, &mtreq->mptcp_loc_token, NULL); + + rcu_read_lock(); + spin_lock(&mptcp_tk_hashlock); + + /* Check, if the key is still free */ + if (mptcp_reqsk_find_tk(mtreq->mptcp_loc_token) || + mptcp_find_token(mtreq->mptcp_loc_token)) + goto out; + + inet_rsk(req)->saw_mpc = 1; + mtreq->is_sub = 0; + inet_rsk(req)->mptcp_rqsk = 1; + mtreq->dss_csum = mopt->dss_csum; + +out: + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock(); +} + +int mptcp_conn_request(struct sock *sk, struct sk_buff *skb) +{ + struct mptcp_options_received mopt; + + mptcp_init_mp_opt(&mopt); + tcp_parse_mptcp_options(skb, &mopt); + + if (mopt.is_mp_join) + return mptcp_do_join_short(skb, &mopt, sock_net(sk)); + if (mopt.drop_me) + goto drop; + + if (!sock_flag(sk, SOCK_MPTCP)) + mopt.saw_mpc = 0; + + if (skb->protocol == htons(ETH_P_IP)) { + if (mopt.saw_mpc) { + if (skb_rtable(skb)->rt_flags & + (RTCF_BROADCAST | RTCF_MULTICAST)) + goto drop; + + MPTCP_INC_STATS_BH(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVE); + return tcp_conn_request(&mptcp_request_sock_ops, + &mptcp_request_sock_ipv4_ops, + sk, skb); + } + + return tcp_v4_conn_request(sk, skb); +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (mopt.saw_mpc) { + if (!ipv6_unicast_destination(skb)) + goto drop; + + MPTCP_INC_STATS_BH(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVE); + return tcp_conn_request(&mptcp6_request_sock_ops, + &mptcp_request_sock_ipv6_ops, + sk, skb); + } + + return tcp_v6_conn_request(sk, skb); +#endif + } +drop: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + return 0; +} + +static const struct snmp_mib mptcp_snmp_list[] = { + SNMP_MIB_ITEM("MPCapableSYNRX", MPTCP_MIB_MPCAPABLEPASSIVE), + SNMP_MIB_ITEM("MPCapableSYNTX", 
MPTCP_MIB_MPCAPABLEACTIVE), + SNMP_MIB_ITEM("MPCapableSYNACKRX", MPTCP_MIB_MPCAPABLEACTIVEACK), + SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK), + SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK), + SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK), + SNMP_MIB_ITEM("MPCapableRetransFallback", MPTCP_MIB_MPCAPABLERETRANSFALLBACK), + SNMP_MIB_ITEM("MPTCPCsumEnabled", MPTCP_MIB_CSUMENABLED), + SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), + SNMP_MIB_ITEM("MPFailRX", MPTCP_MIB_MPFAILRX), + SNMP_MIB_ITEM("MPCsumFail", MPTCP_MIB_CSUMFAIL), + SNMP_MIB_ITEM("MPFastcloseRX", MPTCP_MIB_FASTCLOSERX), + SNMP_MIB_ITEM("MPFastcloseTX", MPTCP_MIB_FASTCLOSETX), + SNMP_MIB_ITEM("MPFallbackAckSub", MPTCP_MIB_FBACKSUB), + SNMP_MIB_ITEM("MPFallbackAckInit", MPTCP_MIB_FBACKINIT), + SNMP_MIB_ITEM("MPFallbackDataSub", MPTCP_MIB_FBDATASUB), + SNMP_MIB_ITEM("MPFallbackDataInit", MPTCP_MIB_FBDATAINIT), + SNMP_MIB_ITEM("MPRemoveAddrSubDelete", MPTCP_MIB_REMADDRSUB), + SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), + SNMP_MIB_ITEM("MPJoinAlreadyFallenback", MPTCP_MIB_JOINFALLBACK), + SNMP_MIB_ITEM("MPJoinSynTx", MPTCP_MIB_JOINSYNTX), + SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX), + SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX), + SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC), + SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), + SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), + SNMP_MIB_ITEM("MPJoinAckMissing", MPTCP_MIB_JOINACKFAIL), + SNMP_MIB_ITEM("MPJoinAckRTO", MPTCP_MIB_JOINACKRTO), + SNMP_MIB_ITEM("MPJoinAckRexmit", MPTCP_MIB_JOINACKRXMIT), + SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW), + SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), + SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), + SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH), + SNMP_MIB_ITEM("DSSTrimHead", MPTCP_MIB_DSSTRIMHEAD), + SNMP_MIB_ITEM("DSSSplitTail", MPTCP_MIB_DSSSPLITTAIL), + SNMP_MIB_ITEM("DSSPurgeOldSubSegs", MPTCP_MIB_PURGEOLD), + SNMP_MIB_ITEM("AddAddrRx", MPTCP_MIB_ADDADDRRX), + SNMP_MIB_ITEM("AddAddrTx", MPTCP_MIB_ADDADDRTX), + SNMP_MIB_ITEM("RemAddrRx", MPTCP_MIB_REMADDRRX), + SNMP_MIB_ITEM("RemAddrTx", MPTCP_MIB_REMADDRTX), + SNMP_MIB_SENTINEL +}; + +struct workqueue_struct *mptcp_wq; +EXPORT_SYMBOL(mptcp_wq); + +/* Output /proc/net/mptcp */ +static int mptcp_pm_seq_show(struct seq_file *seq, void *v) +{ + struct tcp_sock *meta_tp; + struct sock *sk; + const struct net *net = seq->private; + int i, n = 0; + + seq_printf(seq, " sl loc_tok rem_tok v6 local_address remote_address st ns tx_queue rx_queue inode scheduler"); + seq_putc(seq, '\n'); + + for (i = 0; i < MPTCP_HASH_SIZE; i++) { + struct hlist_nulls_node *node; + rcu_read_lock_bh(); + hlist_nulls_for_each_entry_rcu(meta_tp, node, + &tk_hashtable[i], tk_table) { + struct mptcp_cb *mpcb = meta_tp->mpcb; + struct sock *meta_sk = (struct sock *)meta_tp; + struct inet_sock *isk = inet_sk(meta_sk); + + if (!mptcp(meta_tp) || !net_eq(net, sock_net(meta_sk))) + continue; + + if (capable(CAP_NET_ADMIN)) { + seq_printf(seq, "%4d: %04X %04X ", n++, + mpcb->mptcp_loc_token, + mpcb->mptcp_rem_token); + } else { + seq_printf(seq, "%4d: %04X %04X ", n++, -1, -1); + } + if (meta_sk->sk_family == AF_INET || + mptcp_v6_is_v4_mapped(meta_sk)) { + seq_printf(seq, " 0 %08X:%04X %08X:%04X ", + isk->inet_rcv_saddr, + ntohs(isk->inet_sport), + isk->inet_daddr, + ntohs(isk->inet_dport)); +#if IS_ENABLED(CONFIG_IPV6) + } else if 
(meta_sk->sk_family == AF_INET6) { + struct in6_addr *src = &meta_sk->sk_v6_rcv_saddr; + struct in6_addr *dst = &meta_sk->sk_v6_daddr; + seq_printf(seq, " 1 %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X", + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], + ntohs(isk->inet_sport), + dst->s6_addr32[0], dst->s6_addr32[1], + dst->s6_addr32[2], dst->s6_addr32[3], + ntohs(isk->inet_dport)); +#endif + } + seq_printf(seq, " %02X %02X %08X:%08X %lu %s", + meta_sk->sk_state, mpcb->cnt_subflows, + meta_tp->write_seq - meta_tp->snd_una, + max_t(int, meta_tp->rcv_nxt - + meta_tp->copied_seq, 0), + sock_i_ino(meta_sk), mpcb->sched_ops->name); + seq_putc(seq, '\n'); + +#if 0 + // added more stats per subflow... maybe we should move this to a new file + seq_printf(seq, " snd rcv srtt mdev packets_out retrans_out snd_cwnd\n"); + + for ((sk) = (struct sock *)(mpcb)->connection_list; sk; sk = (struct sock *)tcp_sk(sk)->mptcp->next) { + //mptcp_for_each_sk(mpcb, sk) { + struct tcp_sock *tp = tcp_sk(sk); + struct inet_sock *isk_tmp = inet_sk(sk); + + seq_printf(seq, "%15llu%15llu%15u%15u%15u%15u%15u %#x %i.%i.%i.%i:%i %i.%i.%i.%i:%i\n", + tp->mptcp->bytes_snd, + tp->mptcp->bytes_rcv, + tp->srtt_us, tp->mdev_us, + tp->packets_out, + tp->retrans_out, + tp->snd_cwnd, + (unsigned int) tp, + isk_tmp->inet_rcv_saddr & 0x000000FF, + (isk_tmp->inet_rcv_saddr & 0x0000FF00)>>8, + (isk_tmp->inet_rcv_saddr & 0x00FF0000)>>16, + (isk_tmp->inet_rcv_saddr & 0xFF000000)>>24, + ntohs(isk_tmp->inet_sport), + isk_tmp->inet_daddr & 0x000000FF, + (isk_tmp->inet_daddr & 0x0000FF00)>>8, + (isk_tmp->inet_daddr & 0x00FF0000)>>16, + (isk_tmp->inet_daddr & 0xFF000000)>>24, + ntohs(isk_tmp->inet_dport) + ); + } +#endif + } + + rcu_read_unlock_bh(); + } + + return 0; +} + +static int mptcp_pm_seq_open(struct inode *inode, struct file *file) +{ + return single_open_net(inode, file, mptcp_pm_seq_show); +} + +static const struct file_operations mptcp_pm_seq_fops = { + .owner = THIS_MODULE, + .open = mptcp_pm_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release_net, +}; + +static int mptcp_snmp_seq_show(struct seq_file *seq, void *v) +{ + struct net *net = seq->private; + int i; + + for (i = 0; mptcp_snmp_list[i].name != NULL; i++) + seq_printf(seq, "%-32s\t%ld\n", mptcp_snmp_list[i].name, + snmp_fold_field(net->mptcp.mptcp_statistics, + mptcp_snmp_list[i].entry)); + + return 0; +} + +static int mptcp_snmp_seq_open(struct inode *inode, struct file *file) +{ + return single_open_net(inode, file, mptcp_snmp_seq_show); +} + +static const struct file_operations mptcp_snmp_seq_fops = { + .owner = THIS_MODULE, + .open = mptcp_snmp_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release_net, +}; + +static int mptcp_pm_init_net(struct net *net) +{ + net->mptcp.mptcp_statistics = alloc_percpu(struct mptcp_mib); + if (!net->mptcp.mptcp_statistics) + goto out_mptcp_mibs; + +#ifdef CONFIG_PROC_FS + net->mptcp.proc_net_mptcp = proc_net_mkdir(net, "mptcp_net", net->proc_net); + if (!net->mptcp.proc_net_mptcp) + goto out_proc_net_mptcp; + if (!proc_create("mptcp", S_IRUGO, net->mptcp.proc_net_mptcp, + &mptcp_pm_seq_fops)) + goto out_mptcp_net_mptcp; + if (!proc_create("snmp", S_IRUGO, net->mptcp.proc_net_mptcp, + &mptcp_snmp_seq_fops)) + goto out_mptcp_net_snmp; +#endif + + return 0; + +#ifdef CONFIG_PROC_FS +out_mptcp_net_snmp: + remove_proc_entry("mptcp", net->mptcp.proc_net_mptcp); +out_mptcp_net_mptcp: + remove_proc_subtree("mptcp_net", net->proc_net); + 
net->mptcp.proc_net_mptcp = NULL; +out_proc_net_mptcp: + free_percpu(net->mptcp.mptcp_statistics); +#endif +out_mptcp_mibs: + return -ENOMEM; +} + +static void mptcp_pm_exit_net(struct net *net) +{ + remove_proc_entry("snmp", net->mptcp.proc_net_mptcp); + remove_proc_entry("mptcp", net->mptcp.proc_net_mptcp); + remove_proc_subtree("mptcp_net", net->proc_net); + free_percpu(net->mptcp.mptcp_statistics); +} + +static struct pernet_operations mptcp_pm_proc_ops = { + .init = mptcp_pm_init_net, + .exit = mptcp_pm_exit_net, +}; + +/* General initialization of mptcp */ +void __init mptcp_init(void) +{ + int i; + struct ctl_table_header *mptcp_sysctl; + + mptcp_sock_cache = kmem_cache_create("mptcp_sock", + sizeof(struct mptcp_tcp_sock), + 0, SLAB_HWCACHE_ALIGN, + NULL); + if (!mptcp_sock_cache) + goto mptcp_sock_cache_failed; + + mptcp_cb_cache = kmem_cache_create("mptcp_cb", sizeof(struct mptcp_cb), + 0, SLAB_DESTROY_BY_RCU|SLAB_HWCACHE_ALIGN, + NULL); + if (!mptcp_cb_cache) + goto mptcp_cb_cache_failed; + + mptcp_tw_cache = kmem_cache_create("mptcp_tw", sizeof(struct mptcp_tw), + 0, SLAB_DESTROY_BY_RCU|SLAB_HWCACHE_ALIGN, + NULL); + if (!mptcp_tw_cache) + goto mptcp_tw_cache_failed; + + get_random_bytes(mptcp_secret, sizeof(mptcp_secret)); + + mptcp_wq = alloc_workqueue("mptcp_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 8); + if (!mptcp_wq) + goto alloc_workqueue_failed; + + for (i = 0; i < MPTCP_HASH_SIZE; i++) { + INIT_HLIST_NULLS_HEAD(&tk_hashtable[i], i); + INIT_HLIST_NULLS_HEAD(&mptcp_reqsk_htb[i], + i + MPTCP_REQSK_NULLS_BASE); + INIT_HLIST_NULLS_HEAD(&mptcp_reqsk_tk_htb[i], i); + } + + spin_lock_init(&mptcp_reqsk_hlock); + spin_lock_init(&mptcp_tk_hashlock); + + if (register_pernet_subsys(&mptcp_pm_proc_ops)) + goto pernet_failed; + +#if IS_ENABLED(CONFIG_IPV6) + if (mptcp_pm_v6_init()) + goto mptcp_pm_v6_failed; +#endif + if (mptcp_pm_v4_init()) + goto mptcp_pm_v4_failed; + + mptcp_sysctl = register_net_sysctl(&init_net, "net/mptcp", mptcp_table); + if (!mptcp_sysctl) + goto register_sysctl_failed; + + if (mptcp_register_path_manager(&mptcp_pm_default)) + goto register_pm_failed; + + if (mptcp_register_scheduler(&mptcp_sched_default)) + goto register_sched_failed; + + pr_info("MPTCP: Stable release v0.89.0-rc"); + + mptcp_init_failed = false; + + return; + +register_sched_failed: + mptcp_unregister_path_manager(&mptcp_pm_default); +register_pm_failed: + unregister_net_sysctl_table(mptcp_sysctl); +register_sysctl_failed: + mptcp_pm_v4_undo(); +mptcp_pm_v4_failed: +#if IS_ENABLED(CONFIG_IPV6) + mptcp_pm_v6_undo(); +mptcp_pm_v6_failed: +#endif + unregister_pernet_subsys(&mptcp_pm_proc_ops); +pernet_failed: + destroy_workqueue(mptcp_wq); +alloc_workqueue_failed: + kmem_cache_destroy(mptcp_tw_cache); +mptcp_tw_cache_failed: + kmem_cache_destroy(mptcp_cb_cache); +mptcp_cb_cache_failed: + kmem_cache_destroy(mptcp_sock_cache); +mptcp_sock_cache_failed: + mptcp_init_failed = true; +} diff --git a/net/mptcp/mptcp_input.c b/net/mptcp/mptcp_input.c index 95ff08d06562b..523c0dc1eb6ea 100644 --- a/net/mptcp/mptcp_input.c +++ b/net/mptcp/mptcp_input.c @@ -35,6 +35,10 @@ #include +// for rbs specific stuff :-( +#include "mptcp_rbs_queue.h" +#include "mptcp_rbs_sched.h" + /* is seq1 < seq2 ? 
*/ static inline bool before64(const u64 seq1, const u64 seq2) { @@ -124,6 +128,20 @@ static void mptcp_clean_rtx_queue(struct sock *meta_sk, u32 prior_snd_una) if (!fully_acked) break; + /* RBS specific stuff */ + if (mptcp_rbs_is_sched_used(meta_tp)) { + struct mptcp_rbs_cb* rbs_cb = mptcp_rbs_get_cb(meta_tp); + mptcp_debug("checking in mptcp_input for queue position\n"); + + if(skb == rbs_cb->queue_position) { + mptcp_debug("rbs has to correct queue position old %p\n", rbs_cb->queue_position); + mptcp_rbs_advance_send_head(meta_sk, &rbs_cb->queue_position); + mptcp_debug("rbs had to correct queue position %p\n", rbs_cb->queue_position); + } + } + mptcp_debug("unlinking skb %p from sk_send_queue\n", skb); + /* RBS specific stuff END */ + tcp_unlink_write_queue(skb, meta_sk); if (mptcp_is_data_fin(skb)) { @@ -159,6 +177,13 @@ static void mptcp_clean_rtx_queue(struct sock *meta_sk, u32 prior_snd_una) break; } + mptcp_debug("unlinking skb %p from reinject_queue %p with reinject->next %p\n", skb, &mpcb->reinject_queue, mpcb->reinject_queue.next); + if(skb) { + mptcp_debug("skb->next = %p and skb->prev = %p\n", skb->next, skb->prev); + } else { + printk("##### skb is null\n"); + } + __skb_unlink(skb, &mpcb->reinject_queue); __kfree_skb(skb); } @@ -872,6 +897,12 @@ static int mptcp_validate_mapping(struct sock *sk, struct sk_buff *skb) return 0; } +//#define AFR_OOO_RECEIVE_VERBOSE + +/* checks all subflows ofo queues except sk if something fits */ +void mptcp_afr_all_ofo_queue(struct sock *meta_sk, struct sock *sk); + + /* @return: 0 everything is fine. Just continue processing * 1 subflow is broken stop everything * -1 this mapping has been put in the meta-receive-queue @@ -887,6 +918,10 @@ static int mptcp_queue_skb(struct sock *sk) u32 old_copied_seq = tp->copied_seq; bool data_queued = false; +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s afr_ofo for sk %p\n", __func__, sk); +#endif + /* Have we not yet received the full mapping? */ if (!tp->mptcp->mapping_present || before(tp->rcv_nxt, tp->mptcp->map_subseq + tp->mptcp->map_data_len)) @@ -967,7 +1002,9 @@ static int mptcp_queue_skb(struct sock *sk) * Then, kfree_skb_partial will not account the memory. */ skb_orphan(tmp1); - +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s afr_ofo test if segment has already been received with skb->end_seq %u after meta_tp->rcv_nxt %u\n", __func__, TCP_SKB_CB(tmp1)->end_seq, meta_tp->rcv_nxt); +#endif /* This segment has already been received */ if (!after(TCP_SKB_CB(tmp1)->end_seq, meta_tp->rcv_nxt)) { __kfree_skb(tmp1); @@ -997,6 +1034,8 @@ static int mptcp_queue_skb(struct sock *sk) /* Check if this fills a gap in the ofo queue */ if (!skb_queue_empty(&meta_tp->out_of_order_queue)) mptcp_ofo_queue(meta_sk); + /* Whenever we check the ofo_queue, we check all sbf ofos */ + mptcp_afr_all_ofo_queue(meta_sk, sk); if (eaten) kfree_skb_partial(tmp1, fragstolen); @@ -1016,6 +1055,190 @@ static int mptcp_queue_skb(struct sock *sk) return data_queued ? 
		       -1 : -2;
 }
 
+void afr_ofo_push(struct sock *sk) {
+	struct tcp_sock *tp = tcp_sk(sk), *meta_tp = mptcp_meta_tp(tp);
+	struct sock *meta_sk = mptcp_meta_sk(sk);
+	struct sk_buff *tmp, *skb;
+
+	if (!mptcp_ooo_opt) {
+		return;
+	}
+
+	if (tp->out_of_order_queue.qlen > 0) {
+#ifdef AFR_OOO_RECEIVE_VERBOSE
+		printk("%s afr_ooo checking sbf %p out_of_order_queue.qlen %u\n", __func__, sk, tp->out_of_order_queue.qlen);
+#endif
+		/*
+		 * Traverse the whole queue.
+		 *
+		 * There might be a lower data_seq with a higher subflow_seq:
+		 * repeat the loop whenever we matched something and saw an
+		 * out-of-order data_seq.
+		 */
+		while (true) {
+			bool out_of_order_data_seq = false;
+			u32 last_data_seq = 0;
+			bool matched = false;
+
+			skb_queue_walk_safe(&tp->out_of_order_queue, skb, tmp)
+			{
+				u32 *ptr;
+				u32 data_seq, sub_seq, data_len;
+
+#ifdef AFR_OOO_RECEIVE_VERBOSE
+				printk("%s afr_ooo packet %p with seq %u and end_seq %u and len %u\n", __func__, skb, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq);
+#endif
+
+				/* No mapping here? Skip it - for a first version we only accept packets with a mapping. */
+				/* TODO: check if an old mapping still maps this sequence number */
+				if (!mptcp_is_data_seq(skb)) {
+					/* No ifdef, as this is really important! */
+					printk("%s afr_ooo packet %p in ofo has no mapping... go to next skb...\n", __func__, skb);
+					continue;
+				}
+
+				ptr = mptcp_skb_set_data_seq(skb, &data_seq, NULL);
+				ptr++;
+				sub_seq = get_unaligned_be32(ptr) + tp->mptcp->rcv_isn;
+				ptr++;
+				data_len = get_unaligned_be16(ptr);
+#ifdef AFR_OOO_RECEIVE_VERBOSE
+				printk("packet %p in ofo queue with data_seq %u sub_seq %u and data_len %u\n", skb, data_seq, sub_seq, data_len);
+				printk("comparing meta_tp->rcv_nxt %u and data_seq %u\n", meta_tp->rcv_nxt, data_seq);
+#endif
+				if (!before(last_data_seq, data_seq)) {
+#ifdef AFR_OOO_RECEIVE_VERBOSE
+					printk("found out of order data_seq in the sbf ooo queue\n");
+#endif
+					out_of_order_data_seq = true;
+				}
+
+				if (before(meta_tp->rcv_nxt, data_seq)) {
+					/*
+					 * Segments would have to go to the meta-ofo-queue.
+					 *
+					 * We avoid this, as most packets in heterogeneous networks would
+					 * go into the meta-ofo-queue. Instead, we check again whenever we get new packets.
+					 */
+
+					/* TODO: really most packets? Actually only those which are ooo on both layers. */
+				} else {
+					/* Ready for the meta-rcv-queue */
+					bool eaten = false;
+					bool fragstolen = false;
+					struct sk_buff *skb_cpy = NULL;
+
+					/* Design decision: we keep all packets in the
+					 * subflow receive queue (no unlinking) to keep
+					 * changes minimal.
+					 */
+					u32 data_end_seq = data_seq + data_len;
+#ifdef AFR_OOO_RECEIVE_VERBOSE
+					printk("comparing data_end_seq %u and meta_tp->rcv_nxt %u\n", data_end_seq, meta_tp->rcv_nxt);
+#endif
+
+					if (!after(data_end_seq, meta_tp->rcv_nxt)) {
+#ifdef AFR_OOO_RECEIVE_VERBOSE
+						printk("packet %p with meta_end_seq %u is already fully in meta_receive_queue\n", skb, data_end_seq);
+#endif
+						continue;
+					}
+
+					/* I did not check the implementation, but packetdrill 3 checks that
+					 * partially overlapping sequence numbers work.
+					 *
+					 * Design decision: copy the packet, so the old subflow logic still
+					 * has its own copy.
+					 */
+
+					skb_cpy = pskb_copy_for_clone(skb, GFP_ATOMIC);
+#ifdef AFR_OOO_RECEIVE_VERBOSE
+					printk("ok, it will fit in the meta, so we COPY it to be sure... old %p to new skb_cpy %p\n", skb, skb_cpy);
+#endif
+
+					if (!skb_cpy) {
+						/* No memory available... not critical here,
+						 * but no ifdef, as this is interesting!
+						 */
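+						/* Background note (sketch): this failure path is
+						 * harmless by design. The original skb is not
+						 * unlinked from the subflow out-of-order queue
+						 * (see the design decision above), so a failed
+						 * GFP_ATOMIC copy only postpones delivery; the
+						 * walk runs again on the next pass and retries:
+						 *
+						 *	skb_cpy = pskb_copy_for_clone(skb, GFP_ATOMIC);
+						 *	if (!skb_cpy)
+						 *		return;	<- retried on a later mptcp_data_ready()
+						 */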
+ printk("%s failed copying packet\n", __func__); + return; + } + + matched = true; + + // we have to recalculate the seq numbers... stolen from prepare_skb + /* Adapt data-seq's to the packet itself. We kinda transform the + * dss-mapping to a per-packet granularity. This is necessary to + * correctly handle overlapping mappings coming from different + * subflows. Otherwise it would be a complete mess. + */ + TCP_SKB_CB(skb_cpy)->seq = data_seq; + TCP_SKB_CB(skb_cpy)->end_seq = data_end_seq; + +// printk("%s matched ooo packet for meta_sk %p on sbf %p with sbf seq %u end_seq %u and meta seq %u end_seq %u with sbf.rq.count %u and tp->rcv_nxt %u\n", +// __func__, meta_sk, sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, data_seq, data_end_seq, tp->out_of_order_queue.qlen, tp->rcv_nxt); + mptcp_ooo_number_matches++; + + /* Is direct copy possible ? */ + if (TCP_SKB_CB(skb_cpy)->seq == meta_tp->rcv_nxt && + meta_tp->ucopy.task == current && + meta_tp->copied_seq == meta_tp->rcv_nxt && + meta_tp->ucopy.len && sock_owned_by_user(meta_sk)) { + +// printk("%s uses direct copy for skb_cpy %p\n", __func__, skb_cpy); + eaten = mptcp_direct_copy(skb_cpy, meta_sk); +// printk("%s direct copy returned %u\n", __func__, eaten); + } + + if (meta_tp->mpcb->in_time_wait) { /* In time-wait, do not receive data */ + printk("%s eats skb as we are in time wait\n", __func__); + eaten = 1; + } + + if (!eaten) { +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("we put skb_cpy %p into the meta receive queue\n", skb_cpy); +#endif + eaten = tcp_queue_rcv(meta_sk, skb_cpy, 0, &fragstolen); + } + + meta_tp->rcv_nxt = TCP_SKB_CB(skb_cpy)->end_seq; + // TODO do we need this? + //mptcp_check_rcvseq_wrap(meta_tp, old_rcv_nxt); + + /* Check if this fills a gap in the ofo queue */ + if (!skb_queue_empty(&meta_tp->out_of_order_queue)) + mptcp_ofo_queue(meta_sk); + /* Whenever we check the ofo_queue, we check all sbf ofos */ + mptcp_afr_all_ofo_queue(meta_sk, sk); + } + } + + if(!matched) + break; + if(!out_of_order_data_seq) + break; +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s matched packet and saw out of order data seq... iterate one more time\n", __func__); +#endif + } + } +} + +/* checks all subflows ofo queues except sk if something fits */ +void mptcp_afr_all_ofo_queue(struct sock *meta_sk, struct sock *not_sk) { + struct sock *sk; + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; + + if(!mptcp_ooo_opt) { + return; + } + + mptcp_for_each_sk(mpcb, sk) { + if(sk == not_sk) + continue; + if(tcp_sk(sk)->out_of_order_queue.qlen > 0) { + afr_ofo_push(sk); + } + } +} + void mptcp_data_ready(struct sock *sk) { struct sock *meta_sk = mptcp_meta_sk(sk); @@ -1035,30 +1258,50 @@ void mptcp_data_ready(struct sock *sk) tcp_sk(sk)->copied_seq = tcp_sk(sk)->rcv_nxt; goto exit; } - +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s afr_ofo for sk %p with sk_receive_queue.qlen %u called by %pS\n", __func__, sk, sk->sk_receive_queue.qlen, __builtin_return_address(0)); +#endif /* Iterate over all segments, detect their mapping (if we don't have * one yet), validate them and push everything one level higher. 
	 */
	skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
		int ret;
+#ifdef AFR_OOO_RECEIVE_VERBOSE
+		printk("%s afr_ofo for sk %p walking skb %p with seq %u and end_seq %u and diff_seq %u\n", __func__, sk, skb, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq);
+#endif
 		/* Pre-validation - e.g., early fallback */
 		ret = mptcp_prevalidate_skb(sk, skb);
 		if (ret < 0)
 			goto restart;
 		else if (ret > 0)
 			break;
-
+#ifdef AFR_OOO_RECEIVE_VERBOSE
+		printk("%s afr_ofo prevalidated skb %p successfully\n", __func__, skb);
+#endif
 		/* Set the current mapping */
 		ret = mptcp_detect_mapping(sk, skb);
 		if (ret < 0)
 			goto restart;
 		else if (ret > 0)
 			break;
-
+#ifdef AFR_OOO_RECEIVE_VERBOSE
+		printk("%s afr_ofo set mapping of skb %p successfully\n", __func__, skb);
+#endif
 		/* Validation */
 		if (mptcp_validate_mapping(sk, skb) < 0)
 			goto restart;
+#ifdef AFR_OOO_RECEIVE_VERBOSE
+		printk("%s afr_ofo validated mapping of skb %p successfully\n", __func__, skb);
+#endif
+		/* Per-subflow receive statistics */
+		{
+			struct tcp_sock *tp = tcp_sk(sk);
+			tp->mptcp->bytes_rcv += skb->len;
+		}
+#ifdef AFR_OOO_RECEIVE_VERBOSE
+		printk("%s afr_ofo pushing a level higher\n", __func__);
+#endif
 		/* Push a level higher */
 		ret = mptcp_queue_skb(sk);
 		if (ret < 0) {
@@ -1072,14 +1315,30 @@ void mptcp_data_ready(struct sock *sk)
 		}
 	}
 
+	/* In the current implementation, out-of-order packets of the
+	 * subflow are not pushed to the meta socket.
+	 *
+	 * There are two implementation alternatives:
+	 * 1. Push all subflow ofos to the meta socket.
+	 *    This implies a lot of ofos, which would be solved by...
+	 * 2. Only push a subflow ofo if it fits into the
+	 *    meta socket receive queue.
+	 */
+	afr_ofo_push(sk);
+
 exit:
 	if (tcp_sk(sk)->close_it) {
 		tcp_send_ack(sk);
 		tcp_sk(sk)->ops->time_wait(sk, TCP_TIME_WAIT, 0);
 	}
 
-	if (queued == -1 && !sock_flag(meta_sk, SOCK_DEAD))
+	if (queued == -1 && !sock_flag(meta_sk, SOCK_DEAD)) {
+#ifdef AFR_OOO_RECEIVE_VERBOSE
+		printk("%s afr_ofo calls meta_sk->data_ready for meta_sk %p\n", __func__, meta_sk);
+#endif
 		meta_sk->sk_data_ready(meta_sk);
+	}
 }
 
@@ -1414,6 +1673,9 @@ void mptcp_fin(struct sock *meta_sk)
 	return;
 }
 
+void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk,
+			   struct sock *sk, int clone_it);
+
 static void mptcp_xmit_retransmit_queue(struct sock *meta_sk)
 {
 	struct tcp_sock *meta_tp = tcp_sk(meta_sk);
@@ -1423,11 +1685,30 @@ static void mptcp_xmit_retransmit_queue(struct sock *meta_sk)
 		return;
 
 	tcp_for_write_queue(skb, meta_sk) {
-		if (skb == tcp_send_head(meta_sk))
+		if (skb == tcp_send_head(meta_sk)) {
+			printk("%s breaks at skb %p = sk_send_head\n", __func__, skb);
 			break;
+		}
 
-		if (mptcp_retransmit_skb(meta_sk, skb))
-			return;
+		printk("mptcp_xmit_retransmit_queue retransmits skb %p\n", skb);
+
+		/* rbs: check if we already sent this skb (the skb itself, not the data) */
+		if (!tcp_skb_pcount(skb)) {
+			/* This is a leftover: we put the skb into the reinject
+			 * queue a while ago, but this situation should not occur
+			 * at all. The current handling does not crash, but since
+			 * it should never happen, log it.
+			 */
+			printk("%s wants to retransmit skb %p with seq %u but no pcount... skip it\n", __func__, skb, TCP_SKB_CB(skb)->seq);
+
+			// Putting it into the reinject queue would be "correct", but then we would send out of order...
+			// Maybe we should transmit it directly here...
+			// Maybe we should remove it from the send queue and continue without cloning...
+ // mptcp_debug("rbs detected sk_send_head before unsent packets for retransmission of skb %p, put copy into reinjection queue\n", skb); + // __mptcp_reinject_data(skb, meta_sk, NULL, 1); + } else { + // was already sent, so we can use retransmit... + if (mptcp_retransmit_skb(meta_sk, skb)) + return; + } if (skb == tcp_write_queue_head(meta_sk)) inet_csk_reset_xmit_timer(meta_sk, ICSK_TIME_RETRANS, @@ -1439,14 +1720,28 @@ static void mptcp_xmit_retransmit_queue(struct sock *meta_sk) /* Handle the DATA_ACK */ static void mptcp_data_ack(struct sock *sk, const struct sk_buff *skb) { - struct sock *meta_sk = mptcp_meta_sk(sk); + /*struct sock *meta_sk = mptcp_meta_sk(sk); struct tcp_sock *meta_tp = tcp_sk(meta_sk), *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); - u32 prior_snd_una = meta_tp->snd_una; + u32 prior_snd_una = meta_tp->snd_una;*/ + struct sock *meta_sk; + struct tcp_sock *meta_tp; + struct tcp_sock *tp; + struct tcp_skb_cb *tcb; + u32 prior_snd_una; + int prior_packets; u32 nwin, data_ack, data_seq; u16 data_len = 0; + meta_sk = mptcp_meta_sk(sk); + meta_tp = tcp_sk(meta_sk); + tp = tcp_sk(sk); + tcb = TCP_SKB_CB(skb); + prior_snd_una = meta_tp->snd_una; + + + /* A valid packet came in - subflow is operational again */ tp->pf = 0; @@ -1524,12 +1819,18 @@ static void mptcp_data_ack(struct sock *sk, const struct sk_buff *skb) inet_csk(meta_sk)->icsk_probes_out = 0; meta_tp->rcv_tstamp = tcp_time_stamp; prior_packets = meta_tp->packets_out; + + if (meta_tp->mpcb->sched_ops->update_stats) + meta_tp->mpcb->sched_ops->update_stats(sk, skb, 0, 2); + if (!prior_packets) goto no_queue; meta_tp->snd_una = data_ack; + mptcp_debug("now going to mptcp_clean_rtx_queue for meta_sk %p with prior_snd_una %u\n", meta_sk, prior_snd_una); mptcp_clean_rtx_queue(meta_sk, prior_snd_una); + mptcp_debug("we are back from cleaning\n"); /* We are in loss-state, and something got acked, retransmit the whole * queue now! 
@@ -2146,6 +2447,9 @@ int mptcp_handle_options(struct sock *sk, const struct tcphdr *th, { struct tcp_sock *tp = tcp_sk(sk); struct mptcp_options_received *mopt = &tp->mptcp->rx_opt; + + if (tp->mpcb->sched_ops->update_stats) + tp->mpcb->sched_ops->update_stats(sk, skb, 0, 2); if (tp->mpcb->infinite_mapping_rcv || tp->mpcb->infinite_mapping_snd) return 0; diff --git a/net/mptcp/mptcp_ipv6.c b/net/mptcp/mptcp_ipv6.c index bd57015399498..9daa69964f921 100644 --- a/net/mptcp/mptcp_ipv6.c +++ b/net/mptcp/mptcp_ipv6.c @@ -208,7 +208,7 @@ int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb) &tcp_hashinfo, &ip6h->saddr, th->source, &ip6h->daddr, ntohs(th->dest), - inet6_iif(skb)); + tcp_v6_iif(skb)); if (!sk) { kfree_skb(skb); @@ -284,7 +284,7 @@ int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb) */ req = inet6_csk_search_req(meta_sk, th->source, &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, inet6_iif(skb)); + &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); if (req) { inet_csk_reqsk_queue_drop(meta_sk, req); diff --git a/net/mptcp/mptcp_output.c b/net/mptcp/mptcp_output.c index 3a6b675e8983c..f72f3b962af28 100644 --- a/net/mptcp/mptcp_output.c +++ b/net/mptcp/mptcp_output.c @@ -35,6 +35,7 @@ #include #include #include +#include "mptcp_rbs_queue.h" static const int mptcp_dss_len = MPTCP_SUB_LEN_DSS_ALIGN + MPTCP_SUB_LEN_ACK_ALIGN + @@ -97,10 +98,23 @@ static bool mptcp_is_reinjected(const struct sk_buff *skb) static void mptcp_find_and_set_pathmask(const struct sock *meta_sk, struct sk_buff *skb) { struct sk_buff *skb_it; - + u32 counter = 0; skb_it = tcp_write_queue_head(meta_sk); tcp_for_write_queue_from(skb_it, meta_sk) { + counter++; + + if(counter == 1000) { + /*printk("%s called more than %u times with qlen %u for seq %u... we break at 100 000\n", __func__, counter, meta_sk->sk_write_queue.qlen, TCP_SKB_CB(skb)->seq); + printk("Caller is %pS\n", __builtin_return_address(0)); + printk("Callerer is %pS\n", __builtin_return_address(1));*/ + } + + if(counter > 1000 * 100) { + printk("ProgMP: %s breaking with %u and qlen %u\n", __func__, counter, meta_sk->sk_write_queue.qlen); + break; + } + if (skb_it == tcp_send_head(meta_sk)) break; @@ -114,7 +128,7 @@ static void mptcp_find_and_set_pathmask(const struct sock *meta_sk, struct sk_bu /* Reinject data from one TCP subflow to the meta_sk. If sk == NULL, we are * coming from the meta-retransmit-timer */ -static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk, +void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk, struct sock *sk, int clone_it) { struct sk_buff *skb, *skb1; @@ -123,11 +137,27 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk u32 seq, end_seq; if (clone_it) { + mptcp_debug("%s going to clone %p for rq %p\n", __func__, orig_skb, &mpcb->reinject_queue); /* pskb_copy is necessary here, because the TCP/IP-headers * will be changed when it's going to be reinjected on another * subflow. 
 		 */
+		skb = pskb_copy_for_clone(orig_skb, GFP_ATOMIC);
+
+		/* afr: added because the cloned skb might be NULL; the rbs
+		 * comparison below would dereference a NULL pointer otherwise.
+		 */
+		if (unlikely(!skb)) {
+			printk("%s failed cloning skb %p\n", __func__, orig_skb);
+			return;
+		}
+
+		if (TCP_SKB_CB(orig_skb)->mptcp_rbs.user != TCP_SKB_CB(skb)->mptcp_rbs.user) {
+			printk("cloning skb %p with rbs %i leads to rbs %i\n", orig_skb, TCP_SKB_CB(orig_skb)->mptcp_rbs.user, TCP_SKB_CB(skb)->mptcp_rbs.user);
+			BUG();
+		}
+
+//		mptcp_debug("%s cloned %p and got %p with not_in_queue %u and not_in_queue %u\n", __func__, orig_skb, skb,
+//			    TCP_SKB_CB(orig_skb)->mptcp_rbs_flags_not_in_queue, TCP_SKB_CB(skb)->mptcp_rbs_flags_not_in_queue);
 	} else {
 		__skb_unlink(orig_skb, &sk->sk_write_queue);
 		sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
@@ -138,6 +168,9 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk
 	if (unlikely(!skb))
 		return;
 
+	/* Now it is in the queue again */
+	TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue = 0;
+
 	if (sk && !mptcp_reconstruct_mapping(skb)) {
 		__kfree_skb(skb);
 		return;
@@ -145,9 +178,14 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk
 	skb->sk = meta_sk;
 
+	/* Reset subflow-specific TCP control-data */
+	TCP_SKB_CB(skb)->sacked = 0;
+	TCP_SKB_CB(skb)->tcp_flags &= (TCPHDR_ACK | TCPHDR_PSH);
+
 	/* If it reached already the destination, we don't have to reinject it */
 	if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una)) {
 		__kfree_skb(skb);
+		mptcp_debug("no need to retransmit skb %p, it already reached the peer\n", skb);
 		return;
 	}
 
@@ -183,11 +221,12 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk
 	/* We need to find out the path-mask from the meta-write-queue
 	 * to properly select a subflow.
 	 */
-	mptcp_find_and_set_pathmask(meta_sk, skb);
+	mptcp_find_and_set_pathmask(meta_sk, skb); /* why is this needed? */
 
 	/* If it's empty, just add */
 	if (skb_queue_empty(&mpcb->reinject_queue)) {
 		skb_queue_head(&mpcb->reinject_queue, skb);
+		mptcp_debug("added skb %p as first packet to reinjection_queue %p with new queue size %u (internal qlen %u)\n", skb, &mpcb->reinject_queue, skb_queue_len(&mpcb->reinject_queue), mpcb->reinject_queue.qlen);
 		return;
 	}
 
@@ -214,6 +253,7 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk
 	if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
 		if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
 			/* All the bits are present. Don't reinject */
+			mptcp_debug("do not reinject skb %p, overlapping and fully present\n", skb);
 			__kfree_skb(skb);
 			return;
 		}
@@ -224,10 +264,13 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk
 			skb1 = skb_queue_prev(&mpcb->reinject_queue, skb1);
 		}
 	}
-	if (!skb1)
+	if (!skb1) {
 		__skb_queue_head(&mpcb->reinject_queue, skb);
-	else
+		mptcp_debug("added skb %p as queue head for rq %p\n", skb, &mpcb->reinject_queue);
+	} else {
+		__skb_queue_after(&mpcb->reinject_queue, skb1, skb);
+		mptcp_debug("added skb %p after other skb %p in queue with queue size %i for rq %p\n", skb, skb1, skb_queue_len(&mpcb->reinject_queue), &mpcb->reinject_queue);
+	}
 
 	/* And clean segments covered by new one as whole.
 	 */
 	while (!skb_queue_is_last(&mpcb->reinject_queue, skb)) {
@@ -265,8 +308,11 @@ void mptcp_reinject_data(struct sock *sk, int clone_it)
 		    (tcb->tcp_flags & TCPHDR_FIN &&
 		     mptcp_is_data_fin(skb_it) && !skb_it->len))
 			continue;
 
-		if (mptcp_is_reinjected(skb_it))
+		mptcp_debug("check skb %p for reinjection\n", skb_it);
+		if (mptcp_is_reinjected(skb_it)) {
+			mptcp_debug("skb %p was already in the reinjection queue, no need to reinject it again\n", skb_it);
 			continue;
+		}
 
 		tcb->mptcp_flags |= MPTCP_REINJECT;
 		__mptcp_reinject_data(skb_it, meta_sk, sk, clone_it);
@@ -276,6 +322,7 @@ void mptcp_reinject_data(struct sock *sk, int clone_it)
 	/* If sk has sent the empty data-fin, we have to reinject it too. */
 	if (skb_it && mptcp_is_data_fin(skb_it) && skb_it->len == 0 &&
 	    TCP_SKB_CB(skb_it)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index)) {
+		//printk("%s reinjects the empty data-fin\n", __func__);
 		__mptcp_reinject_data(skb_it, meta_sk, NULL, 1);
 	}
 
@@ -434,6 +481,8 @@ static bool mptcp_skb_entail(struct sock *sk, struct sk_buff *skb, int reinject)
 			      MPTCPHDR_SEQ64_INDEX : 0);
 
 	subskb = pskb_copy_for_clone(skb, GFP_ATOMIC);
+	mptcp_debug("mptcp_skb_entail copied skb %p to subskb %p, skb->next %p\n", skb, subskb, skb->next);
+
 	if (!subskb)
 		return false;
 
@@ -524,6 +573,16 @@ static int mptcp_fragment(struct sock *meta_sk, struct sk_buff *skb, u32 len,
 	diff = skb->data_len;
 	old_factor = tcp_skb_pcount(skb);
 
+	//mptcp_debug("mptcp_fragment for meta_sk %p with skb %p of length %u with reinject %u\n", meta_sk, skb, len, reinject);
+
+	if (!skb) {
+		printk("mptcp_fragment found skb as null with reinject %i\n", reinject);
+	}
+
+	if (!skb->next) {
+		printk("mptcp_fragment found skb->next for skb %p as null with reinject %i\n", skb, reinject);
+	}
+
 	/* The mss_now in tcp_fragment is used to set the tso_segs of the skb.
 	 * At the MPTCP-level we do not care about the absolute value.
All we * care about is that it is set to 1 for accurate packets_out @@ -576,6 +635,7 @@ int mptcp_write_wakeup(struct sock *meta_sk) return -1; skb = tcp_send_head(meta_sk); + mptcp_debug("%s called by %pS for skb %p\n", __func__, __builtin_return_address(0), skb); if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(meta_tp))) { unsigned int mss; @@ -646,6 +706,19 @@ int mptcp_write_wakeup(struct sock *meta_sk) } } +bool is_tp_in_connection_list2(struct mptcp_cb *mpcb, struct tcp_sock* tp) { + struct tcp_sock* tmp = mpcb->connection_list; + + while(tmp) { + if(tmp == tp) { + return true; + } + + tmp = tmp->mptcp->next; + } + return false; +} + bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp) { @@ -657,31 +730,130 @@ bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle, unsigned int sublimit; __u32 path_mask = 0; +#define RBS_DEBUG + +#ifdef RBS_DEBUG + struct sk_buff* skb_last_iteration = NULL; + unsigned int iteration_count = 0; +#endif + + mptcp_debug("rbs before loop meta_sk->send_head = %p and queue_len(sk_write_queue) = %5i\n", tcp_send_head(meta_sk), skb_queue_len(&meta_sk->sk_write_queue)); + while ((skb = mpcb->sched_ops->next_segment(meta_sk, &reinject, &subsk, &sublimit))) { unsigned int limit; +#ifdef RBS_DEBUG + if(iteration_count > 1000 * 1000) { // once found a correct execution with more than 100 + printk("ProgMP aborts iterations in %s after %u rounds\n", __func__, iteration_count); + BUG(); + } + + if(skb_last_iteration == skb) { + iteration_count++; + } else { + skb_last_iteration = skb; + iteration_count = 0; + } +#endif + /* this fix is important, but should happen somewhere else... we do not want to overcount packets_out in + * tcp_event_new_data_sent, which is only called if reinject == 0... 
+	 */
+	if (reinject == 0 && TCP_SKB_CB(skb)->path_mask) {
+		reinject = -1;
+	}
+
+//	mptcp_debug("rbs in loop meta_sk->send_head = %p and queue_len(sk_write_queue) = %5i with skb %p and path_mask %u\n",
+//		    tcp_send_head(meta_sk), skb_queue_len(&meta_sk->sk_write_queue), skb, TCP_SKB_CB(skb)->path_mask);
+
 		subtp = tcp_sk(subsk);
+
+		/*
+		 * rbs might return an invalid subsk from its open actions.
+		 *
+		 * Before we use the subsk, ensure that it is still valid.
+		 * This test leads to an undefined symbol if executed in the
+		 * scheduler, but would make more sense at that place.
+		 */
+		if (mpcb->sched_ops->name[0] == 'r' && mpcb->sched_ops->name[1] == 'b' && mpcb->sched_ops->name[2] == 's') {
+			if (!is_tp_in_connection_list2(mpcb, (struct tcp_sock *) subsk)) {
+//				mptcp_debug("rbs recovers skb %p from outdated subflow %p\n", skb, subsk);
+				/*
+				 * Actually, this might be semantically problematic, as packets
+				 * in the open action might overtake this packet.
+				 *
+				 * We have to clone the skb for the reinject queue, as it will
+				 * remain in the send queue.
+				 */
+				__mptcp_reinject_data(skb, meta_sk, NULL, 1);
+//				mptcp_debug("added packet %p as first packet to reinject queue with new queue size %i, skb->prev = %p, skb->next = %p\n", skb, skb_queue_len(&mpcb->reinject_queue), skb->prev, skb->next);
+
+				continue;
+			}
+		}
+
 		mss_now = tcp_current_mss(subsk);
 
 		if (reinject == 1) {
-			if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una)) {
-				/* Segment already reached the peer, take the next one */
-				__skb_unlink(skb, &mpcb->reinject_queue);
-				__kfree_skb(skb);
-				continue;
+			if (mpcb->sched_ops->name[0] == 'r' && mpcb->sched_ops->name[1] == 'b' && mpcb->sched_ops->name[2] == 's') {
+				/*
+				 * rbs manages the queue by itself, but for debugging purposes
+				 * we might want to check again.
+				 */
+#ifdef RBS_DEBUG
+				if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una)) {
+					printk("ProgMP: found a reinjected skb %p with end_seq %u < snd_una %u in %s ... it was already acknowledged\n", skb, TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una, __func__);
+					BUG();
+				}
+#endif
+			} else {
+				if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una)) {
+					/* Segment already reached the peer, take the next one */
+					__skb_unlink(skb, &mpcb->reinject_queue);
+					__kfree_skb(skb);
+					continue;
+				}
+			}
+		}
+
+		/*
+		 * RBS: copied from the scheduler; without this code, the
+		 * fragmentation call later sometimes crashes.
+		 */
+		if (mpcb->sched_ops->name[0] == 'r' && mpcb->sched_ops->name[1] == 'b' && mpcb->sched_ops->name[2] == 's') {
+			u32 max_segs;
+			u16 gso_max_segs = subsk->sk_gso_max_segs;
+			if (!gso_max_segs) /* No gso supported on the subflow's NIC */
+				gso_max_segs = 1;
+			max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs);
+			if (!max_segs) {
+//				mptcp_debug("RBS0 recover ... could not send skb %p\n", skb);
+				//if (mpcb->sched_ops->recover_skb) {
+				//	mpcb->sched_ops->recover_skb(meta_sk, subsk, skb, reinject);
+				//}
+				//break;
+//				mptcp_debug("%s temporarily removed this to check what happens when we send more than cwnd\n", __func__);
+			}
+		}
+		/*
+		 * RBS: end of the code copied from the scheduler.
+		 */
 
 		/* If the segment was cloned (e.g. a meta retransmission),
 		 * the header must be expanded/copied so that there is no
 		 * corruption of TSO information.
 		 */
-		if (skb_unclone(skb, GFP_ATOMIC))
+		if (skb_unclone(skb, GFP_ATOMIC)) {
+//			mptcp_debug("RBS1 recover ... could not send skb %p\n", skb);
+			if (mpcb->sched_ops->recover_skb) {
+				mpcb->sched_ops->recover_skb(meta_sk, subsk, skb, reinject);
+			}
 			break;
+		}
 
-		if (unlikely(!tcp_snd_wnd_test(meta_tp, skb, mss_now)))
+		if (unlikely(!tcp_snd_wnd_test(meta_tp, skb, mss_now))) {
+//			mptcp_debug("RBS2 could not send skb %p\n", skb);
+			if (mpcb->sched_ops->recover_skb) {
+				mpcb->sched_ops->recover_skb(meta_sk, subsk, skb, reinject);
+			}
 			break;
+		}
 
 		/* Force tso_segs to 1 by using UINT_MAX.
 		 * We actually don't care about the exact number of segments
@@ -701,8 +873,13 @@ bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle,
 		 */
 		if (unlikely(!tcp_nagle_test(meta_tp, skb, mss_now,
 					     (tcp_skb_is_last(meta_sk, skb) ?
-					      nonagle : TCP_NAGLE_PUSH))))
+					      nonagle : TCP_NAGLE_PUSH)))) {
+//			mptcp_debug("RBS3 could not send skb %p\n", skb);
+			if (mpcb->sched_ops->recover_skb) {
+				mpcb->sched_ops->recover_skb(meta_sk, subsk, skb, reinject);
+			}
 			break;
+		}
 
 		limit = mss_now;
 		/* skb->len > mss_now is the equivalent of tso_segs > 1 in
@@ -725,13 +902,40 @@ bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle,
 		if (sublimit)
 			limit = min(limit, sublimit);
 
+#ifdef FRAGMENT_BUG_DBG
+		if (!skb) {
+//			printk("mptcp_next_segment found skb as null before fragment with reinject %i\n", reinject);
+		}
+
+		if (!skb->next) {
+//			printk("mptcp_next_segment found skb->next for skb %p as null before fragment with reinject %i\n", skb, reinject);
+		}
+#endif
 		if (skb->len > limit &&
-		    unlikely(mptcp_fragment(meta_sk, skb, limit, gfp, reinject)))
+		    unlikely(mptcp_fragment(meta_sk, skb, limit, gfp, reinject))) {
+//			mptcp_debug("RBS4 could not send skb %p\n", skb);
+			if (mpcb->sched_ops->recover_skb) {
+				mpcb->sched_ops->recover_skb(meta_sk, subsk, skb, reinject);
+			}
 			break;
+		}
 
-		if (!mptcp_skb_entail(subsk, skb, reinject))
+		if (!mptcp_skb_entail(subsk, skb, reinject)) {
+//			mptcp_debug("RBS5 could not send skb %p\n", skb);
+			if (mpcb->sched_ops->recover_skb) {
+				mpcb->sched_ops->recover_skb(meta_sk, subsk, skb, reinject);
+			}
 			break;
+		}
+
+		if (mpcb->sched_ops->update_stats) {
+			mpcb->sched_ops->update_stats(subsk, skb, skb->len, 0);
+		}
+
+		/* Statistics for the web */
+		subtp->mptcp->bytes_snd += skb->len;
+
 		/* Nagle is handled at the MPTCP-layer, so
 		 * always push on the subflow
 		 */
@@ -749,14 +953,29 @@ bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle,
 		tcp_minshall_update(meta_tp, mss_now, skb);
 
 		if (reinject > 0) {
-			__skb_unlink(skb, &mpcb->reinject_queue);
-			kfree_skb(skb);
+			if (mpcb->sched_ops->name[0] == 'r' && mpcb->sched_ops->name[1] == 'b' && mpcb->sched_ops->name[2] == 's') {
+				/*
+				 * rbs decides about unlinking, and always returns a copy of
+				 * the skb if called with top; thus, calling free would be
+				 * safe at this position.
+				 */
+
+				/* TODO: an unlinked packet might cause problems in fragment, as fragment assumes the packet to be in a queue! */
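+
+				/* Sketch of a possible guard for the TODO above
+				 * (hypothetical helper, not an existing API):
+				 * __skb_unlink() resets skb->next and skb->prev to
+				 * NULL, so a cheap check before fragmenting would be
+				 *
+				 *	static inline bool rbs_skb_is_linked(const struct sk_buff *skb)
+				 *	{
+				 *		return skb->next && skb->prev;
+				 *	}
+				 *
+				 * mptcp_fragment() could then warn or bail out when
+				 * handed an unlinked skb instead of corrupting a queue.
+				 */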
+				//kfree_skb(skb);
+//				mptcp_debug("%s pushed reinjected %p with next %p and qlen %u\n",
+//					    __func__, skb, skb->next, mpcb->reinject_queue.qlen);
+			} else {
+				__skb_unlink(skb, &mpcb->reinject_queue);
+				kfree_skb(skb);
+			}
 		}
 
 		if (push_one)
 			break;
 	}
 
+//	mptcp_debug("rbs after loop meta_sk->send_head = %p and queue_len(sk_write_queue) = %5i and packets_out %u\n",
+//		    tcp_send_head(meta_sk), skb_queue_len(&meta_sk->sk_write_queue), meta_tp->packets_out);
+
 	mptcp_for_each_sk(mpcb, subsk) {
 		subtp = tcp_sk(subsk);
 
@@ -1216,7 +1435,13 @@ void mptcp_send_fin(struct sock *meta_sk)
 	 */
 	mss_now = mptcp_current_mss(meta_sk);
 
-	if (tcp_send_head(meta_sk) != NULL) {
+	if (tcp_send_head(meta_sk) != NULL && skb == NULL) {
+		printk("ProgMP would have crashed in %s as send_head is not in the write queue. How can this happen?\n", __func__);
+	}
+
+	if (tcp_send_head(meta_sk) != NULL &&
+	    skb != NULL /* afr: is this really a fix? Without it there is
+			 * sometimes a crash at the end, because send_head
+			 * points at something although the queue is empty. */) {
+
 		TCP_SKB_CB(skb)->mptcp_flags |= MPTCPHDR_FIN;
 		TCP_SKB_CB(skb)->end_seq++;
 		meta_tp->write_seq++;
@@ -1385,6 +1610,8 @@ int mptcp_retransmit_skb(struct sock *meta_sk, struct sk_buff *skb)
 	unsigned int limit, mss_now;
 	int err = -1;
 
+	mptcp_debug("retransmit skb %p for meta_sk %p with skb->next = %p and skb->prev = %p\n", skb, meta_sk, skb->next, skb->prev);
+
 	/* Do not sent more than we queued. 1/4 is reserved for possible
 	 * copying overhead: fragmentation, tunneling, mangling etc.
 	 *
@@ -1466,7 +1693,19 @@ void mptcp_meta_retransmit_timer(struct sock *meta_sk)
 	struct tcp_sock *meta_tp = tcp_sk(meta_sk);
 	struct mptcp_cb *mpcb = meta_tp->mpcb;
 	struct inet_connection_sock *meta_icsk = inet_csk(meta_sk);
-	int err;
+	int err = 0;
+
+/*	mptcp_debug("######### afr meta_retransmit timer meta_sk->send_head %p, meta_sk->sk_write_queue.next %p, reinject_queue len %i, first packet in reinject queue %p called by %pS\n",
+		    meta_sk->sk_send_head,
+		    meta_sk->sk_write_queue.next,
+		    skb_queue_len(&meta_tp->mpcb->reinject_queue),
+		    skb_peek(&meta_tp->mpcb->reinject_queue),
+		    __builtin_return_address(0));*/
+
+	if (tcp_write_queue_empty(meta_sk)) {
+		printk("ProgMP: abort meta retransmit as the write queue is empty... why is this triggered at all?\n");
+		return;
+	}
 
 	/* In fallback, retransmission is handled at the subflow-level */
 	if (!meta_tp->packets_out || mpcb->infinite_mapping_snd)
@@ -1503,7 +1742,17 @@ void mptcp_meta_retransmit_timer(struct sock *meta_sk)
 			return;
 		}
 
-		mptcp_retransmit_skb(meta_sk, tcp_write_queue_head(meta_sk));
+		mptcp_debug("meta_retransmit_timer wants to retransmit skb %p", tcp_write_queue_head(meta_sk));
+		/* rbs: check if we already sent this skb (the skb itself, not the data) */
+		if (!tcp_skb_pcount(tcp_write_queue_head(meta_sk))) {
+			// Putting it into the reinject queue would be "correct", but then we would send out of order...
+			// Maybe we should transmit it directly here...
+			// Maybe we should remove it from the send queue and continue without cloning...
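+
+			// Why tcp_skb_pcount() works as a "was this skb ever sent"
+			// test (sketch; the helper is hypothetical): pcount is the
+			// GSO segment count, which is only set once the skb is
+			// segmented for transmission, so a queued-but-never-sent
+			// skb still has pcount == 0:
+			//
+			//	static inline bool rbs_skb_was_sent(const struct sk_buff *skb)
+			//	{
+			//		return tcp_skb_pcount(skb) != 0;
+			//	}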
+ mptcp_debug("rbs detected sk_send_head before unsent packets for retransmission of skb %p, put copy into reinjection queue\n", tcp_write_queue_head(meta_sk)); + __mptcp_reinject_data(tcp_write_queue_head(meta_sk), meta_sk, NULL, 1); + } else { + mptcp_retransmit_skb(meta_sk, tcp_write_queue_head(meta_sk)); + } goto out_reset_timer; } @@ -1515,7 +1764,21 @@ void mptcp_meta_retransmit_timer(struct sock *meta_sk) meta_icsk->icsk_ca_state = TCP_CA_Loss; - err = mptcp_retransmit_skb(meta_sk, tcp_write_queue_head(meta_sk)); + mptcp_debug("meta_retransmit_timer at second position wants to retransmit skb %p", tcp_write_queue_head(meta_sk)); + // TODO this code is just a copy of the stuff some lines above + /* rbs: check if we already sent this skb (not the data, the skb) */ + if(!tcp_skb_pcount(tcp_write_queue_head(meta_sk))) { + // put it in reinjection queue would be "correct", however, than we would be out of order... + // maybe we will directly xmit it here... + // maybe we will remove it from sending queue, and continue without cloning... + mptcp_debug("rbs detected sk_send_head before unsent packets for retransmission of skb %p at second position, put copy into reinjection queue\n", tcp_write_queue_head(meta_sk)); + // TODO jetzt beim zweiten draufschauen ist mir nicht mehr klar, warum wir das drin haben... wie kann write queue head ... naja + if(tcp_write_queue_head(meta_sk)) + __mptcp_reinject_data(tcp_write_queue_head(meta_sk), meta_sk, NULL, 1); + } else { + err = mptcp_retransmit_skb(meta_sk, tcp_write_queue_head(meta_sk)); + } + if (err > 0) { /* Retransmission failed because of local congestion, * do not backoff. @@ -1578,6 +1841,12 @@ void mptcp_sub_retransmit_timer(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + /*printk("Sub retransmit Caller is %pS\n", __builtin_return_address(0)); + printk("Callerer is %pS\n", __builtin_return_address(1)); + mptcp_debug("######### sub_restransmit_timer meta_sk->send_head %p , reinject_queue len %i and queue %p and first entry %p called by %pS\n", + sk->sk_send_head, skb_queue_len(&tp->mpcb->reinject_queue), &tp->mpcb->reinject_queue, + tp->mpcb->reinject_queue.next, __builtin_return_address(0));*/ + tcp_retransmit_timer(sk); if (!tp->fastopen_rsk) { diff --git a/net/mptcp/mptcp_rbs_action.c b/net/mptcp/mptcp_rbs_action.c new file mode 100644 index 0000000000000..05c6b711fd87c --- /dev/null +++ b/net/mptcp/mptcp_rbs_action.c @@ -0,0 +1,44 @@ +#include "mptcp_rbs_action.h" +#include +#include +#include + +void mptcp_rbs_action_new(struct mptcp_rbs_actions *actions, bool high_priority, + enum mptcp_rbs_action_kind kind, struct tcp_sock *sbf, + struct sk_buff *skb, bool reinject) +{ + int i; + struct mptcp_rbs_action *action = NULL; + + /* Check if there is place in the static array */ + for (i = 0; i < STATIC_ACTIONS_NUM; ++i) { + if (!actions->static_actions[i].skb) { + action = &actions->static_actions[i]; + break; + } + } + + if (!action) + action = kmalloc(sizeof(struct mptcp_rbs_action), GFP_ATOMIC); + + action->next = NULL; + action->kind = kind; + action->sbf = sbf; + action->skb = skb; + action->end_seq = TCP_SKB_CB(skb)->end_seq; + action->reinject = reinject; + + if (high_priority) { + if (actions->first) + action->next = actions->first; + else + actions->last = action; + actions->first = action; + } else { + if (actions->last) + actions->last->next = action; + else + actions->first = action; + actions->last = action; + } +} diff --git a/net/mptcp/mptcp_rbs_action.h b/net/mptcp/mptcp_rbs_action.h new file mode 100644 index 
0000000000000..ef8edffb8a78b --- /dev/null +++ b/net/mptcp/mptcp_rbs_action.h @@ -0,0 +1,60 @@ +#ifndef _MPTCP_RBS_ACTION_H +#define _MPTCP_RBS_ACTION_H + +#include + +struct mptcp_rbs_cb; +struct sk_buff; +struct tcp_sock; + +enum mptcp_rbs_action_kind { ACTION_KIND_PUSH, ACTION_KIND_DROP }; + +/* Action during evaluation */ +struct mptcp_rbs_action { + struct mptcp_rbs_action *next; + enum mptcp_rbs_action_kind kind; + struct tcp_sock *sbf; + struct sk_buff *skb; + u32 end_seq; + bool reinject; +}; + +#define STATIC_ACTIONS_NUM 10 + +/* Multiple actions */ +struct mptcp_rbs_actions { + struct mptcp_rbs_action static_actions[STATIC_ACTIONS_NUM]; + struct mptcp_rbs_action *first; + struct mptcp_rbs_action *last; +}; + +#define FOREACH_ACTION(actions, kind_, sbf_, skb_, end_seq_, reinject_, cmds) \ + do { \ + while ((actions)->first) { \ + struct mptcp_rbs_action *__cur = (actions)->first; \ + kind_ = __cur->kind; \ + sbf_ = __cur->sbf; \ + skb_ = __cur->skb; \ + end_seq_ = __cur->end_seq; \ + reinject_ = __cur->reinject; \ + \ + (actions)->first = (actions)->first->next; \ + if (!(actions)->first) \ + (actions)->last = NULL; \ + if (__cur < &(actions)->static_actions[0] || \ + __cur > \ + &(actions) \ + ->static_actions[STATIC_ACTIONS_NUM - 1]) \ + kfree(__cur); \ + else \ + __cur->skb = NULL; \ + \ + cmds; \ + } \ + } while (0) + +void mptcp_rbs_action_new(struct mptcp_rbs_actions *actions, bool high_priority, + enum mptcp_rbs_action_kind kind, struct tcp_sock *sbf, + struct sk_buff *skb, bool reinject); + +#endif diff --git a/net/mptcp/mptcp_rbs_cfg.c b/net/mptcp/mptcp_rbs_cfg.c new file mode 100644 index 0000000000000..708e967ddba39 --- /dev/null +++ b/net/mptcp/mptcp_rbs_cfg.c @@ -0,0 +1,230 @@ +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_parser.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value.h" +#include + +void mptcp_rbs_cfg_block_traverse(struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block2; + + /* Check if the block was already visited */ + FOREACH_BLOCK(list, block2, if (block == block2) return ); + ADD_BLOCK(list, block); + + if (block->next) + mptcp_rbs_cfg_block_traverse(block->next, list); + if (block->next_else) + mptcp_rbs_cfg_block_traverse(block->next_else, list); +} + +void mptcp_rbs_cfg_block_append(struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_smt *first_smt) +{ + struct mptcp_rbs_smt *smt; + + if (!block->first_smt) + block->first_smt = first_smt; + else { + smt = block->first_smt; + + while (smt->next) { + smt = smt->next; + } + + smt->next = first_smt; + } +} + +void mptcp_rbs_cfg_block_free(struct mptcp_rbs_cfg_block *block) +{ + if (block->first_smt) + mptcp_rbs_smts_free(block->first_smt); + MPTCP_RBS_VALUE_FREE(block->condition); + kfree(block); +} + +static void mptcp_rbs_cfg_free_helper(struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block2; + + /* Check if the block is already in the list */ + FOREACH_BLOCK(list, block2, if (block == block2) return ); + ADD_BLOCK(list, block); + + if (block->next) + mptcp_rbs_cfg_free_helper(block->next, list); + if (block->next_else) + mptcp_rbs_cfg_free_helper(block->next_else, list); +} + +void mptcp_rbs_cfg_blocks_free(struct mptcp_rbs_cfg_block *first_block) +{ + struct mptcp_rbs_cfg_block_list list; + struct mptcp_rbs_cfg_block *block; + + INIT_BLOCK_LIST(&list); + mptcp_rbs_cfg_free_helper(first_block, &list); + + FOREACH_BLOCK(&list, block, mptcp_rbs_cfg_block_free(block)); + 
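+
+	/* Usage sketch (illustrative, names as above): the block-list macros
+	 * expand to a small dynamic array, and the collect-then-process
+	 * pattern used in this function reads as:
+	 *
+	 *	struct mptcp_rbs_cfg_block_list list;
+	 *	struct mptcp_rbs_cfg_block *b;
+	 *
+	 *	INIT_BLOCK_LIST(&list);
+	 *	mptcp_rbs_cfg_free_helper(first_block, &list);
+	 *	FOREACH_BLOCK(&list, b, mptcp_rbs_cfg_block_free(b));
+	 *	FREE_BLOCK_LIST(&list);
+	 *
+	 * Collecting every block once (the helper skips blocks already in the
+	 * list) is what makes freeing safe even though next/next_else edges
+	 * may form cycles in the control flow graph.
+	 */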
FREE_BLOCK_LIST(&list); +} + +struct mptcp_rbs_cfg_block *mptcp_rbs_cfg_block_clone( + const struct mptcp_rbs_cfg_block *block, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_cfg_block *clone; + struct mptcp_rbs_smt *smt = block->first_smt; + struct mptcp_rbs_smt *last_clone_smt = NULL; + + clone = kmalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_ATOMIC); + *clone = *block; + + clone->first_smt = NULL; + while (smt) { + struct mptcp_rbs_smt *clone_smt = + mptcp_rbs_smt_clone(smt, user_ctx, user_func); + smt = smt->next; + + if (last_clone_smt) + last_clone_smt->next = clone_smt; + else + clone->first_smt = clone_smt; + last_clone_smt = clone_smt; + } + + clone->next = NULL; + clone->next_else = NULL; + if (clone->condition) + clone->condition = + (struct mptcp_rbs_value_bool *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->condition, user_ctx, + user_func); + + return clone; +} + +static struct mptcp_rbs_cfg_block *mptcp_rbs_cfg_blocks_clone_helper( + const struct mptcp_rbs_cfg_block *block, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *clone; + + /* Check if the block was already cloned */ + FOREACH_BLOCK(list, clone, if (block == clone->tag) return clone); + + clone = mptcp_rbs_cfg_block_clone(block, user_ctx, user_func); + clone->tag = (struct mptcp_rbs_cfg_block *) block; + ADD_BLOCK(list, clone); + + if (block->next) + clone->next = mptcp_rbs_cfg_blocks_clone_helper( + block->next, user_ctx, user_func, list); + if (block->next_else) + clone->next_else = mptcp_rbs_cfg_blocks_clone_helper( + block->next_else, user_ctx, user_func, list); + + return clone; +} + +struct mptcp_rbs_cfg_block *mptcp_rbs_cfg_blocks_clone( + const struct mptcp_rbs_cfg_block *first_block, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + /* The list contains the new created blocks with their tags set to the + * old ones. 
This is important to allow concurrent copying + */ + struct mptcp_rbs_cfg_block *clone; + struct mptcp_rbs_cfg_block *block; + struct mptcp_rbs_cfg_block_list list; + + INIT_BLOCK_LIST(&list); + clone = mptcp_rbs_cfg_blocks_clone_helper(first_block, user_ctx, + user_func, &list); + FOREACH_BLOCK(&list, block, block->tag = NULL); + FREE_BLOCK_LIST(&list); + return clone; +} + +static int print_null_block(char **buffer, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block; + + /* Check if the block was already printed */ + FOREACH_BLOCK(list, block, if (block == NULL) return 0); + ADD_BLOCK(list, NULL); + + return sprintf_null(buffer, "%p:\n RETURN;\n\n", NULL); +} + +static int mptcp_rbs_cfg_block_print(const struct mptcp_rbs_cfg_block *block, + char **buffer, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block2; + int len; + int tmp_len; + const struct mptcp_rbs_smt *smt = block->first_smt; + + /* Check if the block was already printed */ + FOREACH_BLOCK(list, block2, if (block == block2) return 0); + ADD_BLOCK(list, (struct mptcp_rbs_cfg_block *) block); + + len = sprintf_null(buffer, "%p:\n", block); + + while (smt) { + len += sprintf_null(buffer, " "); + tmp_len = mptcp_rbs_smt_print(smt, *buffer); + len += tmp_len; + if (buffer && *buffer) + *buffer += tmp_len; + len += sprintf_null(buffer, "\n"); + smt = smt->next; + } + + if (block->condition) { + len += sprintf_null(buffer, " IF "); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) block->condition, *buffer); + len += tmp_len; + if (buffer && *buffer) + *buffer += tmp_len; + len += sprintf_null(buffer, " GOTO %p ELSE GOTO %p;\n\n", + block->next, block->next_else); + + if (block->next) + len += mptcp_rbs_cfg_block_print(block->next, buffer, + list); + else + len += print_null_block(buffer, list); + + if (block->next_else) + len += mptcp_rbs_cfg_block_print(block->next_else, + buffer, list); + else + len += print_null_block(buffer, list); + } else if (block->next) { + len += sprintf_null(buffer, " GOTO %p;\n\n", block->next); + len += mptcp_rbs_cfg_block_print(block->next, buffer, list); + } else + len += sprintf_null(buffer, " RETURN;\n\n"); + + return len; +} + +int mptcp_rbs_cfg_blocks_print(const struct mptcp_rbs_cfg_block *first_block, + char *buffer) +{ + struct mptcp_rbs_cfg_block_list list; + int len; + + INIT_BLOCK_LIST(&list); + len = mptcp_rbs_cfg_block_print(first_block, &buffer, &list); + FREE_BLOCK_LIST(&list); + return len; +} diff --git a/net/mptcp/mptcp_rbs_cfg.h b/net/mptcp/mptcp_rbs_cfg.h new file mode 100644 index 0000000000000..6548686f2f146 --- /dev/null +++ b/net/mptcp/mptcp_rbs_cfg.h @@ -0,0 +1,109 @@ +#ifndef _MPTCP_RBS_CFG_H +#define _MPTCP_RBS_CFG_H + +#include "mptcp_rbs_dynarray.h" + +/* + * Struct for a single block inside the control flow graph. + * Blocks are singly linked to save memory and remove the necessity to handle + * lists of multiple previous blocks. Back pointers would only be useful for + * optimizations at creation time but useless for the rest of the time. + */ +struct mptcp_rbs_cfg_block { + /* The next block or NULL if the execution ends after this block */ + struct mptcp_rbs_cfg_block *next; + /* + * The alternative next block if the block ends with an if instruction + * or NULL + */ + struct mptcp_rbs_cfg_block *next_else; + /* Condition if the block ends with an if instruction or NULL */ + struct mptcp_rbs_value_bool *condition; + /* + * This field can be used for any purpose i.e. 
to store information + * during optimization + */ + void *tag; + /* First statement in the block */ + struct mptcp_rbs_smt *first_smt; +}; + +/* + * Block lists + */ + +DECL_DA(mptcp_rbs_cfg_block_list, struct mptcp_rbs_cfg_block *); + +#define INIT_BLOCK_LIST(list) INIT_DA(list) + +#define FREE_BLOCK_LIST(list) FREE_DA(list) + +#define ADD_BLOCK(list, block) ADD_DA_ITEM(list, block) + +#define FOREACH_BLOCK(list, var, cmds) FOREACH_DA_ITEM(list, var, cmds) + +/* + * Traverses over all blocks and adds them to the list + */ +void mptcp_rbs_cfg_block_traverse(struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_cfg_block_list *list); + +/* + * Appends statements at the end of the block + */ +void mptcp_rbs_cfg_block_append(struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_smt *first_smt); + +/* + * Releases the passed control flow graph block and all its statements + */ +void mptcp_rbs_cfg_block_free(struct mptcp_rbs_cfg_block *block); + +/* + * Releases the passed control flow graph block and all its successors + */ +void mptcp_rbs_cfg_blocks_free(struct mptcp_rbs_cfg_block *first_block); + +#ifndef MPTCP_RBS_CLONE_USER_FUNC_DEFINED +#define MPTCP_RBS_CLONE_USER_FUNC_DEFINED +typedef struct mptcp_rbs_value *(*mptcp_rbs_value_clone_user_func)( + void *user_ctx, const struct mptcp_rbs_value *value); +#endif + +/* + * Creates a copy of a block and all its statements + * @block: The block to copy + * @user_ctx: User context for the user function or NULL + * @user_func: Function that is executed for each value or NULL. If this + * function returns a value other than NULL the current value is replaced with + * it instead of cloned + * Return: The new instance + */ +struct mptcp_rbs_cfg_block *mptcp_rbs_cfg_block_clone( + const struct mptcp_rbs_cfg_block *block, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* + * Creates a copy of a block, all its statements and all following blocks + * @first_block: The first block to copy + * @user_ctx: User context for the user function or NULL + * @user_func: Function that is executed for each value or NULL. 
If this + * function returns a value other than NULL the current value is replaced with + * it instead of cloned + * Return: The new instance + */ +struct mptcp_rbs_cfg_block *mptcp_rbs_cfg_blocks_clone( + const struct mptcp_rbs_cfg_block *first_block, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* + * Writes a string representation of a control flow graph block and all its + * successors to the given buffer + * @first_block: Pointer to the CFG block + * @buffer: Pointer to the buffer where the string should be stored or NULL + * Return: Number of written characters + */ +int mptcp_rbs_cfg_blocks_print(const struct mptcp_rbs_cfg_block *first_block, + char *buffer); + +#endif diff --git a/net/mptcp/mptcp_rbs_ctx.c b/net/mptcp/mptcp_rbs_ctx.c new file mode 100644 index 0000000000000..df00bf4d7cd2b --- /dev/null +++ b/net/mptcp/mptcp_rbs_ctx.c @@ -0,0 +1,13 @@ +#include "mptcp_rbs_ctx.h" + +bool mptcp_rbs_reg_value_set(struct tcp_sock *meta_tp, + struct mptcp_rbs_reg_value *value) +{ + struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(meta_tp); + + if (value->reg_num >= MPTCP_RBS_REG_COUNT) + return false; + + rbs_cb->regs[value->reg_num] = value->value; + return true; +} diff --git a/net/mptcp/mptcp_rbs_ctx.h b/net/mptcp/mptcp_rbs_ctx.h new file mode 100644 index 0000000000000..f4d5d249114d0 --- /dev/null +++ b/net/mptcp/mptcp_rbs_ctx.h @@ -0,0 +1,86 @@ +#ifndef _MPTCP_RBS_CTX_H +#define _MPTCP_RBS_CTX_H + +#include "mptcp_rbs_action.h" +#include "mptcp_rbs_var.h" +#include + +#define MPTCP_RBS_REG_COUNT 6 + +/* Central control block information per meta sock */ +struct mptcp_rbs_cb { + struct mptcp_rbs_scheduler *scheduler; + struct mptcp_rbs_scheduler_variation *variation; + struct mptcp_rbs_actions *open_actions; + unsigned int regs[MPTCP_RBS_REG_COUNT]; + struct sk_buff *queue_position; + u8 skb_prop; + u32 last_number_of_subflows; + u32 calls_since_sbf_change; + /* Execution counter for FOREACH loops. This count is used to detect if + * a loop is entered inside *_NEXT values + */ + u32 exec_count; + u32 highest_seq; + u32 execution_bucket; /* foreach pop and drop, it is increased by 5, foreach execution it is decreased by 1, if no bucket is left, switch to default scheduler! 
*/ +}; + +/* Central control block information per subflow */ +struct mptcp_rbs_sbf_cb { + /* average bw sent */ + u64 bw_out_last_update_ns; + u64 bw_out_bytes; + + /* average bw acknowledged */ + u64 bw_ack_last_update_ns; + u64 bw_ack_bytes; + + /* Delay measurement values */ + u32 delay_in; + u32 delay_out; + + s64 user; + + /* total size = 8 * 6 bytes = 48 bytes */ +}; + +struct mptcp_rbs_eval_ctx { + struct sock *meta_sk; + struct mptcp_cb *mpcb; + struct mptcp_rbs_cb *rbs_cb; + struct mptcp_rbs_var vars[MPTCP_RBS_MAX_VAR_COUNT]; + /* maybe we will have an int with flags in the future */ + bool side_effects; +}; + +/* Struct that is used to set register values */ +struct mptcp_rbs_reg_value { + unsigned int reg_num; + unsigned int value; +}; + +static inline bool mptcp_rbs_is_sched_used(struct tcp_sock *meta_tp) +{ + struct mptcp_cb *mpcb = meta_tp->mpcb; + + return mpcb && mpcb->sched_ops && + !strncmp(mpcb->sched_ops->name, "rbs", 3); +} + +static inline struct mptcp_rbs_cb *mptcp_rbs_get_cb(struct tcp_sock *meta_tp) +{ + struct mptcp_cb *mpcb = meta_tp->mpcb; + + return (struct mptcp_rbs_cb *) &mpcb->mptcp_sched[0]; +} + +static inline struct mptcp_rbs_sbf_cb *mptcp_rbs_get_sbf_cb( + struct tcp_sock *sbf) +{ + return (struct mptcp_rbs_sbf_cb *) &sbf->mptcp->mptcp_sched[0]; +} + +bool mptcp_rbs_reg_value_set(struct tcp_sock *meta_tp, + struct mptcp_rbs_reg_value *value); + +#endif diff --git a/net/mptcp/mptcp_rbs_dynarray.h b/net/mptcp/mptcp_rbs_dynarray.h new file mode 100644 index 0000000000000..ec37aee409d8d --- /dev/null +++ b/net/mptcp/mptcp_rbs_dynarray.h @@ -0,0 +1,94 @@ +#ifndef _MPTCP_RBS_DYNARRAY_H +#define _MPTCP_RBS_DYNARRAY_H + +#include +#include + +#define DECL_DA(name, item_type) \ + struct name { \ + item_type *items; \ + int len; \ + int capacity; \ + } + +#define INIT_DA(array) \ + do { \ + (array)->items = NULL; \ + (array)->len = 0; \ + (array)->capacity = 0; \ + } while (0) + +#define FREE_DA(array) kfree((array)->items) + +#define ADD_DA_ITEM(array, item) INSERT_DA_ITEM(array, (array)->len, item) + +#define ADD_DA_ITEM_EX(array, item, grow_func) \ + INSERT_DA_ITEM_EX(array, (array)->len, item, grow_func) + +#define INSERT_DA_ITEM(array, index, item) \ + INSERT_DA_ITEM_EX(array, index, item, \ + !(array)->capacity ? 
					   8 : (array)->capacity << 1)
+
+#define INSERT_DA_ITEM_EX(array, index, item, grow_func)                      \
+	do {                                                                  \
+		if ((array)->len == (array)->capacity) {                      \
+			(array)->capacity = grow_func;                        \
+			(array)->items = krealloc((array)->items,             \
+						  sizeof((array)->items[0]) * \
+						      (array)->capacity,      \
+						  GFP_KERNEL);                \
+		}                                                             \
+									      \
+		/* Shift the tail of the item array, not the array struct */  \
+		if ((index) != (array)->len)                                  \
+			memmove(&(array)->items[(index) + 1],                 \
+				&(array)->items[index],                       \
+				((array)->len - (index)) *                    \
+				    sizeof((array)->items[0]));               \
+		(array)->items[index] = item;                                 \
+		++(array)->len;                                               \
+	} while (0)
+
+#define DELETE_DA_ITEM(array, index)                                          \
+	do {                                                                  \
+		BUG_ON((index) < 0);                                          \
+		BUG_ON((index) >= (array)->len);                              \
+									      \
+		/* memmove needs the addresses of the item slots */           \
+		if ((index) < (array)->len - 1)                               \
+			memmove(&(array)->items[index],                       \
+				&(array)->items[(index) + 1],                 \
+				((array)->len - 1 - (index)) *                \
+				    sizeof((array)->items[0]));               \
+									      \
+		--(array)->len;                                               \
+	} while (0)
+
+#define GET_DA_LEN(array) (array)->len
+
+#define GET_DA_ITEM(array, index)                                             \
+	({                                                                    \
+		BUG_ON((index) < 0 || (index) >= (array)->len);               \
+		(array)->items[index];                                        \
+	})
+
+#define FOREACH_DA_ITEM(array, var, cmds)                                     \
+	do {                                                                  \
+		typeof(var) *__item = (array)->items;                         \
+		typeof(var) *__end = (array)->items + (array)->len;           \
+		while (__item != __end) {                                     \
+			var = *__item;                                        \
+			++__item;                                             \
+			cmds;                                                 \
+		}                                                             \
+	} while (0)
+
+#define FOREACH_DA_ITEM_REV(array, var, cmds)                                 \
+	do {                                                                  \
+		typeof(var) *__item = (array)->items + (array)->len - 1;      \
+		typeof(var) *__end = (array)->items - 1;                      \
+		while (__item != __end) {                                     \
+			var = *__item;                                        \
+			--__item;                                             \
+			cmds;                                                 \
+		}                                                             \
+	} while (0)
+
+#endif
diff --git a/net/mptcp/mptcp_rbs_exec.c b/net/mptcp/mptcp_rbs_exec.c
new file mode 100644
index 0000000000000..01c57b003b79c
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_exec.c
@@ -0,0 +1,50 @@
+#include "mptcp_rbs_exec.h"
+#include "mptcp_rbs_cfg.h"
+#include "mptcp_rbs_scheduler.h"
+#include "mptcp_rbs_smt.h"
+
+void mptcp_rbs_exec(struct mptcp_rbs_eval_ctx *ctx)
+{
+	struct mptcp_rbs_scheduler_variation *variation =
+	    ctx->rbs_cb->variation;
+	struct mptcp_rbs_cfg_block *block = variation->first_block;
+	int i;
+
+#ifdef CONFIG_MPTCP_RBSMEASURE
+	u64 time = __native_read_tsc();
+#endif
+
+	while (block) {
+		struct mptcp_rbs_smt *smt = block->first_smt;
+		struct mptcp_rbs_value_bool *cond = block->condition;
+
+		while (smt) {
+			smt->execute(smt, ctx);
+			smt = smt->next;
+		}
+
+		if (cond) {
+			s32 b = cond->execute(cond, ctx);
+			if (b <= 0) {
+				/*
+				 * Else should be executed if the condition
+				 * evaluates to false or to null
+				 */
+				block = block->next_else;
+				continue;
+			}
+		}
+
+		block = block->next;
+	}
+
+#ifdef CONFIG_MPTCP_RBSMEASURE
+	variation->exec_count += 1;
+	variation->total_time += __native_read_tsc() - time;
+#endif
+
+	/* Release allocated variables */
+	for (i = 0; i < variation->used_vars; ++i) {
+		mptcp_rbs_var_free(&ctx->vars[i]);
+	}
+}
diff --git a/net/mptcp/mptcp_rbs_exec.h b/net/mptcp/mptcp_rbs_exec.h
new file mode 100644
index 0000000000000..e13749bac270d
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_exec.h
@@ -0,0 +1,11 @@
+#ifndef _MPTCP_RBS_EXEC_H
+#define _MPTCP_RBS_EXEC_H
+
+#include "mptcp_rbs_ctx.h"
+
+/*
+ * Executes the CFG in the given context
+ */
+void mptcp_rbs_exec(struct mptcp_rbs_eval_ctx *ctx);
+
+#endif
diff --git a/net/mptcp/mptcp_rbs_lexer.c b/net/mptcp/mptcp_rbs_lexer.c
new file mode 100644
index 0000000000000..8fd8991c65674
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_lexer.c
@@ -0,0 +1,712 @@
+#include "mptcp_rbs_lexer.h"
+#include
+#include
+
+static bool inline is_whitespace(char c)
+{
+	return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+}
+
+static
bool inline is_linebreak(char c) +{ + return c == '\n'; +} + +static bool inline is_number(char c) +{ + return c >= '0' && c <= '9'; +} + +static bool inline is_char(char c) +{ + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; +} + +static char last_error[64]; + +bool mptcp_rbs_get_next_token(char const **str, int *position, + int *line, int *line_position, struct mptcp_rbs_token *token) +{ + char c; + + /* Jump over whitespaces */ + while (true) { + c = **str; + + if (c == 0) { + /* End of data found */ + token->kind = TOKEN_KIND_EOD; + token->position = *position; + return true; + } else if (!is_whitespace(c)) + break; + + ++*str; + ++*position; + ++*line_position; + + if (is_linebreak(c)) { + ++*line; + *line_position = 0; + } + } + + /* Store the position of this token and increase it for future calls */ + token->position = *position; + token->line = *line; + token->line_position = *line_position; + ++*str; + ++*position; + ++*line_position; + + switch (c) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + /* Must be a number literal */ + int value = c - '0'; + + while (true) { + c = **str; + + if (!is_number(c)) + break; + + value = value * 10 + c - '0'; + ++*str; + ++*position; + ++*line_position; + } + + token->kind = TOKEN_KIND_NUMBER; + token->number = value; + return true; + } + case '"': { + /* Must be a string literal */ + const char *start = *str; + int len; + + while (true) { + c = **str; + + if (c == 0 || c == '\n' || c == '\r') { + /* String was not terminated */ + memset(last_error, 0, sizeof(last_error)); + sprintf(last_error, "%d: string exceeds line", + *position); + return false; + } + + ++*str; + ++*position; + ++*line_position; + + if (c == '"') + break; + } + + len = *str - start - 1; + + /* Check if string is too long */ + if (len >= TOKEN_BUFFER_LEN) { + memset(last_error, 0, sizeof(last_error)); + sprintf(last_error, "%d: string is too long", + *position); + return false; + } + + token->kind = TOKEN_KIND_STRING; + memset(token->string, 0, TOKEN_BUFFER_LEN); + memcpy(token->string, start, len); + replace_escape_chars(token->string); + return true; + } + case '.': { + /* Must be . */ + token->kind = TOKEN_KIND_DOT; + return true; + } + case ',': { + /* Must be , */ + token->kind = TOKEN_KIND_COMMA; + return true; + } + case ';': { + /* Must be ; */ + token->kind = TOKEN_KIND_SEMICOLON; + return true; + } + case '!': { + /* Must be ! 
or != */
+		if (**str == '=') {
+			++*str;
+			++*position;
+			++*line_position;
+
+			token->kind = TOKEN_KIND_UNEQUAL;
+			return true;
+		}
+
+		token->kind = TOKEN_KIND_NOT;
+		return true;
+	}
+	case '(': {
+		/* Must be ( */
+		token->kind = TOKEN_KIND_OPEN_BRACKET;
+		return true;
+	}
+	case ')': {
+		/* Must be ) */
+		token->kind = TOKEN_KIND_CLOSE_BRACKET;
+		return true;
+	}
+	case '{': {
+		/* Must be { */
+		token->kind = TOKEN_KIND_OPEN_CURLY;
+		return true;
+	}
+	case '}': {
+		/* Must be } */
+		token->kind = TOKEN_KIND_CLOSE_CURLY;
+		return true;
+	}
+	case '=': {
+		/* Must be = or == */
+		if (**str == '=') {
+			++*str;
+			++*position;
+			++*line_position;
+
+			token->kind = TOKEN_KIND_EQUAL;
+			return true;
+		}
+
+		token->kind = TOKEN_KIND_ASSIGN;
+		return true;
+	}
+	case '<': {
+		/* Must be < or <= */
+		if (**str == '=') {
+			++*str;
+			++*position;
+			++*line_position;
+
+			token->kind = TOKEN_KIND_LESS_EQUAL;
+			return true;
+		}
+
+		token->kind = TOKEN_KIND_LESS;
+		return true;
+	}
+	case '>': {
+		/* Must be > or >= */
+		if (**str == '=') {
+			++*str;
+			++*position;
+			++*line_position;
+
+			token->kind = TOKEN_KIND_GREATER_EQUAL;
+			return true;
+		}
+
+		token->kind = TOKEN_KIND_GREATER;
+		return true;
+	}
+	case '+': {
+		/* Must be + */
+		token->kind = TOKEN_KIND_ADD;
+		return true;
+	}
+	case '-': {
+		/* Must be - */
+		token->kind = TOKEN_KIND_SUB;
+		return true;
+	}
+	case '*': {
+		/* Must be * */
+		token->kind = TOKEN_KIND_MUL;
+		return true;
+	}
+	case '/': {
+		/* Might be a comment or / */
+		if (**str == '*') {
+			int start_position = *position;
+			++*str;
+			++*position;
+			++*line_position;
+
+			while (true) {
+				c = **str;
+				++*str;
+				++*position;
+				++*line_position;
+
+				if (c == 0) {
+					/* End of comment is missing */
+					memset(last_error, 0,
+					       sizeof(last_error));
+					sprintf(last_error,
+						"%d: Comment is not closed",
+						start_position);
+					return false;
+				} else if (is_linebreak(c)) {
+					++*line;
+					*line_position = 0;
+				} else if (c == '*' && **str == '/')
+					break;
+			}
+
+			++*str;
+			++*position;
+			++*line_position;
+			return mptcp_rbs_get_next_token(str, position, line,
+							line_position, token);
+		}
+
+		token->kind = TOKEN_KIND_DIV;
+		return true;
+	}
+	case '%': {
+		/* Must be % */
+		token->kind = TOKEN_KIND_REM;
+		return true;
+	}
+	default: {
+		const char *start = *str - 1;
+		int len;
+
+		if (!is_char(c)) {
+			/* Illegal character found */
+			memset(last_error, 0, sizeof(last_error));
+			sprintf(last_error, "%d: illegal character %c",
+				*position, c);
+			return false;
+		}
+
+		/* Must be keyword or identifier */
+		while (true) {
+			c = **str;
+
+			if (!is_char(c) && !is_number(c))
+				break;
+
+			++*str;
+			++*position;
+			++*line_position;
+		}
+
+		len = *str - start;
+		if (len == 2) {
+			if (!strncmp(start, "IF", len)) {
+				token->kind = TOKEN_KIND_IF;
+				return true;
+			}
+			if (!strncmp(start, "IN", len)) {
+				token->kind = TOKEN_KIND_IN;
+				return true;
+			}
+			if (!strncmp(start, "OR", len)) {
+				token->kind = TOKEN_KIND_OR;
+				return true;
+			}
+		} else if (len == 3) {
+			if (!strncmp(start, "AND", len)) {
+				token->kind = TOKEN_KIND_AND;
+				return true;
+			}
+			if (!strncmp(start, "SET", len)) {
+				token->kind = TOKEN_KIND_SET;
+				return true;
+			}
+			if (!strncmp(start, "VAR", len)) {
+				token->kind = TOKEN_KIND_VAR;
+				return true;
+			}
+		} else if (len == 4) {
+			if (!strncmp(start, "DROP", len)) {
+				token->kind = TOKEN_KIND_DROP;
+				return true;
+			}
+			if (!strncmp(start, "ELSE", len)) {
+				token->kind = TOKEN_KIND_ELSE;
+				return true;
+			}
+			if (!strncmp(start, "NULL", len)) {
+				token->kind = TOKEN_KIND_NULL;
+				return true;
+			}
+			if (!strncmp(start,
"PUSH", len)) { + token->kind = TOKEN_KIND_PUSH; + return true; + } + if (!strncmp(start, "VOID", len)) { + token->kind = TOKEN_KIND_VOID; + return true; + } + } else if (len == 5) { + if (!strncmp(start, "PRINT", len)) { + token->kind = TOKEN_KIND_PRINT; + return true; + } + } else if (len == 6) { + if (!strncmp(start, "RETURN", len)) { + token->kind = TOKEN_KIND_RETURN; + return true; + } + } else if (len == 7) { + if (!strncmp(start, "FOREACH", len)) { + token->kind = TOKEN_KIND_FOREACH; + return true; + } + } else if (len == 8) { + if (!strncmp(start, "SET_USER", len)) { + token->kind = TOKEN_KIND_SET_USER; + return true; + } + } else if (len == 9) { + if (!strncmp(start, "SCHEDULER", len)) { + token->kind = TOKEN_KIND_SCHEDULER; + return true; + } + } else if (len >= TOKEN_BUFFER_LEN) { + /* Identifier is too long */ + memset(last_error, 0, sizeof(last_error)); + sprintf(last_error, "%d: identifier is too long", + *position); + return false; + } + + /* Must be identifier */ + token->kind = TOKEN_KIND_IDENT; + memset(token->string, 0, TOKEN_BUFFER_LEN); + memcpy(token->string, start, len); + return true; + } + } +} + +bool mptcp_rbs_get_next_token_lookahead(const char *str, int position, + int line, int line_position, struct mptcp_rbs_token *token) +{ + return mptcp_rbs_get_next_token(&str, &position, &line, &line_position, token); +} + +const char *mptcp_rbs_get_last_error(void) +{ + return last_error; +} + +void mptcp_rbs_token_kind_to_string(enum mptcp_rbs_token_kind kind, + char *buffer) +{ + switch (kind) { + case TOKEN_KIND_EOD: { + strcpy(buffer, "end of data"); + break; + } + case TOKEN_KIND_NUMBER: { + strcpy(buffer, "number"); + break; + } + case TOKEN_KIND_STRING: { + strcpy(buffer, "string"); + break; + } + case TOKEN_KIND_IDENT: { + strcpy(buffer, "identifier"); + break; + } + case TOKEN_KIND_NOT: { + strcpy(buffer, "!"); + break; + } + case TOKEN_KIND_ASSIGN: { + strcpy(buffer, "="); + break; + } + case TOKEN_KIND_EQUAL: { + strcpy(buffer, "=="); + break; + } + case TOKEN_KIND_UNEQUAL: { + strcpy(buffer, "!="); + break; + } + case TOKEN_KIND_LESS: { + strcpy(buffer, "<"); + break; + } + case TOKEN_KIND_LESS_EQUAL: { + strcpy(buffer, "<="); + break; + } + case TOKEN_KIND_GREATER: { + strcpy(buffer, ">"); + break; + } + case TOKEN_KIND_GREATER_EQUAL: { + strcpy(buffer, ">="); + break; + } + case TOKEN_KIND_ADD: { + strcpy(buffer, "+"); + break; + } + case TOKEN_KIND_SUB: { + strcpy(buffer, "-"); + break; + } + case TOKEN_KIND_MUL: { + strcpy(buffer, "*"); + break; + } + case TOKEN_KIND_DIV: { + strcpy(buffer, "/"); + break; + } + case TOKEN_KIND_REM: { + strcpy(buffer, "%"); + break; + } + case TOKEN_KIND_DOT: { + strcpy(buffer, "."); + break; + } + case TOKEN_KIND_COMMA: { + strcpy(buffer, ","); + break; + } + case TOKEN_KIND_SEMICOLON: { + strcpy(buffer, ";"); + break; + } + case TOKEN_KIND_OPEN_BRACKET: { + strcpy(buffer, "("); + break; + } + case TOKEN_KIND_CLOSE_BRACKET: { + strcpy(buffer, ")"); + break; + } + case TOKEN_KIND_OPEN_CURLY: { + strcpy(buffer, "{"); + break; + } + case TOKEN_KIND_CLOSE_CURLY: { + strcpy(buffer, "}"); + break; + } + case TOKEN_KIND_AND: { + strcpy(buffer, "AND"); + break; + } + case TOKEN_KIND_DROP: { + strcpy(buffer, "DROP"); + break; + } + case TOKEN_KIND_ELSE: { + strcpy(buffer, "ELSE"); + break; + } + case TOKEN_KIND_FOREACH: { + strcpy(buffer, "FOREACH"); + break; + } + case TOKEN_KIND_IF: { + strcpy(buffer, "IF"); + break; + } + case TOKEN_KIND_IN: { + strcpy(buffer, "IN"); + break; + } + case TOKEN_KIND_NULL: { + strcpy(buffer, 
"NULL"); + break; + } + case TOKEN_KIND_OR: { + strcpy(buffer, "OR"); + break; + } + case TOKEN_KIND_PRINT: { + strcpy(buffer, "PRINT"); + break; + } + case TOKEN_KIND_PUSH: { + strcpy(buffer, "PUSH"); + break; + } + case TOKEN_KIND_SET_USER: { + strcpy(buffer, "SET_USER"); + break; + } + case TOKEN_KIND_RETURN: { + strcpy(buffer, "RETURN"); + break; + } + case TOKEN_KIND_SCHEDULER: { + strcpy(buffer, "SCHEDULER"); + break; + } + case TOKEN_KIND_SET: { + strcpy(buffer, "SET"); + break; + } + case TOKEN_KIND_VAR: { + strcpy(buffer, "VAR"); + break; + } + case TOKEN_KIND_VOID: { + strcpy(buffer, "VOID"); + break; + } + } +} + +void mptcp_rbs_token_to_string(const struct mptcp_rbs_token *token, + char *buffer) +{ + switch (token->kind) { + case TOKEN_KIND_NUMBER: { + sprintf(buffer, "%d", token->number); + break; + } + case TOKEN_KIND_STRING: { + sprintf(buffer, "\"%s\"", token->string); + break; + } + case TOKEN_KIND_IDENT: { + sprintf(buffer, "%s", token->string); + break; + } + default: { + mptcp_rbs_token_kind_to_string(token->kind, buffer); + break; + } + } +} + +void replace_escape_chars(char *buffer) +{ + char *pos = buffer; + int remaining = strlen(buffer); + + while (remaining) { + char c = *pos; + ++pos; + --remaining; + + if (c == '\\' && remaining) { + bool is_escape = false; + + switch (*pos) { + case '\\': { + is_escape = true; + break; + } + case '\"': { + *(pos - 1) = '\"'; + is_escape = true; + break; + } + case 'n': { + *(pos - 1) = '\n'; + is_escape = true; + break; + } + case 'r': { + *(pos - 1) = '\r'; + is_escape = true; + break; + } + case 't': { + *(pos - 1) = '\t'; + is_escape = true; + break; + } + } + + if (is_escape) { + --remaining; + + if (remaining) + memcpy(pos, pos + 1, remaining); + *(pos + remaining) = 0; + } + } + } +} + +int replace_with_escape_chars(char *buffer, bool write) +{ + char *pos = buffer; + int len = strlen(buffer); + int remaining = len; + + while (remaining) { + char c = *pos; + ++pos; + --remaining; + + switch (c) { + case '\\': { + if (write) { + memmove(pos + 1, pos, remaining + 1); + *(pos - 1) = '\\'; + *pos = '\\'; + ++pos; + } + ++len; + break; + } + case '\"': { + if (write) { + memmove(pos + 1, pos, remaining + 1); + *(pos - 1) = '\\'; + *pos = '\"'; + ++pos; + } + ++len; + break; + } + case '\n': { + if (write) { + memmove(pos + 1, pos, remaining + 1); + *(pos - 1) = '\\'; + *pos = 'n'; + ++pos; + } + ++len; + break; + } + case '\r': { + if (write) { + memmove(pos + 1, pos, remaining + 1); + *(pos - 1) = '\\'; + *pos = 'r'; + ++pos; + } + ++len; + break; + } + case '\t': { + if (write) { + memmove(pos + 1, pos, remaining + 1); + *(pos - 1) = '\\'; + *pos = 't'; + ++pos; + } + ++len; + break; + } + } + } + + return len; +} diff --git a/net/mptcp/mptcp_rbs_lexer.h b/net/mptcp/mptcp_rbs_lexer.h new file mode 100644 index 0000000000000..a9d23e2192a46 --- /dev/null +++ b/net/mptcp/mptcp_rbs_lexer.h @@ -0,0 +1,109 @@ +#ifndef _MPTCP_RBS_LEXER_H +#define _MPTCP_RBS_LEXER_H + +#include + +#define TOKEN_BUFFER_LEN 64 + +/* Enumeration of possible token kinds */ +enum mptcp_rbs_token_kind { + /* End of data */ + TOKEN_KIND_EOD, + /* Number literal */ + TOKEN_KIND_NUMBER, + /* String literal */ + TOKEN_KIND_STRING, + /* Identifier */ + TOKEN_KIND_IDENT, + + TOKEN_KIND_NOT, + TOKEN_KIND_ASSIGN, + TOKEN_KIND_EQUAL, + TOKEN_KIND_UNEQUAL, + TOKEN_KIND_LESS, + TOKEN_KIND_LESS_EQUAL, + TOKEN_KIND_GREATER, + TOKEN_KIND_GREATER_EQUAL, + TOKEN_KIND_ADD, + TOKEN_KIND_SUB, + TOKEN_KIND_MUL, + TOKEN_KIND_DIV, + TOKEN_KIND_REM, + TOKEN_KIND_DOT, + 
+	TOKEN_KIND_COMMA,
+	TOKEN_KIND_SEMICOLON,
+	TOKEN_KIND_OPEN_BRACKET,
+	TOKEN_KIND_CLOSE_BRACKET,
+	TOKEN_KIND_OPEN_CURLY,
+	TOKEN_KIND_CLOSE_CURLY,
+	TOKEN_KIND_AND,
+	TOKEN_KIND_DROP,
+	TOKEN_KIND_ELSE,
+	TOKEN_KIND_FOREACH,
+	TOKEN_KIND_IF,
+	TOKEN_KIND_IN,
+	TOKEN_KIND_NULL,
+	TOKEN_KIND_OR,
+	TOKEN_KIND_PRINT,
+	TOKEN_KIND_PUSH,
+	TOKEN_KIND_SET_USER,
+	TOKEN_KIND_RETURN,
+	TOKEN_KIND_SCHEDULER,
+	TOKEN_KIND_SET,
+	TOKEN_KIND_VAR,
+	TOKEN_KIND_VOID
+};
+
+/* Struct for a single token */
+struct mptcp_rbs_token {
+	enum mptcp_rbs_token_kind kind;
+	int position;
+	int line;
+	int line_position;
+	union {
+		unsigned int number;
+		char string[TOKEN_BUFFER_LEN];
+	};
+};
+
+/*
+ * Returns the next token in a string
+ * @str: Pointer to the string
+ * @position: Pointer to the current position in the string
+ * @line: Pointer to the current line number
+ * @line_position: Pointer to the current position inside the line
+ * @token: Pointer to the token which should be filled
+ * @return: false if an error occurred. In this case call
+ * mptcp_rbs_get_last_error to get the error message
+ */
+bool mptcp_rbs_get_next_token(char const **str, int *position,
+			      int *line, int *line_position,
+			      struct mptcp_rbs_token *token);
+
+/*
+ * Like mptcp_rbs_get_next_token but does not advance to the next token
+ */
+bool mptcp_rbs_get_next_token_lookahead(const char *str, int position,
+					int line, int line_position,
+					struct mptcp_rbs_token *token);
+
+/*
+ * Returns the last error message of the lexer
+ */
+const char *mptcp_rbs_get_last_error(void);
+
+void mptcp_rbs_token_kind_to_string(enum mptcp_rbs_token_kind kind,
+				    char *buffer);
+
+void mptcp_rbs_token_to_string(const struct mptcp_rbs_token *token,
+			       char *buffer);
+
+/*
+ * Replaces escape characters like \n with the actual characters in place
+ */
+void replace_escape_chars(char *buffer);
+
+/*
+ * Replaces invisible characters like newline with escape characters in
+ * place.
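+ * If write is true, the buffer is rewritten in place and must already be
+ * large enough to hold the result.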
+ * If write is false, the function only computes the length the buffer
+ * must have and returns it.
+ */
+int replace_with_escape_chars(char *buffer, bool write);
+
+#endif
diff --git a/net/mptcp/mptcp_rbs_multiplatform.h b/net/mptcp/mptcp_rbs_multiplatform.h
new file mode 100644
index 0000000000000..f724005ce968d
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_multiplatform.h
@@ -0,0 +1,18 @@
+#ifndef MULTIPLATFORM_H
+#define MULTIPLATFORM_H
+
+#define RBS_STATS
+
+#ifdef RBS_STATS
+#define RBS_DO_STAT(stat, value) rbs_stats_update(stat, value)
+#else
+#define RBS_DO_STAT(stat, value) /* nothing to do */
+#endif
+
+#define ALLOC(struct_name) kzalloc(sizeof(struct struct_name), GFP_ATOMIC)
+#define FREE(instance) kfree(instance)
+
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#endif
diff --git a/net/mptcp/mptcp_rbs_optimizer.c b/net/mptcp/mptcp_rbs_optimizer.c
new file mode 100644
index 0000000000000..10c14ca3ce26e
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_optimizer.c
@@ -0,0 +1,84 @@
+#include "mptcp_rbs_optimizer.h"
+#include "mptcp_rbs_cfg.h"
+#include "mptcp_rbs_optimizer_bm.h"
+#include "mptcp_rbs_optimizer_cf.h"
+#include "mptcp_rbs_optimizer_cve.h"
+#include "mptcp_rbs_optimizer_dce.h"
+#include "mptcp_rbs_optimizer_ebpf.h"
+#include "mptcp_rbs_optimizer_lu.h"
+#include "mptcp_rbs_optimizer_vi.h"
+#include "mptcp_rbs_scheduler.h"
+#include "mptcp_rbs_value.h"
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+struct mptcp_rbs_opt_value_info *mptcp_rbs_opt_find_value_info(
+    struct mptcp_rbs_opt_ctx *ctx, struct mptcp_rbs_value *value)
+{
+	struct mptcp_rbs_opt_value_info *info = ctx->value_infos;
+
+	while (info) {
+		if (info->value == value)
+			return info;
+
+		info = info->next;
+	}
+
+	return NULL;
+}
+
+struct mptcp_rbs_opt_value_info *mptcp_rbs_opt_get_value_info(
+    struct mptcp_rbs_opt_ctx *ctx, struct mptcp_rbs_value *value)
+{
+	struct mptcp_rbs_opt_value_info *info =
+	    mptcp_rbs_opt_find_value_info(ctx, value);
+
+	if (!info) {
+		info = kzalloc(sizeof(struct mptcp_rbs_opt_value_info),
+			       GFP_KERNEL);
+		info->next = ctx->value_infos;
+		info->value = value;
+		ctx->value_infos = info;
+	}
+
+	return info;
+}
+
+typedef void (*optimization_func)(struct mptcp_rbs_opt_ctx *);
+
+/** Array with optimizations that are executed in order */
+static const optimization_func pipeline[] = {
+	/* mptcp_rbs_opt_lu, */ mptcp_rbs_opt_cve, mptcp_rbs_opt_vi,
+	mptcp_rbs_opt_dce, mptcp_rbs_opt_cf, mptcp_rbs_opt_bm
+};
+
+void mptcp_rbs_optimize(struct mptcp_rbs_scheduler_variation *variation,
+			bool *terminate, int sbf_num, bool ebpf)
+{
+	int i;
+	struct mptcp_rbs_opt_ctx ctx;
+
+	/* Fill the context */
+	memset(&ctx, 0, sizeof(struct mptcp_rbs_opt_ctx));
+	ctx.variation = variation;
+	ctx.variation->sbf_num = sbf_num;
+
+	/* Apply optimizations in pipeline */
+	for (i = 0; i < ARRAY_SIZE(pipeline) && !*terminate; ++i) {
+		pipeline[i](&ctx);
+	}
+
+	/* Release the context */
+	while (ctx.value_infos) {
+		struct mptcp_rbs_opt_value_info *info = ctx.value_infos;
+		ctx.value_infos = ctx.value_infos->next;
+		kfree(info);
+	}
+
+	if (!*terminate && sbf_num && ebpf) {
+		/* Generate eBPF code */
+		mptcp_rbs_opt_ebpf(&ctx);
+	}
+}
diff --git a/net/mptcp/mptcp_rbs_optimizer.h b/net/mptcp/mptcp_rbs_optimizer.h
new file mode 100644
index 0000000000000..8a8eb63260a18
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_optimizer.h
@@ -0,0 +1,90 @@
+#ifndef _MPTCP_RBS_OPTIMIZER_H
+#define _MPTCP_RBS_OPTIMIZER_H
+
+#include "mptcp_rbs_var.h"
+
+struct mptcp_rbs_scheduler_variation;
+struct mptcp_rbs_value;
+
+/**
+ * Information struct for values whose contents are preserved across
+ * optimization passes
+ */
+struct mptcp_rbs_opt_value_info {
+	struct mptcp_rbs_opt_value_info *next;
+	struct mptcp_rbs_value *value;
+
+	/** Determines if the value is constant */
+	bool is_const;
+	/**
+	 * The evaluated value if the value is constant.
+	 * -1 for NULL
+	 * Booleans are encoded as 0 (false) and 1 (true)
+	 * Integers are encoded normally
+	 * Strings, subflows and sockbuffers cannot be stored
+	 * The number of items can be stored for lists
+	 */
+	s64 const_value;
+};
+
+/**
+ * Information struct for variables whose contents are preserved across
+ * optimization passes
+ */
+struct mptcp_rbs_opt_var_info {
+	/** Declaration statement of the variable */
+	struct mptcp_rbs_smt_var *smt;
+	/** Number of usages of the variable */
+	int usage;
+};
+
+/**
+ * Context for the optimization passes; its contents are preserved across
+ * all passes
+ */
+struct mptcp_rbs_opt_ctx {
+	struct mptcp_rbs_scheduler_variation *variation;
+	/**
+	 * Singly linked list to "map" values to their information because
+	 * they lack a tag field. For simplicity this is a list instead of
+	 * a hash map
+	 */
+	struct mptcp_rbs_opt_value_info *value_infos;
+	/**
+	 * Array that is used to connect variable indexes with their VAR
+	 * statements and the number of usages of the variable
+	 */
+	struct mptcp_rbs_opt_var_info var_infos[MPTCP_RBS_MAX_VAR_COUNT];
+};
+
+/**
+ * Searches for the corresponding value information
+ * @ctx: The optimization context
+ * @value: The value
+ * Return: The information of the value or NULL
+ */
+struct mptcp_rbs_opt_value_info *mptcp_rbs_opt_find_value_info(
+    struct mptcp_rbs_opt_ctx *ctx, struct mptcp_rbs_value *value);
+
+/**
+ * Searches for the corresponding value information. If none was found a new
+ * information struct is created for this value
+ * @ctx: The optimization context
+ * @value: The value
+ * Return: The information of the value
+ */
+struct mptcp_rbs_opt_value_info *mptcp_rbs_opt_get_value_info(
+    struct mptcp_rbs_opt_ctx *ctx, struct mptcp_rbs_value *value);
+
+/**
+ * Optimizes the given scheduler variation in place.
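+ * The passes run in the order given by the pipeline array in
+ * mptcp_rbs_optimizer.c; eBPF code generation, if enabled, runs last and
+ * only when a fixed subflow count is given.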
+ * If *terminate is set to true while the optimizer runs, the function
+ * returns early.
+ * @variation: The scheduler variation that should be optimized
+ * @terminate: Pointer to a value that aborts the optimization process if
+ * set to true
+ * @sbf_num: Fixed number of subflows the optimizer should optimize for,
+ * or 0
+ * @ebpf: Determines if eBPF code should be generated if possible
+ */
+void mptcp_rbs_optimize(struct mptcp_rbs_scheduler_variation *variation,
+			bool *terminate, int sbf_num, bool ebpf);
+
+#endif
diff --git a/net/mptcp/mptcp_rbs_optimizer_bm.c b/net/mptcp/mptcp_rbs_optimizer_bm.c
new file mode 100644
index 0000000000000..18daa05cc9b89
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_optimizer_bm.c
@@ -0,0 +1,103 @@
+#include "mptcp_rbs_optimizer_bm.h"
+#include "mptcp_rbs_cfg.h"
+#include "mptcp_rbs_optimizer.h"
+#include "mptcp_rbs_scheduler.h"
+#include "mptcp_rbs_smt.h"
+#include "mptcp_rbs_value.h"
+
+#define SET_INCOMING(block, val) (block)->tag = (void *) (val)
+#define GET_INCOMING(block) ((size_t)(block)->tag)
+#define INC_INCOMING(block) ++*((size_t *) &(block)->tag)
+#define DEC_INCOMING(block) --*((size_t *) &(block)->tag)
+
+void mptcp_rbs_opt_bm(struct mptcp_rbs_opt_ctx *ctx)
+{
+	struct mptcp_rbs_cfg_block_list list;
+	struct mptcp_rbs_cfg_block *block;
+	struct mptcp_rbs_cfg_block *next_block;
+	bool modified;
+
+	/* Fill tags of all blocks with number of incoming edges */
+	INIT_BLOCK_LIST(&list);
+	mptcp_rbs_cfg_block_traverse(ctx->variation->first_block, &list);
+	FOREACH_BLOCK(&list, block, SET_INCOMING(block, 0));
+	INC_INCOMING(ctx->variation->first_block);
+	FOREACH_BLOCK(&list, block, {
+		if (block->next)
+			INC_INCOMING(block->next);
+		if (block->next_else)
+			INC_INCOMING(block->next_else);
+	});
+
+	/* This is a fixed-point algorithm: iterate until nothing changes */
+	do {
+		modified = false;
+
+		FOREACH_BLOCK(&list, block, {
+			if (GET_INCOMING(block) == 0)
+				continue;
+
+			/* Remove condition if both paths point to the same
+			 * block
+			 */
+			if (block->condition &&
+			    block->next == block->next_else) {
+				block->condition->free(block->condition);
+				block->condition = NULL;
+				block->next_else = NULL;
+				if (block->next)
+					DEC_INCOMING(block->next);
+				modified = true;
+				continue;
+			}
+
+			if (block->next && GET_INCOMING(block->next) == 1) {
+				next_block = block->next;
+
+				if (!block->condition) {
+					/* Merge blocks */
+					mptcp_rbs_cfg_block_append(
+					    block, next_block->first_smt);
+					block->condition =
+					    next_block->condition;
+					block->next = next_block->next;
+					block->next_else =
+					    next_block->next_else;
+
+					next_block->first_smt = NULL;
+					next_block->condition = NULL;
+					next_block->next = NULL;
+					next_block->next_else = NULL;
+					DEC_INCOMING(next_block);
+					modified = true;
+					continue;
+				} else if (!next_block->first_smt &&
+					   !next_block->condition) {
+					/* Remove next block */
+					block->next = next_block->next;
+					DEC_INCOMING(next_block);
+					modified = true;
+					continue;
+				}
+			}
+
+			if (block->next_else &&
+			    GET_INCOMING(block->next_else) == 1) {
+				next_block = block->next_else;
+
+				if (!next_block->first_smt &&
+				    !next_block->condition) {
+					/* Remove next else block */
+					block->next_else = next_block->next;
+					DEC_INCOMING(next_block);
+					modified = true;
+					continue;
+				}
+			}
+		});
+	} while (modified);
+
+	FOREACH_BLOCK(&list, block, if (GET_INCOMING(block) == 0)
+					mptcp_rbs_cfg_block_free(block));
+	FREE_BLOCK_LIST(&list);
+}
diff --git a/net/mptcp/mptcp_rbs_optimizer_bm.h b/net/mptcp/mptcp_rbs_optimizer_bm.h
new file mode 100644
index 0000000000000..26722d342f311
--- /dev/null
+++
b/net/mptcp/mptcp_rbs_optimizer_bm.h @@ -0,0 +1,13 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_BM_H +#define _MPTCP_RBS_OPTIMIZER_BM_H + +struct mptcp_rbs_opt_ctx; + +/** + * Block Merging: + * Merges and removes empty blocks + * @ctx: The optimization context + */ +void mptcp_rbs_opt_bm(struct mptcp_rbs_opt_ctx *ctx); + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer_cf.c b/net/mptcp/mptcp_rbs_optimizer_cf.c new file mode 100644 index 0000000000000..d5f462078a7a5 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_cf.c @@ -0,0 +1,977 @@ +#include "mptcp_rbs_optimizer_cf.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value.h" + +#define IS_NULL(info) ((info) && (info)->is_const && (info)->const_value == -1) + +/* Since we cannot directly represent true or false in + * the language we have to use a comparison + */ +#define TRUE \ + ((struct mptcp_rbs_value *) mptcp_rbs_value_equal_new( \ + (struct mptcp_rbs_value_int *) mptcp_rbs_value_constint_new(0), \ + (struct mptcp_rbs_value_int *) mptcp_rbs_value_constint_new(0))) + +#define FALSE \ + ((struct mptcp_rbs_value *) mptcp_rbs_value_unequal_new( \ + (struct mptcp_rbs_value_int *) mptcp_rbs_value_constint_new(0), \ + (struct mptcp_rbs_value_int *) mptcp_rbs_value_constint_new(0))) + +static void opt_value(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_value **value_ptr) +{ + struct mptcp_rbs_value *value = *value_ptr; + struct mptcp_rbs_opt_value_info *info; + + /* Note: We can ignore constant NULL values here because they are + * propagated up to the root and as result this function is not called + */ + + switch (value->kind) { + case VALUE_KIND_CONSTINT: + case VALUE_KIND_CONSTSTRING: + case VALUE_KIND_NULL: { + /* Cannot be optimized any further */ + return; + } + case VALUE_KIND_BOOL_VAR: + case VALUE_KIND_INT_VAR: + case VALUE_KIND_STRING_VAR: + case VALUE_KIND_SBF_VAR: + case VALUE_KIND_SBFLIST_VAR: + case VALUE_KIND_SKB_VAR: + case VALUE_KIND_SKBLIST_VAR: { + /* Variables are inlined in an extra pass */ + return; + } + case VALUE_KIND_NOT: { + struct mptcp_rbs_value_not *not_value = + (struct mptcp_rbs_value_not *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + not_value->free(not_value); + } else + opt_value( + ctx, + (struct mptcp_rbs_value **) ¬_value->operand); + + return; + } + case VALUE_KIND_EQUAL: { + struct mptcp_rbs_value_equal *equal_value = + (struct mptcp_rbs_value_equal *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + equal_value->free(equal_value); + } else { + opt_value(ctx, (struct mptcp_rbs_value **) &equal_value + ->left_operand); + opt_value(ctx, (struct mptcp_rbs_value **) &equal_value + ->right_operand); + } + + return; + } + case VALUE_KIND_UNEQUAL: { + struct mptcp_rbs_value_unequal *unequal_value = + (struct mptcp_rbs_value_unequal *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + unequal_value->free(unequal_value); + } else { + opt_value(ctx, + (struct mptcp_rbs_value **) &unequal_value + ->left_operand); + opt_value(ctx, + (struct mptcp_rbs_value **) &unequal_value + ->right_operand); + } + + return; + } + case 
VALUE_KIND_LESS: { + struct mptcp_rbs_value_less *less_value = + (struct mptcp_rbs_value_less *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + less_value->free(less_value); + } else { + opt_value(ctx, (struct mptcp_rbs_value **) &less_value + ->left_operand); + opt_value(ctx, (struct mptcp_rbs_value **) &less_value + ->right_operand); + } + + return; + } + case VALUE_KIND_LESS_EQUAL: { + struct mptcp_rbs_value_less_equal *less_equal_value = + (struct mptcp_rbs_value_less_equal *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + less_equal_value->free(less_equal_value); + } else { + opt_value(ctx, + (struct mptcp_rbs_value **) &less_equal_value + ->left_operand); + opt_value(ctx, + (struct mptcp_rbs_value **) &less_equal_value + ->right_operand); + } + + return; + } + case VALUE_KIND_GREATER: { + struct mptcp_rbs_value_greater *greater_value = + (struct mptcp_rbs_value_greater *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + greater_value->free(greater_value); + } else { + opt_value(ctx, + (struct mptcp_rbs_value **) &greater_value + ->left_operand); + opt_value(ctx, + (struct mptcp_rbs_value **) &greater_value + ->right_operand); + } + + return; + } + case VALUE_KIND_GREATER_EQUAL: { + struct mptcp_rbs_value_greater_equal *greater_equal_value = + (struct mptcp_rbs_value_greater_equal *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + greater_equal_value->free(greater_equal_value); + } else { + opt_value(ctx, + (struct mptcp_rbs_value * + *) &greater_equal_value->left_operand); + opt_value(ctx, + (struct mptcp_rbs_value * + *) &greater_equal_value->right_operand); + } + + return; + } + case VALUE_KIND_AND: { + struct mptcp_rbs_value_and *and_value = + (struct mptcp_rbs_value_and *) value; + struct mptcp_rbs_opt_value_info *left_info; + struct mptcp_rbs_opt_value_info *right_info; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + and_value->free(and_value); + return; + } + + left_info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) and_value->left_operand); + right_info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) and_value->right_operand); + + if (left_info && left_info->is_const && + left_info->const_value == 1) { + struct mptcp_rbs_value_bool *right_operand = + and_value->right_operand; + + and_value->right_operand = NULL; + and_value->free(and_value); + + *value_ptr = (struct mptcp_rbs_value *) right_operand; + } else if (right_info && right_info->is_const && + right_info->const_value == 1) { + struct mptcp_rbs_value_bool *left_operand = + and_value->left_operand; + + and_value->left_operand = NULL; + and_value->free(and_value); + + *value_ptr = (struct mptcp_rbs_value *) left_operand; + } + + return; + } + case VALUE_KIND_OR: { + struct mptcp_rbs_value_or *or_value = + (struct mptcp_rbs_value_or *) value; + struct mptcp_rbs_opt_value_info *left_info; + struct mptcp_rbs_opt_value_info *right_info; + + info = 
mptcp_rbs_opt_find_value_info(ctx, value); + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + or_value->free(or_value); + return; + } + + left_info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) or_value->left_operand); + right_info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) or_value->right_operand); + + if (left_info && left_info->is_const && + left_info->const_value == 0) { + struct mptcp_rbs_value_bool *right_operand = + or_value->right_operand; + + or_value->right_operand = NULL; + or_value->free(or_value); + + *value_ptr = (struct mptcp_rbs_value *) right_operand; + } else if (right_info && right_info->is_const && + right_info->const_value == 0) { + struct mptcp_rbs_value_bool *left_operand = + or_value->left_operand; + + or_value->left_operand = NULL; + or_value->free(or_value); + + *value_ptr = (struct mptcp_rbs_value *) left_operand; + } + + return; + } + case VALUE_KIND_ADD: { + struct mptcp_rbs_value_add *add_value = + (struct mptcp_rbs_value_add *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_constint_new(info->const_value); + + add_value->free(add_value); + } else { + opt_value(ctx, (struct mptcp_rbs_value **) &add_value + ->left_operand); + opt_value(ctx, (struct mptcp_rbs_value **) &add_value + ->right_operand); + } + + return; + } + case VALUE_KIND_SUBTRACT: { + struct mptcp_rbs_value_subtract *subtract_value = + (struct mptcp_rbs_value_subtract *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_constint_new(info->const_value); + + subtract_value->free(subtract_value); + } else { + opt_value(ctx, + (struct mptcp_rbs_value **) &subtract_value + ->left_operand); + opt_value(ctx, + (struct mptcp_rbs_value **) &subtract_value + ->right_operand); + } + + return; + } + case VALUE_KIND_MULTIPLY: { + struct mptcp_rbs_value_multiply *multiply_value = + (struct mptcp_rbs_value_multiply *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_constint_new(info->const_value); + + multiply_value->free(multiply_value); + } else { + opt_value(ctx, + (struct mptcp_rbs_value **) &multiply_value + ->left_operand); + opt_value(ctx, + (struct mptcp_rbs_value **) &multiply_value + ->right_operand); + } + + return; + } + case VALUE_KIND_DIVIDE: { + struct mptcp_rbs_value_divide *divide_value = + (struct mptcp_rbs_value_divide *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_constint_new(info->const_value); + + divide_value->free(divide_value); + } else { + opt_value(ctx, (struct mptcp_rbs_value **) ÷_value + ->left_operand); + opt_value(ctx, (struct mptcp_rbs_value **) ÷_value + ->right_operand); + } + + return; + } + case VALUE_KIND_REMAINDER: { + struct mptcp_rbs_value_remainder *remainder_value = + (struct mptcp_rbs_value_remainder *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_constint_new(info->const_value); + + remainder_value->free(remainder_value); + } else { + opt_value(ctx, + (struct mptcp_rbs_value **) &remainder_value + ->left_operand); + 
opt_value(ctx, + (struct mptcp_rbs_value **) &remainder_value + ->right_operand); + } + + return; + } + case VALUE_KIND_IS_NULL: { + struct mptcp_rbs_value_is_null *is_null_value = + (struct mptcp_rbs_value_is_null *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + is_null_value->free(is_null_value); + } else + opt_value(ctx, + (struct mptcp_rbs_value **) &is_null_value + ->operand); + + return; + } + case VALUE_KIND_IS_NOT_NULL: { + struct mptcp_rbs_value_is_not_null *is_not_null_value = + (struct mptcp_rbs_value_is_not_null *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + is_not_null_value->free(is_not_null_value); + } else + opt_value(ctx, + (struct mptcp_rbs_value **) &is_not_null_value + ->operand); + + return; + } + case VALUE_KIND_REG: + case VALUE_KIND_Q: + case VALUE_KIND_QU: + case VALUE_KIND_RQ: + case VALUE_KIND_SUBFLOWS: + case VALUE_KIND_CURRENT_TIME_MS: + case VALUE_KIND_RANDOM: + case VALUE_KIND_SBFLIST_FILTER_SBF: + case VALUE_KIND_SKBLIST_FILTER_SKB: { + /* Cannot be constant */ + return; + } + case VALUE_KIND_SBF_RTT: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_rtt *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_RTT_MS: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_rtt_ms *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_RTT_VAR: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_rtt_var *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_USER: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_user *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_QUEUED: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_queued *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_IS_BACKUP: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_is_backup *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_CWND: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_cwnd *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_SKBS_IN_FLIGHT: { + opt_value( + ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_skbs_in_flight *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_LOST_SKBS: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_lost_skbs *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_HAS_WINDOW_FOR: { + opt_value( + ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_has_window_for *) value) + ->sbf); + opt_value( + ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_has_window_for *) value) + ->skb); + return; + } + case VALUE_KIND_SBF_ID: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_id *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_DELAY_IN: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_delay_in *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_DELAY_OUT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_delay_out *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_BW_OUT_SEND: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_bw_out_send *) 
value) + ->sbf); + return; + } + case VALUE_KIND_SBF_BW_OUT_ACK: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_bw_out_ack *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_SSTHRESH: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_ssthresh *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_THROTTLED: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_throttled *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_LOSSY: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_lossy *) value) + ->sbf); + return; + } + case VALUE_KIND_SBFLIST_NEXT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_next *) value) + ->list); + return; + } + case VALUE_KIND_SBFLIST_EMPTY: { + struct mptcp_rbs_value_sbf_list_empty *empty_value = + (struct mptcp_rbs_value_sbf_list_empty *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + empty_value->free(empty_value); + } else + opt_value( + ctx, + (struct mptcp_rbs_value **) &empty_value->list); + + return; + } + case VALUE_KIND_SBFLIST_FILTER: { + struct mptcp_rbs_value_sbf_list_filter *filter_value = + (struct mptcp_rbs_value_sbf_list_filter *) value; + + opt_value(ctx, (struct mptcp_rbs_value **) &filter_value->list); + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) filter_value->cond); + + if (info && info->is_const && info->const_value == 1) { + struct mptcp_rbs_value_sbf_list *list = + filter_value->list; + + filter_value->list = NULL; + filter_value->free(filter_value); + + *value_ptr = (struct mptcp_rbs_value *) list; + } else + opt_value( + ctx, + (struct mptcp_rbs_value **) &filter_value->cond); + + return; + } + case VALUE_KIND_SBFLIST_MAX: { + struct mptcp_rbs_value_sbf_list_max *max_value = + (struct mptcp_rbs_value_sbf_list_max *) value; + + opt_value(ctx, (struct mptcp_rbs_value **) &max_value->list); + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) max_value->cond); + + if (info && info->is_const) { + /* Just take the first subflow */ + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_sbf_list_get_new( + max_value->list, + (struct mptcp_rbs_value_int *) + mptcp_rbs_value_constint_new(0)); + + max_value->list = NULL; + max_value->free(max_value); + } + + return; + } + case VALUE_KIND_SBFLIST_MIN: { + struct mptcp_rbs_value_sbf_list_min *min_value = + (struct mptcp_rbs_value_sbf_list_min *) value; + + opt_value(ctx, (struct mptcp_rbs_value **) &min_value->list); + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) min_value->cond); + + if (info && info->is_const) { + /* Just take the first subflow */ + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_sbf_list_get_new( + min_value->list, + (struct mptcp_rbs_value_int *) + mptcp_rbs_value_constint_new(0)); + + min_value->list = NULL; + min_value->free(min_value); + } + + return; + } + case VALUE_KIND_SBFLIST_GET: { + struct mptcp_rbs_value_sbf_list_get *get_value = + (struct mptcp_rbs_value_sbf_list_get *) value; + + opt_value(ctx, (struct mptcp_rbs_value **) &get_value->list); + opt_value(ctx, (struct mptcp_rbs_value **) &get_value->index); + return; + } + case VALUE_KIND_SBFLIST_COUNT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_count *) value) + ->list); + return; 
+ } + case VALUE_KIND_SBFLIST_SUM: { + struct mptcp_rbs_value_sbf_list_sum *sum_value = + (struct mptcp_rbs_value_sbf_list_sum *) value; + + opt_value(ctx, (struct mptcp_rbs_value **) &sum_value->list); + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) sum_value->cond); + + if (info && info->is_const && info->const_value == 0) { + /* Just 0 */ + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_constint_new(0); + + sum_value->free(sum_value); + } + + return; + } + case VALUE_KIND_SKB_SENT_ON: { + struct mptcp_rbs_value_skb_sent_on *sent_on_value = + (struct mptcp_rbs_value_skb_sent_on *) value; + + opt_value(ctx, (struct mptcp_rbs_value **) &sent_on_value->sbf); + opt_value(ctx, (struct mptcp_rbs_value **) &sent_on_value->skb); + return; + } + case VALUE_KIND_SKB_SENT_ON_ALL: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_sent_on_all *) value) + ->skb); + return; + } + case VALUE_KIND_SKB_USER: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_user *) value) + ->skb); + return; + } + case VALUE_KIND_SKB_SEQ: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_seq *) value) + ->skb); + return; + } + case VALUE_KIND_SKB_PSH: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_psh *) value) + ->skb); + return; + } + case VALUE_KIND_SKB_LENGTH: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_length *) value) + ->skb); + return; + } + case VALUE_KIND_SKBLIST_NEXT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_next *) value) + ->list); + return; + } + case VALUE_KIND_SKBLIST_EMPTY: { + struct mptcp_rbs_value_skb_list_empty *empty_value = + (struct mptcp_rbs_value_skb_list_empty *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + empty_value->free(empty_value); + } else + opt_value( + ctx, + (struct mptcp_rbs_value **) &empty_value->list); + + return; + } + case VALUE_KIND_SKBLIST_POP: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_pop *) value) + ->list); + return; + } + case VALUE_KIND_SKBLIST_FILTER: { + struct mptcp_rbs_value_skb_list_filter *filter_value = + (struct mptcp_rbs_value_skb_list_filter *) value; + + opt_value(ctx, (struct mptcp_rbs_value **) &filter_value->list); + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) filter_value->cond); + + if (info && info->is_const && info->const_value == 1) { + struct mptcp_rbs_value_skb_list *list = + filter_value->list; + + filter_value->list = NULL; + filter_value->free(filter_value); + + *value_ptr = (struct mptcp_rbs_value *) list; + } else + opt_value( + ctx, + (struct mptcp_rbs_value **) &filter_value->cond); + + return; + } + case VALUE_KIND_SKBLIST_COUNT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_count *) value) + ->list); + return; + } + case VALUE_KIND_SKBLIST_TOP: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_top *) value) + ->list); + return; + } + case VALUE_KIND_SKBLIST_GET: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_get *) value) + ->list); + return; + } + } +} + +static void opt_smt(struct mptcp_rbs_opt_ctx *ctx, struct mptcp_rbs_smt *smt) +{ + struct mptcp_rbs_opt_value_info *info; + + 
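+	/* Fold the operand values of the statement. Operands already known
+	 * to be the constant NULL are skipped here; see the note at the
+	 * top of opt_value().
+	 */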
switch (smt->kind) { + case SMT_KIND_DROP: { + struct mptcp_rbs_smt_drop *drop_smt = + (struct mptcp_rbs_smt_drop *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) drop_smt->skb); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &drop_smt->skb); + + return; + } + case SMT_KIND_PRINT: { + struct mptcp_rbs_smt_print *print_smt = + (struct mptcp_rbs_smt_print *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) print_smt->msg); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &print_smt->msg); + + if (print_smt->arg) { + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) print_smt->arg); + if (!IS_NULL(info)) + opt_value(ctx, &print_smt->arg); + } + + return; + } + case SMT_KIND_PUSH: { + struct mptcp_rbs_smt_push *push_smt = + (struct mptcp_rbs_smt_push *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) push_smt->sbf); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &push_smt->sbf); + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) push_smt->skb); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &push_smt->skb); + + return; + } + case SMT_KIND_SET: { + struct mptcp_rbs_smt_set *set_smt = + (struct mptcp_rbs_smt_set *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) set_smt->value); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &set_smt->value); + + return; + } + case SMT_KIND_SET_USER: { + struct mptcp_rbs_smt_set_user *set_user_smt = + (struct mptcp_rbs_smt_set_user *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) set_user_smt->value); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &set_user_smt->value); + + return; + } + case SMT_KIND_VAR: { + struct mptcp_rbs_smt_var *var_smt = + (struct mptcp_rbs_smt_var *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) var_smt->value); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &var_smt->value); + + return; + } + case SMT_KIND_VOID: { + struct mptcp_rbs_smt_void *void_smt = + (struct mptcp_rbs_smt_void *) smt; + + if (void_smt->value) { + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) void_smt->value); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &void_smt + ->value); + } + + return; + } + case SMT_KIND_EBPF: { + /* Cannot optimize */ + return; + } + } +} + +static void opt_block(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block2; + struct mptcp_rbs_smt *smt; + + /* Check if the block was already visited */ + FOREACH_BLOCK(list, block2, if (block == block2) return ); + ADD_BLOCK(list, block); + + smt = block->first_smt; + while (smt) { + opt_smt(ctx, smt); + smt = smt->next; + } + + if (block->condition) + opt_value(ctx, (struct mptcp_rbs_value **) &block->condition); + if (block->next) + opt_block(ctx, block->next, list); + if (block->next_else) + opt_block(ctx, block->next_else, list); +} + +void mptcp_rbs_opt_cf(struct mptcp_rbs_opt_ctx *ctx) +{ + struct mptcp_rbs_cfg_block_list list; + + INIT_BLOCK_LIST(&list); + opt_block(ctx, ctx->variation->first_block, &list); + FREE_BLOCK_LIST(&list); +} diff --git a/net/mptcp/mptcp_rbs_optimizer_cf.h b/net/mptcp/mptcp_rbs_optimizer_cf.h new file mode 100644 index 
0000000000000..9f18a3dd8606a --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_cf.h @@ -0,0 +1,13 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_CF_H +#define _MPTCP_RBS_OPTIMIZER_CF_H + +struct mptcp_rbs_opt_ctx; + +/** + * Constant Folding: + * Combines constant values + * @ctx: The optimization context + */ +void mptcp_rbs_opt_cf(struct mptcp_rbs_opt_ctx *ctx); + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer_cve.c b/net/mptcp/mptcp_rbs_optimizer_cve.c new file mode 100644 index 0000000000000..59acb03aaa5d4 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_cve.c @@ -0,0 +1,1160 @@ +#include "mptcp_rbs_optimizer_cve.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value.h" +#include + +static void find_var_smts_in_block(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_cfg_block *block) +{ + struct mptcp_rbs_smt *smt; + struct mptcp_rbs_smt_var *var_smt; + + smt = block->first_smt; + while (smt) { + if (smt->kind == SMT_KIND_VAR) { + var_smt = (struct mptcp_rbs_smt_var *) smt; + + ctx->var_infos[var_smt->var_number].smt = var_smt; + } + + smt = smt->next; + } +} + +static struct mptcp_rbs_opt_value_info *opt_value(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_value *value) +{ + struct mptcp_rbs_opt_value_info *info = NULL; + +#define APPLY_ON_BIN(val, op) \ + struct mptcp_rbs_opt_value_info *left_info; \ + struct mptcp_rbs_opt_value_info *right_info; \ + left_info = \ + opt_value(ctx, (struct mptcp_rbs_value *) (val)->left_operand); \ + right_info = \ + opt_value(ctx, (struct mptcp_rbs_value *) (val)->right_operand); \ + \ + if (left_info && left_info->is_const) { \ + if (left_info->const_value == -1) { \ + info = mptcp_rbs_opt_get_value_info(ctx, value); \ + info->is_const = true; \ + info->const_value = -1; \ + } else if (right_info && right_info->is_const) { \ + info = mptcp_rbs_opt_get_value_info(ctx, value); \ + info->is_const = true; \ + if (right_info->const_value == -1) \ + info->const_value = -1; \ + else \ + op; \ + } \ + } else if (right_info && right_info->is_const && \ + right_info->const_value == -1) { \ + info = mptcp_rbs_opt_get_value_info(ctx, value); \ + info->is_const = true; \ + info->const_value = -1; \ + } \ + \ + return info; + + switch (value->kind) { + case VALUE_KIND_CONSTINT: { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = + ((struct mptcp_rbs_value_constint *) value)->value; + return info; + } + case VALUE_KIND_CONSTSTRING: { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = 0; + return info; + } + case VALUE_KIND_NULL: { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + return info; + } + case VALUE_KIND_BOOL_VAR: { + struct mptcp_rbs_opt_var_info *var_info; + struct mptcp_rbs_opt_value_info *var_value_info; + + var_info = + &ctx->var_infos[((struct mptcp_rbs_value_int_var *) value) + ->var_number]; + ++var_info->usage; + var_value_info = + mptcp_rbs_opt_find_value_info(ctx, var_info->smt->value); + + if (var_value_info && var_value_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = var_value_info->const_value; + } + + return info; + } + case VALUE_KIND_INT_VAR: { + struct mptcp_rbs_opt_var_info *var_info; + struct mptcp_rbs_opt_value_info *var_value_info; + + var_info = + &ctx->var_infos[((struct mptcp_rbs_value_int_var *) value) + ->var_number]; + 
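+		/* Count the read and, if the defining VAR statement's value
+		 * is known to be constant, inherit its constness.
+		 */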
++var_info->usage; + var_value_info = + mptcp_rbs_opt_find_value_info(ctx, var_info->smt->value); + + if (var_value_info && var_value_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = var_value_info->const_value; + } + + return info; + } + case VALUE_KIND_STRING_VAR: { + struct mptcp_rbs_opt_var_info *var_info; + struct mptcp_rbs_opt_value_info *var_value_info; + + var_info = + &ctx->var_infos[((struct mptcp_rbs_value_int_var *) value) + ->var_number]; + ++var_info->usage; + var_value_info = + mptcp_rbs_opt_find_value_info(ctx, var_info->smt->value); + + if (var_value_info && var_value_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = var_value_info->const_value; + } + + return info; + } + case VALUE_KIND_SBF_VAR: { + struct mptcp_rbs_opt_var_info *var_info; + struct mptcp_rbs_opt_value_info *var_value_info; + + var_info = + &ctx->var_infos[((struct mptcp_rbs_value_int_var *) value) + ->var_number]; + ++var_info->usage; + var_value_info = + mptcp_rbs_opt_find_value_info(ctx, var_info->smt->value); + + if (var_value_info && var_value_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = var_value_info->const_value; + } + + return info; + } + case VALUE_KIND_SBFLIST_VAR: { + struct mptcp_rbs_opt_var_info *var_info; + struct mptcp_rbs_opt_value_info *var_value_info; + + var_info = + &ctx->var_infos[((struct mptcp_rbs_value_int_var *) value) + ->var_number]; + ++var_info->usage; + var_value_info = + mptcp_rbs_opt_find_value_info(ctx, var_info->smt->value); + + if (var_value_info && var_value_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = var_value_info->const_value; + } + + return info; + } + case VALUE_KIND_SKB_VAR: { + struct mptcp_rbs_opt_var_info *var_info; + struct mptcp_rbs_opt_value_info *var_value_info; + + var_info = + &ctx->var_infos[((struct mptcp_rbs_value_int_var *) value) + ->var_number]; + ++var_info->usage; + var_value_info = + mptcp_rbs_opt_find_value_info(ctx, var_info->smt->value); + + if (var_value_info && var_value_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = var_value_info->const_value; + } + + return info; + } + case VALUE_KIND_SKBLIST_VAR: { + struct mptcp_rbs_opt_var_info *var_info; + struct mptcp_rbs_opt_value_info *var_value_info; + + var_info = + &ctx->var_infos[((struct mptcp_rbs_value_int_var *) value) + ->var_number]; + ++var_info->usage; + var_value_info = + mptcp_rbs_opt_find_value_info(ctx, var_info->smt->value); + + if (var_value_info && var_value_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = var_value_info->const_value; + } + + return info; + } + case VALUE_KIND_NOT: { + struct mptcp_rbs_opt_value_info *operand_info; + operand_info = opt_value( + ctx, + (struct mptcp_rbs_value *) ((struct mptcp_rbs_value_not *) + value) + ->operand); + + if (operand_info && operand_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + if (operand_info->const_value == -1) + info->const_value = -1; + else + info->const_value = !operand_info->const_value; + } + + return info; + } + case VALUE_KIND_EQUAL: { + APPLY_ON_BIN((struct mptcp_rbs_value_equal *) value, + info->const_value = left_info->const_value == + right_info->const_value) + } + 
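+	/* Like EQUAL above, the remaining comparisons fold through
+	 * APPLY_ON_BIN: a constant NULL operand makes the result NULL and
+	 * two constant operands fold to 0 or 1.
+	 */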
case VALUE_KIND_UNEQUAL: {
+		APPLY_ON_BIN((struct mptcp_rbs_value_unequal *) value,
+			     info->const_value = left_info->const_value !=
+						 right_info->const_value)
+	}
+	case VALUE_KIND_LESS: {
+		APPLY_ON_BIN((struct mptcp_rbs_value_less *) value,
+			     info->const_value = left_info->const_value <
+						 right_info->const_value)
+	}
+	case VALUE_KIND_LESS_EQUAL: {
+		APPLY_ON_BIN((struct mptcp_rbs_value_less_equal *) value,
+			     info->const_value = left_info->const_value <=
+						 right_info->const_value)
+	}
+	case VALUE_KIND_GREATER: {
+		APPLY_ON_BIN((struct mptcp_rbs_value_greater *) value,
+			     info->const_value = left_info->const_value >
+						 right_info->const_value)
+	}
+	case VALUE_KIND_GREATER_EQUAL: {
+		APPLY_ON_BIN((struct mptcp_rbs_value_greater_equal *) value,
+			     info->const_value = left_info->const_value >=
+						 right_info->const_value)
+	}
+	case VALUE_KIND_AND: {
+		struct mptcp_rbs_opt_value_info *left_info;
+		struct mptcp_rbs_opt_value_info *right_info;
+		left_info = opt_value(
+		    ctx,
+		    (struct mptcp_rbs_value *) ((struct mptcp_rbs_value_and *)
+						value)
+			->left_operand);
+		right_info = opt_value(
+		    ctx,
+		    (struct mptcp_rbs_value *) ((struct mptcp_rbs_value_and *)
+						value)
+			->right_operand);
+
+		if (left_info && left_info->is_const) {
+			if (left_info->const_value <= 0) {
+				info = mptcp_rbs_opt_get_value_info(ctx, value);
+				info->is_const = true;
+				info->const_value = 0;
+			} else if (right_info && right_info->is_const) {
+				info = mptcp_rbs_opt_get_value_info(ctx, value);
+				info->is_const = true;
+				info->const_value =
+				    right_info->const_value == 1;
+			}
+		} else if (right_info && right_info->is_const &&
+			   right_info->const_value <= 0) {
+			info = mptcp_rbs_opt_get_value_info(ctx, value);
+			info->is_const = true;
+			info->const_value = 0;
+		}
+
+		return info;
+	}
+	case VALUE_KIND_OR: {
+		struct mptcp_rbs_opt_value_info *left_info;
+		struct mptcp_rbs_opt_value_info *right_info;
+		left_info = opt_value(
+		    ctx,
+		    (struct mptcp_rbs_value *) ((struct mptcp_rbs_value_or *)
+						value)
+			->left_operand);
+		right_info = opt_value(
+		    ctx,
+		    (struct mptcp_rbs_value *) ((struct mptcp_rbs_value_or *)
+						value)
+			->right_operand);
+
+		if (left_info && left_info->is_const) {
+			if (left_info->const_value == 1) {
+				info = mptcp_rbs_opt_get_value_info(ctx, value);
+				info->is_const = true;
+				info->const_value = 1;
+			} else if (right_info && right_info->is_const) {
+				info = mptcp_rbs_opt_get_value_info(ctx, value);
+				info->is_const = true;
+				info->const_value =
+				    right_info->const_value == 1;
+			}
+		} else if (right_info && right_info->is_const &&
+			   right_info->const_value == 1) {
+			info = mptcp_rbs_opt_get_value_info(ctx, value);
+			info->is_const = true;
+			info->const_value = 1;
+		}
+
+		return info;
+	}
+	case VALUE_KIND_ADD: {
+		APPLY_ON_BIN((struct mptcp_rbs_value_add *) value, {
+			unsigned int result =
+			    left_info->const_value + right_info->const_value;
+			info->const_value = result;
+		})
+	}
+	case VALUE_KIND_SUBTRACT: {
+		APPLY_ON_BIN((struct mptcp_rbs_value_subtract *) value, {
+			unsigned int result =
+			    left_info->const_value - right_info->const_value;
+			info->const_value = result;
+		})
+	}
+	case VALUE_KIND_MULTIPLY: {
+		APPLY_ON_BIN((struct mptcp_rbs_value_multiply *) value, {
+			unsigned int result =
+			    left_info->const_value * right_info->const_value;
+			info->const_value = result;
+		})
+	}
+	case VALUE_KIND_DIVIDE: {
+		APPLY_ON_BIN((struct mptcp_rbs_value_divide *) value, {
+			if (!right_info->const_value)
+				info->const_value = -1;
+			else {
+				unsigned int result =
+				    left_info->const_value /
+				    right_info->const_value;
+
info->const_value = result; + } + }) + } + case VALUE_KIND_REMAINDER: { + APPLY_ON_BIN((struct mptcp_rbs_value_remainder *) value, { + if (!right_info->const_value) + info->const_value = -1; + else { + unsigned int result = left_info->const_value % + right_info->const_value; + info->const_value = result; + } + }) + } + case VALUE_KIND_IS_NULL: { + struct mptcp_rbs_opt_value_info *operand_info; + operand_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_is_null *) value) + ->operand); + + if (operand_info && operand_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = operand_info->const_value == -1; + } else if (((struct mptcp_rbs_value_is_null *) value) + ->operand->kind == VALUE_KIND_REG) { + /* Registers can never hold NULL */ + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = 0; + } + + return info; + } + case VALUE_KIND_IS_NOT_NULL: { + struct mptcp_rbs_opt_value_info *operand_info; + operand_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_is_not_null *) value) + ->operand); + + if (operand_info && operand_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = operand_info->const_value != -1; + } else if (((struct mptcp_rbs_value_is_not_null *) value) + ->operand->kind == VALUE_KIND_REG) { + /* Registers can never hold NULL */ + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = 1; + } + + return info; + } + case VALUE_KIND_REG: + case VALUE_KIND_Q: + case VALUE_KIND_QU: + case VALUE_KIND_RQ: + case VALUE_KIND_CURRENT_TIME_MS: + case VALUE_KIND_RANDOM: + case VALUE_KIND_SBFLIST_FILTER_SBF: + case VALUE_KIND_SKBLIST_FILTER_SKB: { + /* Cannot be constant */ + return NULL; + } + case VALUE_KIND_SUBFLOWS: { + if (!ctx->variation->sbf_num) + return NULL; + + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = ctx->variation->sbf_num; + + return info; + } + case VALUE_KIND_SBF_RTT: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_rtt *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_IS_BACKUP: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_is_backup *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_CWND: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_cwnd *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_SKBS_IN_FLIGHT: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_skbs_in_flight *) + value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + 
sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_LOST_SKBS: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_lost_skbs *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_HAS_WINDOW_FOR: { + struct mptcp_rbs_opt_value_info *sbf_info; + struct mptcp_rbs_opt_value_info *skb_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_has_window_for *) + value) + ->sbf); + skb_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_has_window_for *) + value) + ->skb); + + if ((sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) || + (skb_info && skb_info->is_const && + skb_info->const_value == -1)) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_ID: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_id *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_DELAY_IN: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_delay_in *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_DELAY_OUT: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_delay_out *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_BW_OUT_SEND: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_bw_out_send *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_BW_OUT_ACK: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_bw_out_ack *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_SSTHRESH: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_ssthresh *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + 
info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_THROTTLED: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_throttled *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_LOSSY: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_lossy *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBFLIST_NEXT: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + sbf_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_next *) value) + ->list); + + if (sbf_list_info && sbf_list_info->is_const && + sbf_list_info->const_value <= 0) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBFLIST_EMPTY: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + sbf_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_empty *) value) + ->list); + + if (sbf_list_info && sbf_list_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + if (sbf_list_info->const_value == -1) + info->const_value = -1; + else if (!sbf_list_info->const_value) + info->const_value = 1; + else + info->const_value = 0; + } + + return info; + } + case VALUE_KIND_SBFLIST_FILTER: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + struct mptcp_rbs_opt_value_info *cond_info; + sbf_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_filter *) value) + ->list); + cond_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_filter *) value) + ->cond); + + if (sbf_list_info && sbf_list_info->is_const) { + if (sbf_list_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } else if (cond_info && cond_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + if (cond_info->const_value == -1) + info->const_value = -1; + else if (!cond_info->const_value) + info->const_value = 0; + else + info->const_value = + sbf_list_info->const_value; + } + } else if (cond_info && cond_info->is_const && + cond_info->const_value <= 0) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + if (cond_info->const_value == -1) + info->const_value = -1; + else + info->const_value = 0; + } + + return info; + } + case VALUE_KIND_SBFLIST_MAX: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + struct mptcp_rbs_opt_value_info *cond_info; + sbf_list_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_max *) value) + ->list); + cond_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_max *) value) + ->cond); + + if ((sbf_list_info && sbf_list_info->is_const && + sbf_list_info->const_value == -1) || + (cond_info && cond_info->is_const && + cond_info->const_value == -1)) { + info = 
mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBFLIST_MIN: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + struct mptcp_rbs_opt_value_info *cond_info; + sbf_list_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_min *) value) + ->list); + cond_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_min *) value) + ->cond); + + if ((sbf_list_info && sbf_list_info->is_const && + sbf_list_info->const_value == -1) || + (cond_info && cond_info->is_const && + cond_info->const_value == -1)) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBFLIST_GET: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + struct mptcp_rbs_opt_value_info *index_info; + sbf_list_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_get *) value) + ->list); + index_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_get *) value) + ->index); + + if (sbf_list_info && sbf_list_info->is_const) { + if (sbf_list_info->const_value == -1 || + (index_info && index_info->is_const && + index_info->const_value >= + sbf_list_info->const_value)) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + } else if (index_info && index_info->is_const) { + if (index_info->const_value == -1 || + (ctx->variation->sbf_num && + index_info->const_value >= + ctx->variation->sbf_num)) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + } + + return info; + } + case VALUE_KIND_SBFLIST_COUNT: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + sbf_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_count *) value) + ->list); + + if (sbf_list_info && sbf_list_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = sbf_list_info->const_value; + } + + return info; + } + case VALUE_KIND_SBFLIST_SUM: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + struct mptcp_rbs_opt_value_info *cond_info; + sbf_list_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_sum *) value) + ->list); + cond_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_sum *) value) + ->cond); + + if ((sbf_list_info && sbf_list_info->is_const && + sbf_list_info->const_value == -1) || + (cond_info && cond_info->is_const && + cond_info->const_value == -1)) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SKB_SENT_ON: { + struct mptcp_rbs_opt_value_info *skb_info; + struct mptcp_rbs_opt_value_info *sbf_info; + skb_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_sent_on *) value) + ->skb); + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_sent_on *) value) + ->sbf); + + if ((skb_info && skb_info->is_const && + skb_info->const_value == -1) || + (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1)) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SKB_SENT_ON_ALL: { + struct 
mptcp_rbs_opt_value_info *skb_info; + skb_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_sent_on *) value) + ->skb); + + if (skb_info && skb_info->is_const && + skb_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SKB_USER: { + struct mptcp_rbs_opt_value_info *skb_info; + skb_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_user *) value) + ->skb); + + if (skb_info && skb_info->is_const && + skb_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SKBLIST_NEXT: { + struct mptcp_rbs_opt_value_info *skb_list_info; + skb_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_list_next *) value) + ->list); + + if (skb_list_info && skb_list_info->is_const && + skb_list_info->const_value <= 0) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SKBLIST_EMPTY: { + struct mptcp_rbs_opt_value_info *skb_list_info; + skb_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_list_empty *) value) + ->list); + + if (skb_list_info && skb_list_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + if (skb_list_info->const_value == -1) + info->const_value = -1; + else if (!skb_list_info->const_value) + info->const_value = 1; + else + info->const_value = 0; + } + + return info; + } + case VALUE_KIND_SKBLIST_POP: { + struct mptcp_rbs_opt_value_info *skb_list_info; + skb_list_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_list_pop *) value) + ->list); + + if (skb_list_info && skb_list_info->is_const && + skb_list_info->const_value <= 0) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SKBLIST_FILTER: { + struct mptcp_rbs_opt_value_info *skb_list_info; + struct mptcp_rbs_opt_value_info *cond_info; + skb_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_filter *) value) + ->list); + cond_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_filter *) value) + ->cond); + + if (skb_list_info && skb_list_info->is_const) { + if (skb_list_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } else if (cond_info && cond_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + if (cond_info->const_value == -1) + info->const_value = -1; + else if (!cond_info->const_value) + info->const_value = 0; + else + info->const_value = + skb_list_info->const_value; + } + } else if (cond_info && cond_info->is_const && + cond_info->const_value <= 0) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + if (cond_info->const_value == -1) + info->const_value = -1; + else + info->const_value = 0; + } + + return info; + } + case VALUE_KIND_SKBLIST_COUNT: { + struct mptcp_rbs_opt_value_info *skb_list_info; + skb_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_list_count *) value) + ->list); + + if (skb_list_info && 
skb_list_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = skb_list_info->const_value; + } + + return info; + } + case VALUE_KIND_SKBLIST_TOP: { + struct mptcp_rbs_opt_value_info *skb_list_info; + skb_list_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_list_top *) value) + ->list); + + if (skb_list_info && skb_list_info->is_const && + skb_list_info->const_value <= 0) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + default: + return NULL; + } +} + +static void opt_smt(struct mptcp_rbs_opt_ctx *ctx, struct mptcp_rbs_smt *smt) +{ + switch (smt->kind) { + case SMT_KIND_DROP: { + struct mptcp_rbs_smt_drop *drop_smt = + (struct mptcp_rbs_smt_drop *) smt; + + opt_value(ctx, (struct mptcp_rbs_value *) drop_smt->skb); + break; + } + case SMT_KIND_PRINT: { + struct mptcp_rbs_smt_print *print_smt = + (struct mptcp_rbs_smt_print *) smt; + + opt_value(ctx, (struct mptcp_rbs_value *) print_smt->msg); + if (print_smt->arg) + opt_value(ctx, print_smt->arg); + break; + } + case SMT_KIND_PUSH: { + struct mptcp_rbs_smt_push *push_smt = + (struct mptcp_rbs_smt_push *) smt; + + opt_value(ctx, (struct mptcp_rbs_value *) push_smt->sbf); + opt_value(ctx, (struct mptcp_rbs_value *) push_smt->skb); + break; + } + case SMT_KIND_SET: { + struct mptcp_rbs_smt_set *set_smt = + (struct mptcp_rbs_smt_set *) smt; + + opt_value(ctx, (struct mptcp_rbs_value *) set_smt->value); + break; + } + case SMT_KIND_SET_USER: { + struct mptcp_rbs_smt_set_user *set_user_smt = + (struct mptcp_rbs_smt_set_user *) smt; + + opt_value(ctx, (struct mptcp_rbs_value *) set_user_smt->value); + break; + } + case SMT_KIND_VAR: { + struct mptcp_rbs_smt_var *var_smt = + (struct mptcp_rbs_smt_var *) smt; + + opt_value(ctx, var_smt->value); + break; + } + case SMT_KIND_VOID: { + struct mptcp_rbs_smt_void *void_smt = + (struct mptcp_rbs_smt_void *) smt; + + if (void_smt->value) + opt_value(ctx, void_smt->value); + break; + } + case SMT_KIND_EBPF: { + /* Cannot optimize */ + break; + } + } +} + +static void opt_block(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_cfg_block *block) +{ + struct mptcp_rbs_smt *smt; + + smt = block->first_smt; + while (smt) { + opt_smt(ctx, smt); + smt = smt->next; + } + + if (block->condition) + opt_value(ctx, (struct mptcp_rbs_value *) block->condition); +} + +void mptcp_rbs_opt_cve(struct mptcp_rbs_opt_ctx *ctx) +{ + struct mptcp_rbs_cfg_block_list list; + struct mptcp_rbs_cfg_block *block; + + /* Clear variable information */ + memset(ctx->var_infos, 0, sizeof(ctx->var_infos)); + + INIT_BLOCK_LIST(&list); + mptcp_rbs_cfg_block_traverse(ctx->variation->first_block, &list); + + /* Find var statements */ + FOREACH_BLOCK(&list, block, find_var_smts_in_block(ctx, block)); + + /* Calculate constant values */ + FOREACH_BLOCK(&list, block, opt_block(ctx, block)); + FREE_BLOCK_LIST(&list); +} diff --git a/net/mptcp/mptcp_rbs_optimizer_cve.h b/net/mptcp/mptcp_rbs_optimizer_cve.h new file mode 100644 index 0000000000000..30857b5cbe615 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_cve.h @@ -0,0 +1,14 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_CVE_H +#define _MPTCP_RBS_OPTIMIZER_CVE_H + +struct mptcp_rbs_opt_ctx; + +/** + * Constant Value Evaluation: + * Searches for constant values, evaluates them and stores the results inside + * the values' info + * @ctx: The optimization context + */ +void mptcp_rbs_opt_cve(struct mptcp_rbs_opt_ctx *ctx); + +#endif 
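A side note on the folding rules in mptcp_rbs_optimizer_cve.c above: booleans in the rule engine are ternary, with -1 encoding NULL, 0 false and 1 true. The following stand-alone user-space sketch (illustrative only, not part of the patch) summarizes the NOT/AND/OR folding that opt_value() implements; note that AND and OR collapse a constant NULL operand to false, while NOT propagates it:

#include <assert.h>

/* -1 = NULL, 0 = false, 1 = true, mirroring const_value in the pass. */
static int fold_not(int v)
{
	return v == -1 ? -1 : !v;
}

static int fold_and(int l, int r)
{
	/* A false or NULL operand makes the conjunction false. */
	return (l == 1 && r == 1) ? 1 : 0;
}

static int fold_or(int l, int r)
{
	/* Only a true operand yields true; NULL counts as false. */
	return (l == 1 || r == 1) ? 1 : 0;
}

int main(void)
{
	assert(fold_not(-1) == -1);   /* NOT propagates NULL */
	assert(fold_and(-1, 1) == 0); /* AND collapses NULL to false */
	assert(fold_or(0, -1) == 0);
	return 0;
}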
diff --git a/net/mptcp/mptcp_rbs_optimizer_dce.c b/net/mptcp/mptcp_rbs_optimizer_dce.c new file mode 100644 index 0000000000000..34ef5b9c1332f --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_dce.c @@ -0,0 +1,183 @@ +#include "mptcp_rbs_optimizer_dce.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value.h" + +#define IS_NULL(info) ((info) && (info)->is_const && (info)->const_value == -1) + +static void opt_smt(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_smt ***smt_ptr) +{ + struct mptcp_rbs_smt *smt = **smt_ptr; + struct mptcp_rbs_opt_value_info *info; + struct mptcp_rbs_opt_value_info *info2; + + switch (smt->kind) { + case SMT_KIND_DROP: { + struct mptcp_rbs_smt_drop *drop_smt = + (struct mptcp_rbs_smt_drop *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) drop_smt->skb); + if (!IS_NULL(info)) { + *smt_ptr = &smt->next; + return; + } + + break; + } + case SMT_KIND_PRINT: { + struct mptcp_rbs_smt_print *print_smt = + (struct mptcp_rbs_smt_print *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) print_smt->msg); + if (print_smt->arg) + info2 = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) print_smt->arg); + else + info2 = NULL; + + if (!IS_NULL(info) && !IS_NULL(info2)) { + *smt_ptr = &smt->next; + return; + } + + break; + } + case SMT_KIND_PUSH: { + struct mptcp_rbs_smt_push *push_smt = + (struct mptcp_rbs_smt_push *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) push_smt->sbf); + info2 = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) push_smt->skb); + + if (!IS_NULL(info) && !IS_NULL(info2)) { + *smt_ptr = &smt->next; + return; + } + + break; + } + case SMT_KIND_SET: { + struct mptcp_rbs_smt_set *set_smt = + (struct mptcp_rbs_smt_set *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) set_smt->value); + + if (!IS_NULL(info)) { + *smt_ptr = &smt->next; + return; + } + + break; + } + case SMT_KIND_SET_USER: { + struct mptcp_rbs_smt_set_user *set_user_smt = + (struct mptcp_rbs_smt_set_user *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) set_user_smt->value); + + if (!IS_NULL(info)) { + *smt_ptr = &smt->next; + return; + } + + break; + } + case SMT_KIND_VAR: { + struct mptcp_rbs_smt_var *var_smt = + (struct mptcp_rbs_smt_var *) smt; + struct mptcp_rbs_opt_var_info *var_info; + + var_info = &ctx->var_infos[var_smt->var_number]; + if (!var_info || !var_info->smt || var_info->usage) { + *smt_ptr = &smt->next; + return; + } + + break; + } + case SMT_KIND_VOID: { + /* Since VOID is only for measurements we are allowed to remove + * it + */ + break; + } + case SMT_KIND_EBPF: { + /* Cannot optimize */ + return; + } + } + + /* Remove the statement */ + **smt_ptr = smt->next; + smt->free(smt); +} + +static void opt_block(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_cfg_block *block) +{ + struct mptcp_rbs_smt **smt; + struct mptcp_rbs_opt_value_info *info; + + smt = &block->first_smt; + while (smt && *smt) { + opt_smt(ctx, &smt); + } + + if (block->condition) { + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) block->condition); + + if (info && info->is_const) { + if (info->const_value != 1) + block->next = block->next_else; + + block->next_else = NULL; + block->condition->free(block->condition); + block->condition = NULL; + } + } +} + +void 
mptcp_rbs_opt_dce(struct mptcp_rbs_opt_ctx *ctx) +{ + struct mptcp_rbs_cfg_block_list list; + struct mptcp_rbs_cfg_block_list list2; + struct mptcp_rbs_cfg_block *block; + struct mptcp_rbs_cfg_block *block2; + bool found; + + INIT_BLOCK_LIST(&list); + INIT_BLOCK_LIST(&list2); + + /* Fill list with blocks */ + mptcp_rbs_cfg_block_traverse(ctx->variation->first_block, &list); + + /* Remove NULL statements and constant ifs */ + FOREACH_BLOCK(&list, block, opt_block(ctx, block)); + + /* Free unused blocks */ + mptcp_rbs_cfg_block_traverse(ctx->variation->first_block, &list2); + FOREACH_BLOCK(&list, block, { + found = false; + FOREACH_BLOCK(&list2, block2, if (block == block2) { + found = true; + break; + }); + if (!found) + mptcp_rbs_cfg_block_free(block); + }); + + FREE_BLOCK_LIST(&list); + FREE_BLOCK_LIST(&list2); +} + +// TODO RBS Compact variables if some were removed diff --git a/net/mptcp/mptcp_rbs_optimizer_dce.h b/net/mptcp/mptcp_rbs_optimizer_dce.h new file mode 100644 index 0000000000000..87d72e2fd29b5 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_dce.h @@ -0,0 +1,13 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_DCE_H +#define _MPTCP_RBS_OPTIMIZER_DCE_H + +struct mptcp_rbs_opt_ctx; + +/** + * Dead Code Elimination: + * Erases code that is never executed + * @ctx: The optimization context + */ +void mptcp_rbs_opt_dce(struct mptcp_rbs_opt_ctx *ctx); + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf.c b/net/mptcp/mptcp_rbs_optimizer_ebpf.c new file mode 100644 index 0000000000000..fb7f67f0f949c --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_ebpf.c @@ -0,0 +1,4172 @@ +#include "mptcp_rbs_optimizer_ebpf.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_ctx.h" +#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_optimizer_ebpf_regalloc.h" +#include "mptcp_rbs_queue.h" +#include "mptcp_rbs_sched.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value.h" +#include + +/** Some fixed temporaries */ +enum { CTX_TMP, VARS_TMP, REGS_TMP, FIXED_TMP_COUNT }; + +/** Information about a filter/min/max/sum variable */ +struct filter_var { + const void *progress; + int temp; +}; + +/* + * Filter var information lists + */ + +DECL_DA(filter_var_list, struct filter_var *); + +#define INIT_FILTER_VAR_LIST(list) INIT_DA(list) + +#define FREE_FILTER_VAR_LIST(list) FREE_DA(list) + +#define PUSH_FILTER_VAR(list, var) ADD_DA_ITEM(list, var) + +#define POP_FILTER_VAR(list) DELETE_DA_ITEM(list, GET_DA_LEN(list) - 1) + +#define FOREACH_FILTER_VAR(list, var, cmds) FOREACH_DA_ITEM(list, var, cmds) + +/** Context for eBPF code generation */ +struct ebpf_ctx { + /** Pointer to the optimization context */ + struct mptcp_rbs_opt_ctx *ctx; + /** Number of used temporaries */ + int used_temps; + /** Map with used temporaries */ + u64 used_temps_map; + /** The buffer for string constants */ + char **strs; + /** Length of strs */ + int strs_len; + /** Capacity of instruction list inside the current block */ + int capacity; + /** The current block */ + struct mptcp_rbs_cfg_block *block; + /** The current eBPF block */ + struct mptcp_rbs_ebpf_block *eblock; + /** List with active filter variables */ + struct filter_var_list filter_var_list; + /** Variable number of a found *_NEXT value or -1 */ + int next_var; + /** NULL eBPF block of the variable with *_NEXT value or NULL */ + struct mptcp_rbs_ebpf_block *next_var_null_eblock; +}; + +/** Information about a block that is stored inside the tag field */ +struct block_info { + /** The corresponding eBPF block of this block */ + struct 
mptcp_rbs_ebpf_block *eblock; + /** + * Break eBPF block if this block is the beginning of a foreach loop or + * NULL + */ + struct mptcp_rbs_ebpf_block *break_eblock; + /** + * Continue eBPF block if this block is the beginning of a foreach loop + * or NULL + */ + struct mptcp_rbs_ebpf_block *cont_eblock; + /** Mask with reserved temporaries by a foreach loop */ + u64 reserved_temps_map; +}; + +#define BLOCK_INFO(block) ((struct block_info *) (block)->tag) + +/** + * Adds an eBPF instruction to a block + * @eblock: The block where the instruction should be added + * @capacity: Pointer to the capacity of the block + * @instr: The instruction to add + */ +static void add_instr(struct mptcp_rbs_ebpf_block *eblock, int *capacity, + struct mptcp_rbs_ebpf_instr instr) +{ + if (*capacity == eblock->instr_count) { + *capacity = *capacity == 0 ? 4 : *capacity << 1; + eblock->instrs = + krealloc(eblock->instrs, + *capacity * sizeof(struct mptcp_rbs_ebpf_instr), + GFP_KERNEL); + } + + eblock->instrs[eblock->instr_count] = instr; + ++eblock->instr_count; +} + +#define add_instr_ectx(ectx, instr) \ + add_instr(ectx->eblock, &ectx->capacity, instr) + +#define TEMP_TO_MAP(t) (1 << (t)) + +/** + * Reserves a temporary + * @ectx: The generation context + * @return: The reserved temporary + */ +static int reserve(struct ebpf_ctx *ectx) +{ + int i; + int temp = -1; + int count = 0; + + for (i = 0; i < 64; ++i) { + if (TEMP_TO_MAP(i) & ectx->used_temps_map) + ++count; + else if (temp == -1) + temp = i; + } + + ectx->used_temps_map |= TEMP_TO_MAP(temp); + ectx->used_temps = max(ectx->used_temps, count); + return temp; +} + +/** + * Reserves all temporaries in a bitmap + * @ectx: The generation context + * @reserved_map: The temporary bitmap + */ +static void reserve_all(struct ebpf_ctx *ectx, u64 reserved_map) +{ + BUG_ON(ectx->used_temps_map & reserved_map); + + ectx->used_temps_map |= reserved_map; +} + +/** + * Dereserves one temporary + * @ectx: The generation context + * @t: The temporary to dereserve + */ +static void dereserve(struct ebpf_ctx *ectx, int t) +{ + BUG_ON(!(ectx->used_temps_map & TEMP_TO_MAP(t))); + + ectx->used_temps_map &= ~TEMP_TO_MAP(t); +} + +/** + * Dereserves all temporaries in a bitmap + * @ectx: The generation context + * @reserved_map: The temporary bitmap + */ +static void dereserve_all(struct ebpf_ctx *ectx, u64 reserved_map) +{ + BUG_ON((ectx->used_temps_map & reserved_map) != reserved_map); + + ectx->used_temps_map &= ~reserved_map; +} + +/* + * Functions that can be called from inside eBPF code + */ + +u64 ebpf_printk(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + printk(*((char **) &r1), r2); + return 0; +} + +u64 ebpf_add_drop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct sk_buff *skb = *((struct sk_buff **) &r2); + bool reinject = r3; + + mptcp_rbs_action_new(ctx->rbs_cb->open_actions, false, ACTION_KIND_DROP, + NULL, skb, reinject); + return 0; +} + +u64 ebpf_add_push(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct tcp_sock *sbf = *((struct tcp_sock **) &r2); + struct sk_buff *skb = *((struct sk_buff **) &r3); + bool reinject = r4; + + mptcp_rbs_action_new(ctx->rbs_cb->open_actions, false, ACTION_KIND_PUSH, + sbf, skb, reinject); + return 0; +} + +u64 ebpf_ktime_get_raw_ms(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + return ktime_get_raw_ns() / 1000000; +} + +u64 ebpf_random(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ 
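+ /* Uniform 32-bit random value from the kernel RNG; this presumably
+  * backs the rules' RANDOM value, which opt_value() treats as never
+  * constant. */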
+ unsigned int n; + + get_random_bytes(&n, sizeof(unsigned int)); + return n; +} + +u64 ebpf_has_window_for(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct tcp_sock *sbf = *((struct tcp_sock **) &r2); + struct sk_buff *skb = *((struct sk_buff **) &r3); + unsigned int mss_now = tcp_current_mss(ctx->meta_sk); + + /* RBS copied from mptcp_sched.c */ + /* Don't send on this subflow if we bypass the allowed send-window at + * the per-subflow level. Similar to tcp_snd_wnd_test, but manually + * calculated end_seq (because here at this point end_seq is still at + * the meta-level). + */ + if (after(sbf->write_seq + min(skb->len, mss_now), tcp_wnd_end(sbf))) + return 0; + return 1; +} + +u64 ebpf_bw_out_send(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct tcp_sock *sbf = *((struct tcp_sock **) &r1); + + return mptcp_rbs_sbf_get_bw_send(mptcp_rbs_get_sbf_cb(sbf)); +} + +u64 ebpf_bw_out_ack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct tcp_sock *sbf = *((struct tcp_sock **) &r1); + + return mptcp_rbs_sbf_get_bw_ack(mptcp_rbs_get_sbf_cb(sbf)); +} + +u64 ebpf_sbf_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct tcp_sock *sbf = *((struct tcp_sock **) &r1); + + return mptcp_rbs_get_sbf_cb(sbf)->user; +} + +u64 ebpf_rtt_ms(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct tcp_sock *sbf = *((struct tcp_sock **) &r1); + + return (sbf->srtt_us >> 3) / 1000; +} + +u64 ebpf_queued(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct tcp_sock *sbf = *((struct tcp_sock **) &r1); + + return (sbf->write_seq - sbf->snd_nxt) / sbf->mss_cache; +} + +u64 ebpf_lossy(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct tcp_sock *sbf = *((struct tcp_sock **) &r1); + + if (inet_csk((struct sock *) sbf)->icsk_ca_state == TCP_CA_Loss) { + mptcp_debug("sbf_is_available %p loss state -> false\n", sbf); + /* If SACK is disabled, and we got a loss, TCP does not exit + * the loss-state until something above high_seq has been + * acked. (see tcp_try_undo_recovery) + * + * high_seq is the snd_nxt at the moment of the RTO. As soon + * as we have an RTO, we won't push data on the subflow. + * Thus, snd_una can never go beyond high_seq. 
+ */ + if (!tcp_is_reno(sbf)) + return true; + else if (sbf->snd_una != sbf->high_seq) + return true; + } + + return false; +} + +u64 ebpf_sent_on_all(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct sk_buff *skb = *((struct sk_buff **) &r2); + u32 mask; + struct tcp_sock *sbf; + + mask = TCP_SKB_CB(skb)->path_mask; + sbf = ctx->mpcb->connection_list; + + while (sbf) { + if (!(mask & mptcp_pi_to_flag(sbf->mptcp->path_index))) + return 0; + + sbf = sbf->mptcp->next; + } + + return 1; +} + +u64 ebpf_skb_length(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = *((struct sk_buff **) &r1); + + return TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; +} + +u64 ebpf_skb_seq(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = *((struct sk_buff **) &r1); + + return TCP_SKB_CB(skb)->seq; +} + +u64 ebpf_skb_psh(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = *((struct sk_buff **) &r1); + + return TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH; +} + +u64 ebpf_q_next(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct sk_buff *skb_candidate = *((struct sk_buff **) &r2); + struct sk_buff *skb; + + if (skb_candidate) { + if (skb_queue_is_last(&ctx->meta_sk->sk_write_queue, + skb_candidate)) + skb_candidate = NULL; + else + skb_candidate = skb_queue_next( + &ctx->meta_sk->sk_write_queue, skb_candidate); + } else + skb_candidate = ctx->rbs_cb->queue_position; + + skb = mptcp_rbs_next_in_queue(&ctx->meta_sk->sk_write_queue, + skb_candidate); + + return (size_t) skb; +} + +u64 ebpf_qu_next(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct sk_buff *skb = *((struct sk_buff **) &r2); + + if (skb) { + if (skb_queue_is_last(&ctx->meta_sk->sk_write_queue, skb)) + skb = NULL; + else { + skb = + skb_queue_next(&ctx->meta_sk->sk_write_queue, skb); + } + } else { + if (ctx->meta_sk->sk_write_queue.qlen == 0) + skb = NULL; + else + skb = skb_peek(&ctx->meta_sk->sk_write_queue); + } + + if (skb == ctx->rbs_cb->queue_position) { + mptcp_debug( + "%s skb %p matches the queue_position, we are at the end\n", + __func__, skb); + skb = NULL; + } + + while (skb && TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue) { + mptcp_debug("%s skips skb %p\n", __func__, skb); + if (skb_queue_is_last(&ctx->meta_sk->sk_write_queue, skb) || + /* Empty because it points to the element in Q */ + skb == ctx->rbs_cb->queue_position) { + skb = NULL; + break; + } else + skb = + skb_queue_next(&ctx->meta_sk->sk_write_queue, skb); + } + + return (size_t) skb; +} + +u64 ebpf_rq_next(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct sk_buff *skb_candidate = *((struct sk_buff **) &r2); + struct sk_buff *skb; + + if (skb_candidate) { + if (skb_queue_is_last(&ctx->mpcb->reinject_queue, + skb_candidate)) { + skb_candidate = NULL; + } else { + skb_candidate = skb_queue_next( + &ctx->mpcb->reinject_queue, skb_candidate); + } + } else + skb_candidate = skb_peek(&ctx->mpcb->reinject_queue); + + skb = + mptcp_rbs_next_in_queue(&ctx->mpcb->reinject_queue, skb_candidate); + + return (size_t) skb; +} + +u64 ebpf_subflows_next(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct tcp_sock *sbf = *((struct tcp_sock **) &r2); + + if (sbf) + sbf = 
sbf->mptcp->next; + else + sbf = ctx->mpcb->connection_list; + + /* Skip unavailable subflows */ + while (sbf && !mptcp_rbs_sbf_is_available(sbf)) { + sbf = sbf->mptcp->next; + } + + return (size_t) sbf; +} + +u64 ebpf_varlist_expand(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_var *var = *((struct mptcp_rbs_var **) &r1); + struct tcp_sock **item = *((struct tcp_sock ***) &r2); + int capacity; + int index; + + BUILD_BUG_ON(offsetof(struct mptcp_rbs_var, sbf_list_value) != + offsetof(struct mptcp_rbs_var, skb_list_value)); + + if (!item) { + index = 0; + capacity = 8; + } else { + index = + (item - var->sbf_list_value) / sizeof(struct tcp_sock *); + capacity = (index + 1) * 2; + } + + var->sbf_list_value = + krealloc(var->sbf_list_value, capacity * sizeof(struct tcp_sock *), + GFP_KERNEL); + memset(&var->sbf_list_value[index], 0, + (capacity - index - 1) * sizeof(struct tcp_sock *)); + var->sbf_list_value[capacity - 1] = (struct tcp_sock *) 1; + + return (size_t) &var->sbf_list_value[index]; +} + +u64 ebpf_skb_list_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct sk_buff *skb = *((struct sk_buff **) &r2); + enum mptcp_rbs_value_kind underlying_queue_kind = + (enum mptcp_rbs_value_kind) r3; + + ctx->side_effects = 1; + + if (underlying_queue_kind == VALUE_KIND_Q) { + /* + * Pop an element from Q might be the queue_position or later + */ + if (skb == ctx->rbs_cb->queue_position) { + mptcp_rbs_advance_send_head( + ctx->meta_sk, &ctx->rbs_cb->queue_position); + mptcp_rbs_debug( + "rbs_q_pop returns %p, new queue head %p\n", skb, + ctx->rbs_cb->queue_position); + } else { + /* we can not unlink the packet, as all skbs have to + * stay in the circular buffer */ + mptcp_debug( + "%s sets not_in_queue for packet %p in Q, was %u\n", + __func__, skb, + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue); + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue = 1; + } + + return (size_t) skb; + } + + if (underlying_queue_kind == VALUE_KIND_RQ) { + mptcp_debug("%s sets not_in_queue, to_free and to_unlink for " + "packet %p in RQ, was %u\n", + __func__, skb, + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue); + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue = 1; + TCP_SKB_CB(skb)->mptcp_rbs.flags_to_free = 1; + TCP_SKB_CB(skb)->mptcp_rbs.flags_to_unlink = 1; + + return (size_t) skb; + } + + if (underlying_queue_kind == VALUE_KIND_QU) { + mptcp_debug( + "%s sets not_in_queue for packet %p in QU, was %u\n", + __func__, skb, + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue); + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue = 1; + return (size_t) skb; + } + + BUG_ON(true); + return 0; +} + +static struct bpf_func_proto func_protos[] = { + { + .func = &ebpf_printk, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, + }, + { + .func = &ebpf_add_drop, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + }, + { + .func = &ebpf_add_push, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, + }, + { + .func = &ebpf_ktime_get_raw_ms, + .gpl_only = false, + .ret_type = RET_INTEGER, + }, + { + .func = &ebpf_random, + .gpl_only = false, + .ret_type = RET_INTEGER, + }, + { + .func = &ebpf_has_window_for, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = 
ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + }, + { + .func = &ebpf_bw_out_send, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_bw_out_ack, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_rtt_ms, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_lossy, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_queued, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_skb_length, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_skb_seq, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_skb_psh, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_sbf_user, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_sent_on_all, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + }, + { + .func = &ebpf_q_next, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + }, + { + .func = &ebpf_qu_next, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + }, + { + .func = &ebpf_rq_next, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + }, + { + .func = &ebpf_subflows_next, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + }, + { + .func = &ebpf_varlist_expand, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, + }, + { + .func = &ebpf_skb_list_pop, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + }, +}; + +static const struct bpf_func_proto *get_func_proto(enum bpf_func_id func_id) +{ + int index = func_id - BPF_FUNC_mptcp_rbs_printk; + if (index < 0 || index >= ARRAY_SIZE(func_protos)) + return NULL; + return &func_protos[index]; +} + +static bool is_valid_access(int off, int size, enum bpf_access_type type) +{ + return false; +} + +static struct bpf_verifier_ops bpf_ops = { + .get_func_proto = get_func_proto, + .is_valid_access = is_valid_access, +}; + +static bool gen_value(struct ebpf_ctx *ectx, + const struct mptcp_rbs_value *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock); + +static bool gen_list_value(struct ebpf_ctx *ectx, + const struct mptcp_rbs_value *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, + u64 *reserved_temps_map); + +static bool noinline mptcp_rbs_value_constint_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_constint *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, value->value)); + + return false; +} + +static bool noinline mptcp_rbs_value_conststring_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_conststring *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* Put the string in strs */ + int len = strlen(value->value); + int idx = ectx->strs_len; + char *new_str; + + ++ectx->strs_len; + 
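+ /* Grow the string table by one slot and keep a private copy of the
+  * constant; the two raw instructions below then materialize the
+  * 64-bit pointer to that copy via the two-part ld_imm64 encoding
+  * (BPF_LD | BPF_DW | BPF_IMM). */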
ectx->strs = + krealloc(ectx->strs, ectx->strs_len * sizeof(char *), GFP_KERNEL); + ectx->strs[idx] = kmalloc(len + 1, GFP_KERNEL); + new_str = ectx->strs[idx]; + memcpy(new_str, value->value, len + 1); + + add_instr_ectx( + ectx, + EBPF_RAW_INSTR(((struct bpf_insn){.code = BPF_LD | BPF_DW | BPF_IMM, + .dst_reg = 0, + .src_reg = 0, + .off = 0, + .imm = (u32)(size_t)(new_str) }), + -1, -1, -1, -1, -1, temp)); + add_instr_ectx( + ectx, EBPF_RAW_INSTR(((struct bpf_insn){ + .code = 0, + .dst_reg = 0, + .src_reg = 0, + .off = 0, + .imm = ((u64)(size_t)(new_str)) >> 32 }), + -1, -1, -1, -1, -1, -1)); + + return false; +} + +static bool noinline mptcp_rbs_value_null_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_null *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* TODO We don't need this right? */ + BUG_ON(true); + return false; +} + +static bool noinline mptcp_rbs_value_bool_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_bool_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *eblock; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(s32)), temp, VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, bool_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, -1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next = eblock; + ectx->eblock->next_else = null_eblock; + + ectx->eblock = eblock; + ectx->capacity = 0; + + return true; +} + +static bool noinline mptcp_rbs_value_int_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_int_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *eblock; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(s64)), temp, VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, int_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, -1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next = eblock; + ectx->eblock->next_else = null_eblock; + + ectx->eblock = eblock; + ectx->capacity = 0; + + return true; +} + +static bool noinline mptcp_rbs_value_string_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_string_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *eblock; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(char *)), temp, VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, string_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next = eblock; + ectx->eblock->next_else = null_eblock; + + ectx->eblock = eblock; + ectx->capacity = 0; + + return true; +} + +static bool noinline mptcp_rbs_value_sbf_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *eblock; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct tcp_sock *)), temp, + VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, sbf_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, 
EBPF_JMP_OFF()); + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next = eblock; + ectx->eblock->next_else = null_eblock; + + ectx->eblock = eblock; + ectx->capacity = 0; + + return true; +} + +static bool noinline mptcp_rbs_value_sbf_list_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* This function is only used to determine if the list is NULL */ + struct mptcp_rbs_ebpf_block *eblock; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct tcp_sock **)), temp, + VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, sbf_list_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next = eblock; + ectx->eblock->next_else = null_eblock; + + ectx->eblock = eblock; + ectx->capacity = 0; + + return true; +} + +static bool noinline mptcp_rbs_value_sbf_list_var_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + int temp_ptr; + int capacity; + struct mptcp_rbs_ebpf_block *start_eblock; + + /* Check if variable is NULL */ + temp_ptr = reserve(ectx); + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct tcp_sock **)), + temp_ptr, VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, sbf_list_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp_ptr, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + add_instr_ectx( + ectx, EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct tcp_sock *)), + temp, temp_ptr, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + /* Prepare start block */ + start_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + + /* Prepare break block */ + *break_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + + /* Prepare continue block */ + *cont_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + capacity = 0; + add_instr(*cont_eblock, &capacity, + EBPF_ALU_IMM(BPF_ADD, temp_ptr, sizeof(struct tcp_sock *))); + add_instr(*cont_eblock, &capacity, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct tcp_sock *)), + temp, temp_ptr, 0)); + add_instr(*cont_eblock, &capacity, EBPF_JMP_OFF()); + (*cont_eblock)->next = start_eblock; + + /* while (sbf) { */ + ectx->eblock->next = start_eblock; + ectx->eblock = start_eblock; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + ectx->eblock->next_else = *break_eblock; + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + *reserved_temps_map = TEMP_TO_MAP(temp_ptr); + return true; +} + +static bool noinline mptcp_rbs_value_skb_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *eblock; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct 
sk_buff *)), temp, + VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, skb_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next = eblock; + ectx->eblock->next_else = null_eblock; + + ectx->eblock = eblock; + ectx->capacity = 0; + + return true; +} + +static bool noinline mptcp_rbs_value_skb_list_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* This function is only used to determine if the list is NULL */ + struct mptcp_rbs_ebpf_block *eblock; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct sk_buff **)), temp, + VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, skb_list_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next = eblock; + ectx->eblock->next_else = null_eblock; + + ectx->eblock = eblock; + ectx->capacity = 0; + + return true; +} + +static bool noinline mptcp_rbs_value_skb_list_var_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + int temp_ptr; + int capacity; + struct mptcp_rbs_ebpf_block *start_eblock; + + /* Check if variable is NULL */ + temp_ptr = reserve(ectx); + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct sk_buff **)), temp_ptr, + VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, skb_list_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp_ptr, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct sk_buff *)), + temp, temp_ptr, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + /* Prepare start block */ + start_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + + /* Prepare break block */ + *break_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + + /* Prepare continue block */ + *cont_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + capacity = 0; + add_instr(*cont_eblock, &capacity, + EBPF_ALU_IMM(BPF_ADD, temp_ptr, sizeof(struct sk_buff *))); + add_instr(*cont_eblock, &capacity, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct sk_buff *)), + temp, temp_ptr, 0)); + add_instr(*cont_eblock, &capacity, EBPF_JMP_OFF()); + (*cont_eblock)->next = start_eblock; + + /* while (skb) { */ + ectx->eblock->next = start_eblock; + ectx->eblock = start_eblock; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + ectx->eblock->next_else = *break_eblock; + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + *reserved_temps_map = TEMP_TO_MAP(temp_ptr); + return true; +} + +static bool noinline mptcp_rbs_value_not_gen( + struct ebpf_ctx *ectx, const struct 
mptcp_rbs_value_not *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->operand, + temp, null_eblock); + + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_XOR, temp, 1)); + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_equal_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_equal *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + struct mptcp_rbs_ebpf_block *eblock; + struct mptcp_rbs_ebpf_block *last_eblock; + int temp_right; + int capacity; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JNE, temp, temp_right)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_right); + + last_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Add instructions to else (unequal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next_else = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Add instructions to then (equal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Set last_block as current */ + ectx->eblock = last_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_unequal_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_unequal *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + struct mptcp_rbs_ebpf_block *eblock; + struct mptcp_rbs_ebpf_block *last_eblock; + int temp_right; + int capacity; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JEQ, temp, temp_right)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_right); + + last_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Add instructions to else (equal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next_else = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Add instructions to then (unequal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Set last_block as current */ + ectx->eblock = last_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_less_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_less *value, int temp, + struct 
mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + struct mptcp_rbs_ebpf_block *eblock; + struct mptcp_rbs_ebpf_block *last_eblock; + int temp_right; + int capacity; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JGE, temp, temp_right)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_right); + + last_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Add instructions to else (greater equal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next_else = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Add instructions to then (less) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Set last_block as current */ + ectx->eblock = last_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_less_equal_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_less_equal *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + struct mptcp_rbs_ebpf_block *eblock; + struct mptcp_rbs_ebpf_block *last_eblock; + int temp_right; + int capacity; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JGT, temp, temp_right)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_right); + + last_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Add instructions to else (greater) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next_else = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Add instructions to then (less equal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Set last_block as current */ + ectx->eblock = last_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_greater_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_greater *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + struct mptcp_rbs_ebpf_block *eblock; + struct mptcp_rbs_ebpf_block *last_eblock; + int temp_right; + int capacity; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + 
temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JGT, temp, temp_right)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_right); + + last_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Add instructions to else (greater) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next_else = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Add instructions to then (less equal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Set last_block as current */ + ectx->eblock = last_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_greater_equal_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_greater_equal *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + struct mptcp_rbs_ebpf_block *eblock; + struct mptcp_rbs_ebpf_block *last_eblock; + int temp_right; + int capacity; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JGE, temp, temp_right)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_right); + + last_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Add instructions to else (greater equal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next_else = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Add instructions to then (less) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Set last_block as current */ + ectx->eblock = last_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_and_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_and *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *false_eblock; + int capacity; + + /* Prepare the false block that is used instead of the null block */ + false_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(false_eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(false_eblock, &capacity, EBPF_JMP_OFF()); + + /* Calculate left operand */ + gen_value(ectx, (const struct mptcp_rbs_value *) value->left_operand, + temp, false_eblock); + + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = false_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + /* Calculate right operand */ + gen_value(ectx, (const struct 
mptcp_rbs_value *) value->right_operand, + temp, false_eblock); + + /* Create the last block as jump target for the false block */ + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + false_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + return false; +} + +static bool noinline mptcp_rbs_value_or_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_or *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *true_eblock; + struct mptcp_rbs_ebpf_block *false_eblock; + int capacity; + + /* Prepare the true block */ + true_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(true_eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(true_eblock, &capacity, EBPF_JMP_OFF()); + + /* Prepare the false block */ + false_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Calculate left operand */ + gen_value(ectx, (const struct mptcp_rbs_value *) value->left_operand, + temp, false_eblock); + + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + ectx->eblock->next_else = true_eblock; + ectx->eblock->next = false_eblock; + ectx->eblock = false_eblock; + ectx->capacity = 0; + + /* Prepare a new false block */ + false_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(false_eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(false_eblock, &capacity, EBPF_JMP_OFF()); + + /* Calculate right operand */ + gen_value(ectx, (const struct mptcp_rbs_value *) value->right_operand, + temp, false_eblock); + + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + true_eblock->next = ectx->eblock->next; + false_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + return false; +} + +static bool noinline mptcp_rbs_value_add_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_add *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_right; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_ALU32_REG(BPF_ADD, temp, temp_right)); + dereserve(ectx, temp_right); + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_subtract_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_subtract *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_right; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_ALU32_REG(BPF_SUB, temp, temp_right)); + dereserve(ectx, temp_right); + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_multiply_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_multiply *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_right; + +
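+ /* Shared lowering pattern for the binary arithmetic values above and
+  * below (illustrative pseudocode, names are placeholders): evaluate the
+  * left operand into temp, reserve a scratch temporary for the right
+  * operand, OR both null_eblock_used flags, then emit one 32-bit ALU
+  * instruction:
+  *
+  *	temp       = eval(left)		; may branch to null_eblock
+  *	temp_right = eval(right)	; may branch to null_eblock
+  *	temp     op= temp_right		; BPF_ALU32_REG, wraps mod 2^32
+  *
+  * dereserve() releases the scratch temporary again. */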
null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_ALU32_REG(BPF_MUL, temp, temp_right)); + dereserve(ectx, temp_right); + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_divide_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_divide *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + int temp_right; + + gen_value(ectx, (const struct mptcp_rbs_value *) value->left_operand, + temp, null_eblock); + temp_right = reserve(ectx); + gen_value(ectx, (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock); + + /* Check if right operand is 0 */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp_right, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_ALU32_REG(BPF_DIV, temp, temp_right)); + dereserve(ectx, temp_right); + + return true; +} + +static bool noinline mptcp_rbs_value_remainder_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_remainder *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + int temp_right; + + gen_value(ectx, (const struct mptcp_rbs_value *) value->left_operand, + temp, null_eblock); + temp_right = reserve(ectx); + gen_value(ectx, (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock); + + /* Check if right operand is 0 */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp_right, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_ALU32_REG(BPF_MOD, temp, temp_right)); + dereserve(ectx, temp_right); + + return true; +} + +static bool noinline mptcp_rbs_value_is_null_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_is_null *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *true_eblock; + int capacity; + + /* Prepare true block */ + true_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(true_eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(true_eblock, &capacity, EBPF_JMP_OFF()); + + /* Calculate the operand */ + if (!gen_value(ectx, (const struct mptcp_rbs_value *) value->operand, + temp, true_eblock)) { + mptcp_rbs_ebpf_block_free(true_eblock); + true_eblock = NULL; + } + + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + if (true_eblock) + true_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + return false; +} + +static bool noinline mptcp_rbs_value_is_not_null_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_is_not_null *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *false_eblock; + int capacity; + + /* Prepare false block */ + false_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(false_eblock, &capacity, 
EBPF_MOV_IMM(temp, 0)); + add_instr(false_eblock, &capacity, EBPF_JMP_OFF()); + + /* Calculate the operand */ + if (!gen_value(ectx, (const struct mptcp_rbs_value *) value->operand, + temp, false_eblock)) { + mptcp_rbs_ebpf_block_free(false_eblock); + false_eblock = NULL; + } + + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + if (false_eblock) + false_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + return false; +} + +static bool noinline mptcp_rbs_value_reg_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_reg *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(unsigned int)), + temp, REGS_TMP, + sizeof(unsigned int) * value->reg_number)); + + return false; +} + +static bool noinline mptcp_rbs_value_sbf_list_next_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_next *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock, &BLOCK_INFO(ectx->block)->break_eblock, + &BLOCK_INFO(ectx->block)->cont_eblock, + &BLOCK_INFO(ectx->block)->reserved_temps_map); + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_skb_list_next_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_next *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock, &BLOCK_INFO(ectx->block)->break_eblock, + &BLOCK_INFO(ectx->block)->cont_eblock, + &BLOCK_INFO(ectx->block)->reserved_temps_map); + + return null_eblock_used; +} + +/* + * Q sockbuffer list value + */ + +static bool noinline mptcp_rbs_value_q_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_q *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* This function is only used to determine if the list is NULL */ + return false; +} + +static bool noinline mptcp_rbs_value_q_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_q *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + int capacity; + struct mptcp_rbs_ebpf_block *start_eblock; + + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + add_instr_ectx(ectx, + EBPF_CALL(ebpf_q_next, CTX_TMP, temp, -1, -1, -1, temp)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + /* Prepare start block */ + start_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare break block */ + *break_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare continue block */ + *cont_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(*cont_eblock, &capacity, + EBPF_CALL(ebpf_q_next, CTX_TMP, temp, -1, -1, -1, temp)); + add_instr(*cont_eblock, &capacity, EBPF_JMP_OFF()); + (*cont_eblock)->next = start_eblock; + + /* while (skb) { */ + ectx->eblock->next = start_eblock; + ectx->eblock = start_eblock; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + 
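+ /* Control-flow sketch of the loop skeleton handed to the caller
+  * (illustrative C, mirroring the blocks wired up here):
+  *
+  *	skb = ebpf_q_next(ctx, NULL);
+  *	while (skb) {				// start_eblock: JEQ temp, 0
+  *		...body emitted by the caller...
+  *		skb = ebpf_q_next(ctx, skb);	// cont_eblock
+  *	}					// break_eblock
+  *
+  * The taken edge of the JEQ goes to the break block via next_else set
+  * just below; the fallthrough edge enters the freshly allocated body
+  * block. */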
ectx->eblock->next_else = *break_eblock; + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + *reserved_temps_map = 0; + return false; +} + +/* + * QU sockbuffer list value + */ + +static bool noinline mptcp_rbs_value_qu_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_qu *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* This function is only used to determine if the list is NULL */ + return false; +} + +static bool noinline mptcp_rbs_value_qu_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_qu *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + int capacity; + struct mptcp_rbs_ebpf_block *start_eblock; + + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + add_instr_ectx( + ectx, EBPF_CALL(ebpf_qu_next, CTX_TMP, temp, -1, -1, -1, temp)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + /* Prepare start block */ + start_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare break block */ + *break_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare continue block */ + *cont_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(*cont_eblock, &capacity, + EBPF_CALL(ebpf_qu_next, CTX_TMP, temp, -1, -1, -1, temp)); + add_instr(*cont_eblock, &capacity, EBPF_JMP_OFF()); + (*cont_eblock)->next = start_eblock; + + /* while (skb) { */ + ectx->eblock->next = start_eblock; + ectx->eblock = start_eblock; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next_else = *break_eblock; + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + *reserved_temps_map = 0; + return false; +} + +/* + * RQ sockbuffer list value + */ + +static bool noinline mptcp_rbs_value_rq_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_rq *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* This function is only used to determine if the list is NULL */ + return false; +} + +static bool noinline mptcp_rbs_value_rq_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_rq *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + int capacity; + struct mptcp_rbs_ebpf_block *start_eblock; + + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + add_instr_ectx( + ectx, EBPF_CALL(ebpf_rq_next, CTX_TMP, temp, -1, -1, -1, temp)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + /* Prepare start block */ + start_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare break block */ + *break_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare continue block */ + *cont_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(*cont_eblock, &capacity, + EBPF_CALL(ebpf_rq_next, CTX_TMP, temp, -1, -1, -1, temp)); + add_instr(*cont_eblock, &capacity, EBPF_JMP_OFF()); + (*cont_eblock)->next = start_eblock; + + /* while (skb) { */ + ectx->eblock->next = start_eblock; + ectx->eblock = start_eblock; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct 
mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next_else = *break_eblock; + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + *reserved_temps_map = 0; + return false; +} + +/* + * SUBFLOWS subflow list value + */ + +static bool noinline mptcp_rbs_value_subflows_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_subflows *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* This function is only used to determine if the list is NULL */ + return false; +} + +static bool noinline mptcp_rbs_value_subflows_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_subflows *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + int capacity; + struct mptcp_rbs_ebpf_block *start_eblock; + + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + add_instr_ectx(ectx, EBPF_CALL(ebpf_subflows_next, CTX_TMP, temp, -1, + -1, -1, temp)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + /* Prepare start block */ + start_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare break block */ + *break_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare continue block */ + *cont_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr( + *cont_eblock, &capacity, + EBPF_CALL(ebpf_subflows_next, CTX_TMP, temp, -1, -1, -1, temp)); + add_instr(*cont_eblock, &capacity, EBPF_JMP_OFF()); + (*cont_eblock)->next = start_eblock; + + /* while (sbf) { */ + ectx->eblock->next = start_eblock; + ectx->eblock = start_eblock; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next_else = *break_eblock; + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + *reserved_temps_map = 0; + return false; +} + +/* + * CURRENT_TIME_MS integer value + */ + +static bool noinline mptcp_rbs_value_current_time_ms_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_current_time_ms *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + add_instr_ectx( + ectx, EBPF_CALL(ebpf_ktime_get_raw_ms, -1, -1, -1, -1, -1, temp)); + + return false; +} + +/* + * RANDOM integer value + */ + +static bool noinline mptcp_rbs_value_random_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_random *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + add_instr_ectx(ectx, EBPF_CALL(ebpf_random, -1, -1, -1, -1, -1, temp)); + + return false; +} + +/* + * .RTT integer value + */ + +static bool noinline mptcp_rbs_value_sbf_rtt_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_rtt *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct tcp_sock, srtt_us))); + + return null_eblock_used; +} + +/* + * .RTT_VAR integer value + */ + +static bool noinline mptcp_rbs_value_sbf_rtt_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_rtt_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + 
null_eblock); + +
add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct tcp_sock, rttvar_us))); + + return null_eblock_used; +} + +/* + * .RTT_MS integer value + */ +static bool noinline mptcp_rbs_value_sbf_rtt_ms_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_rtt_ms *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, EBPF_CALL(ebpf_rtt_ms, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + +/* + * .QUEUED integer value + */ +static bool noinline mptcp_rbs_value_sbf_queued_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_queued *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, EBPF_CALL(ebpf_queued, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + +/* + * .USER integer value + */ + +static bool noinline mptcp_rbs_value_sbf_user_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_user *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, EBPF_CALL(ebpf_sbf_user, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + +/* + * .IS_BACKUP boolean value + */ + +static bool noinline mptcp_rbs_value_sbf_is_backup_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_is_backup *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + struct mptcp_tcp_sock bitfields; + int val; + int shift; + int shift2; + int temp2; + + /* We need to find a shift amount to access the low_prio and + * rcv_low_prio bit fields + */ + memset(&bitfields, 0, sizeof(struct mptcp_tcp_sock)); + bitfields.low_prio = 1; + bitfields.rcv_low_prio = 1; + val = *(&bitfields.map_data_len + 1); + shift = 0; + while (!(val & 1)) { + val >>= 1; + ++shift; + } + val >>= 1; + shift2 = shift + 1; + while (!(val & 1)) { + val >>= 1; + ++shift2; + } + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct mptcp_tcp_sock *)), + temp, temp, offsetof(struct tcp_sock, mptcp))); + add_instr_ectx( + ectx, EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u16)), temp, temp, + offsetof(struct mptcp_tcp_sock, map_data_len) + + sizeof(u16))); + temp2 = reserve(ectx); + add_instr_ectx(ectx, EBPF_MOV_REG(temp2, temp)); + if (shift) + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_RSH, temp, shift)); + if (shift2) + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_RSH, temp2, shift2)); + + add_instr_ectx(ectx, EBPF_ALU_REG(BPF_OR, temp, temp2)); + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_AND, temp, 1)); + dereserve(ectx, temp2); + + return null_eblock_used; +} + +/* + * .CWND integer value + */ + +static bool noinline mptcp_rbs_value_sbf_cwnd_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_cwnd *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct tcp_sock, 
snd_cwnd))); + + return null_eblock_used; +} + +/* + * .SKBS_IN_FLIGHT integer value + */ + +static bool noinline mptcp_rbs_value_sbf_skbs_in_flight_gen( + struct ebpf_ctx *ectx, + const struct mptcp_rbs_value_sbf_skbs_in_flight *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct tcp_sock, packets_out))); + + return null_eblock_used; +} + +/* + * .LOST_SKBS integer value + */ + +static bool noinline mptcp_rbs_value_sbf_lost_skbs_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_lost_skbs *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct tcp_sock, lost_out))); + + return null_eblock_used; +} + +/* + * .HAS_WINDOW_FOR boolean value + */ + +static bool noinline mptcp_rbs_value_sbf_has_window_for_gen( + struct ebpf_ctx *ectx, + const struct mptcp_rbs_value_sbf_has_window_for *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_skb; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + temp_skb = reserve(ectx); + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->skb, + temp_skb, null_eblock) || + null_eblock_used; + add_instr_ectx(ectx, EBPF_CALL(ebpf_has_window_for, CTX_TMP, temp, + temp_skb, -1, -1, temp)); + dereserve(ectx, temp_skb); + + return null_eblock_used; +} + +/* + * .ID integer value + */ + +static bool noinline mptcp_rbs_value_sbf_id_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_id *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct mptcp_tcp_sock *)), + temp, temp, offsetof(struct tcp_sock, mptcp))); + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u8)), temp, temp, + offsetof(struct mptcp_tcp_sock, sbf_id))); + + return null_eblock_used; +} + +/* + * .DELAY_IN integer value + */ + +static bool noinline mptcp_rbs_value_sbf_delay_in_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_delay_in *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct mptcp_tcp_sock *)), + temp, temp, offsetof(struct tcp_sock, mptcp))); + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct mptcp_tcp_sock, mptcp_sched) + + offsetof(struct mptcp_rbs_sbf_cb, delay_in))); + + return null_eblock_used; +} + +/* + * .DELAY_OUT integer value + */ + +static bool noinline mptcp_rbs_value_sbf_delay_out_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_delay_out *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + 
null_eblock); + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct mptcp_tcp_sock *)), + temp, temp, offsetof(struct tcp_sock, mptcp))); + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct mptcp_tcp_sock, mptcp_sched) + + offsetof(struct mptcp_rbs_sbf_cb, delay_out))); + + return null_eblock_used; +} + +/* + * .BW_OUT_ACK integer value + */ + +static bool noinline mptcp_rbs_value_sbf_bw_out_ack_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_bw_out_ack *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, + EBPF_CALL(ebpf_bw_out_ack, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + +/* + * .BW_OUT_SEND integer value + */ + +static bool noinline mptcp_rbs_value_sbf_bw_out_send_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_bw_out_send *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, + EBPF_CALL(ebpf_bw_out_send, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + +/* + * .SSTHRESH integer value + */ + +static bool noinline mptcp_rbs_value_sbf_ssthresh_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_ssthresh *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct tcp_sock, snd_ssthresh))); + + return null_eblock_used; +} + +/* + * .THROTTLED boolean value + */ + +static bool noinline mptcp_rbs_value_sbf_throttled_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_throttled *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx( + ectx, EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(unsigned long)), temp, + temp, offsetof(struct tcp_sock, tsq_flags))); + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_RSH, temp, TSQ_THROTTLED)); + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_AND, temp, 1)); + + return null_eblock_used; +} + +/* + * .LOSSY boolean value + */ + +static bool noinline mptcp_rbs_value_sbf_lossy_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_lossy *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, EBPF_CALL(ebpf_lossy, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + +/* + * .EMPTY boolean value + */ + +static bool noinline mptcp_rbs_value_sbf_list_empty_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_empty *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_sbf; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + + /* empty = true; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 1)); + + temp_sbf = reserve(ectx); + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) 
value->list, temp_sbf, + null_eblock, &break_eblock, &cont_eblock, &reserved_temps_map); + + /* empty = false; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = break_eblock; + + mptcp_rbs_ebpf_block_free(cont_eblock); + + dereserve(ectx, temp_sbf); + dereserve_all(ectx, reserved_temps_map); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +/* + * .FILTER subflow list value + */ + +static bool noinline mptcp_rbs_value_sbf_list_filter_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_filter *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock); + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->cond, temp, + null_eblock) || + null_eblock_used; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_sbf_list_filter_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_filter *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + bool null_eblock_used; + int temp_t; + struct filter_var var; + struct mptcp_rbs_ebpf_block *cont_eblock2; + int capacity; + + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock, break_eblock, cont_eblock, reserved_temps_map); + + /* if (cond) */ + var.progress = &value->cur; + var.temp = temp; + PUSH_FILTER_VAR(&ectx->filter_var_list, &var); + temp_t = reserve(ectx); + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->cond, + temp_t, null_eblock) || + null_eblock_used; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp_t, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_t); + POP_FILTER_VAR(&ectx->filter_var_list); + ectx->eblock->next_else = *cont_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + /* Create an extra continue block because the value above might free the + * continue block with the assumption that it is not used.
But actually + * it is used + */ + cont_eblock2 = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(cont_eblock2, &capacity, EBPF_JMP_OFF()); + cont_eblock2->next = *cont_eblock; + *cont_eblock = cont_eblock2; + + return null_eblock_used; +} + +/* + * Special value holding the actual subflow for FILTER subflow list value + */ + +static bool noinline mptcp_rbs_value_sbf_list_filter_sbf_gen( + struct ebpf_ctx *ectx, + const struct mptcp_rbs_value_sbf_list_filter_sbf *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct filter_var *var; + + FOREACH_FILTER_VAR(&ectx->filter_var_list, var, { + if (var->progress == value->cur) { + add_instr_ectx(ectx, EBPF_MOV_REG(temp, var->temp)); + return false; + } + }); + + /* Not found */ + BUG_ON(true); + return false; +} + +/* + * .MAX subflow value + */ + +static bool noinline mptcp_rbs_value_sbf_list_max_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_max *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + int temp_sbf; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + struct filter_var var; + int temp_max; + int temp_t; + + /* temp_max = -1; temp = NULL; */ + temp_max = reserve(ectx); + add_instr_ectx(ectx, EBPF_MOV_IMM(temp_max, -1)); + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + + temp_sbf = reserve(ectx); + gen_list_value(ectx, (const struct mptcp_rbs_value *) value->list, + temp_sbf, null_eblock, &break_eblock, &cont_eblock, + &reserved_temps_map); + + /* if (temp_max < item) */ + var.progress = &value->cur; + var.temp = temp_sbf; + temp_t = reserve(ectx); + PUSH_FILTER_VAR(&ectx->filter_var_list, &var); + gen_value(ectx, (const struct mptcp_rbs_value *) value->cond, temp_t, + null_eblock); + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JSGE, temp_max, temp_t)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + POP_FILTER_VAR(&ectx->filter_var_list); + ectx->eblock->next_else = cont_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_MOV_REG(temp_max, temp_t)); + add_instr_ectx(ectx, EBPF_MOV_REG(temp, temp_sbf)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = cont_eblock; + dereserve(ectx, temp_t); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + /* Check if list was empty */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + dereserve(ectx, temp_sbf); + dereserve(ectx, temp_max); + dereserve_all(ectx, reserved_temps_map); + + return true; +} + +/* + * .MIN subflow value + */ + +static bool noinline mptcp_rbs_value_sbf_list_min_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_min *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + int temp_sbf; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + struct filter_var var; + int temp_min; + int temp_t; + + /* temp_min = 0x100000000; temp = NULL; */ + temp_min = reserve(ectx); + add_instr_ectx( + ectx, + EBPF_RAW_INSTR(((struct bpf_insn){.code = BPF_LD | BPF_DW | BPF_IMM, + .dst_reg = temp_min, + .src_reg = 0, + .off = 0, + .imm = 0 }), + -1, -1, -1, -1, -1, temp_min)); + 
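+ /* BPF_LD | BPF_DW | BPF_IMM occupies two instruction slots: the insn
+  * above holds the low 32 bits of the immediate (0) and the pseudo insn
+  * emitted next holds the high 32 bits (1), together loading the 64-bit
+  * sentinel 0x100000000. Since the candidates are 32-bit values, the
+  * first list item always compares below the sentinel and becomes the
+  * initial minimum. */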
add_instr_ectx(ectx, EBPF_RAW_INSTR(((struct bpf_insn){.code = 0, + .dst_reg = 0, + .src_reg = 0, + .off = 0, + .imm = 1 }), + -1, -1, -1, -1, -1, -1)); + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + + temp_sbf = reserve(ectx); + gen_list_value(ectx, (const struct mptcp_rbs_value *) value->list, + temp_sbf, null_eblock, &break_eblock, &cont_eblock, + &reserved_temps_map); + + /* if (temp_min > item) */ + var.progress = &value->cur; + var.temp = temp_sbf; + temp_t = reserve(ectx); + PUSH_FILTER_VAR(&ectx->filter_var_list, &var); + gen_value(ectx, (const struct mptcp_rbs_value *) value->cond, temp_t, + null_eblock); + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JGE, temp_t, temp_min)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + POP_FILTER_VAR(&ectx->filter_var_list); + ectx->eblock->next_else = cont_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_MOV_REG(temp_min, temp_t)); + add_instr_ectx(ectx, EBPF_MOV_REG(temp, temp_sbf)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = cont_eblock; + dereserve(ectx, temp_t); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + /* Check if list was empty */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + dereserve(ectx, temp_sbf); + dereserve(ectx, temp_min); + dereserve_all(ectx, reserved_temps_map); + + return true; +} + +/* + * .GET subflow value + */ + +static bool noinline mptcp_rbs_value_sbf_list_get_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_get *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + int temp_i; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + + temp_i = reserve(ectx); + gen_value(ectx, (const struct mptcp_rbs_value *) value->index, temp_i, + null_eblock); + + gen_list_value(ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock, &break_eblock, &cont_eblock, + &reserved_temps_map); + + /* if (i == 0) */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp_i, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = break_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + /* --i; */ + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_SUB, temp_i, 1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = cont_eblock; + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + /* Check if index was found */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + dereserve(ectx, temp_i); + dereserve_all(ectx, reserved_temps_map); + + return true; +} + +/* + * .COUNT integer value + */ + +static bool noinline mptcp_rbs_value_sbf_list_count_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_count *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_sbf; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 
reserved_temps_map; + + /* i = 0; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + + temp_sbf = reserve(ectx); + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp_sbf, + null_eblock, &break_eblock, &cont_eblock, &reserved_temps_map); + + /* ++i; */ + add_instr_ectx(ectx, EBPF_ALU32_IMM(BPF_ADD, temp, 1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = cont_eblock; + + dereserve(ectx, temp_sbf); + dereserve_all(ectx, reserved_temps_map); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +/* + * .SUM integer value + */ + +static bool noinline mptcp_rbs_value_sbf_list_sum_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_sum *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_sbf; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + struct filter_var var; + int temp_t; + + /* sum = 0; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + + temp_sbf = reserve(ectx); + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp_sbf, + null_eblock, &break_eblock, &cont_eblock, &reserved_temps_map); + + /* sum += item; */ + var.progress = &value->cur; + var.temp = temp_sbf; + PUSH_FILTER_VAR(&ectx->filter_var_list, &var); + temp_t = reserve(ectx); + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->cond, + temp_t, null_eblock) || + null_eblock_used; + add_instr_ectx(ectx, EBPF_ALU32_REG(BPF_ADD, temp, temp_t)); + dereserve(ectx, temp_t); + POP_FILTER_VAR(&ectx->filter_var_list); + + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = cont_eblock; + + dereserve(ectx, temp_sbf); + dereserve_all(ectx, reserved_temps_map); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +/* + * .SENT_ON boolean value + */ + +static bool noinline mptcp_rbs_value_skb_sent_on_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_sent_on *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_sbf; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->skb, temp, + null_eblock); + temp_sbf = reserve(ectx); + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, + temp_sbf, null_eblock) || + null_eblock_used; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct mptcp_tcp_sock *)), + temp_sbf, temp_sbf, offsetof(struct tcp_sock, mptcp))); + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u8)), temp_sbf, temp_sbf, + offsetof(struct mptcp_tcp_sock, path_index))); + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_SUB, temp_sbf, 1)); + + add_instr_ectx( + ectx, EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct sk_buff, cb) + + offsetof(struct tcp_skb_cb, path_mask))); + add_instr_ectx(ectx, EBPF_ALU_REG(BPF_RSH, temp, temp_sbf)); + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_AND, temp, 1)); + dereserve(ectx, temp_sbf); + + return null_eblock_used; +} + +/* + * .SENT_ON_ALL boolean value + */ + +static bool noinline mptcp_rbs_value_skb_sent_on_all_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_sent_on_all *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->skb, temp, + null_eblock); + + add_instr_ectx( + 
ectx, EBPF_CALL(ebpf_sent_on_all, CTX_TMP, temp, -1, -1, -1, temp)); + + return null_eblock_used; +} + + +/* + * .LENGTH integer value + */ + +static bool noinline mptcp_rbs_value_skb_length_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_length *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->skb, temp, + null_eblock); + + add_instr_ectx( + ectx, EBPF_CALL(ebpf_skb_length, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + + +/* + * .SKB_SEQ integer value + */ + +static bool noinline mptcp_rbs_value_skb_seq_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_seq *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->skb, temp, + null_eblock); + + add_instr_ectx( + ectx, EBPF_CALL(ebpf_skb_seq, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + + +/* + * .PSH boolean value + */ + +static bool noinline mptcp_rbs_value_skb_psh_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_psh *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->skb, temp, + null_eblock); + + add_instr_ectx( + ectx, EBPF_CALL(ebpf_skb_psh, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + +/* + * .USER integer value + */ + +static bool noinline mptcp_rbs_value_skb_user_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_user *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + union tcp_skb_cb_rbs bitfields; + int shift; + + /* We need to find a shift amount to access the user bit field */ + bitfields.b = 0; + bitfields.user = 0x1f; + shift = 0; + while (!(bitfields.b & 1)) { + bitfields.b >>= 1; + ++shift; + } + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->skb, temp, + null_eblock); + + add_instr_ectx( + ectx, EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u8)), temp, temp, + offsetof(struct sk_buff, cb) + + offsetof(struct tcp_skb_cb, mptcp_rbs))); + if (shift) + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_RSH, temp, shift)); + if (shift < 3) + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_AND, temp, 0x1f)); + + return null_eblock_used; +} + +/* + * .EMPTY boolean value + */ + +static bool noinline mptcp_rbs_value_skb_list_empty_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_empty *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_skb; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + + /* empty = true; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 1)); + + temp_skb = reserve(ectx); + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp_skb, + null_eblock, &break_eblock, &cont_eblock, &reserved_temps_map); + + /* empty = false; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = break_eblock; + + mptcp_rbs_ebpf_block_free(cont_eblock); + + dereserve(ectx, temp_skb); + dereserve_all(ectx, reserved_temps_map); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +/* + * .POP() sockbuffer value + */ + +static bool noinline mptcp_rbs_value_skb_list_pop_gen( + struct 
ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_pop *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + int temp_underlying_queue_kind; + + gen_list_value(ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock, &break_eblock, &cont_eblock, + &reserved_temps_map); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = break_eblock; + mptcp_rbs_ebpf_block_free(cont_eblock); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + /* Check if list was empty */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + dereserve_all(ectx, reserved_temps_map); + + temp_underlying_queue_kind = reserve(ectx); + add_instr_ectx(ectx, EBPF_MOV_IMM(temp_underlying_queue_kind, + value->list->underlying_queue_kind)); + add_instr_ectx(ectx, + EBPF_CALL(ebpf_skb_list_pop, CTX_TMP, temp, + temp_underlying_queue_kind, -1, -1, temp)); + dereserve(ectx, temp_underlying_queue_kind); + return true; +} + +/* + * .FILTER() sockbuffer list value + */ + +static bool noinline mptcp_rbs_value_skb_list_filter_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_filter *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock); + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->cond, temp, + null_eblock) || + null_eblock_used; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_skb_list_filter_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_filter *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + bool null_eblock_used; + int temp_t; + struct filter_var var; + struct mptcp_rbs_ebpf_block *cont_eblock2; + int capacity; + + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock, break_eblock, cont_eblock, reserved_temps_map); + + /* if (cond) */ + var.progress = &value->progress; + var.temp = temp; + PUSH_FILTER_VAR(&ectx->filter_var_list, &var); + temp_t = reserve(ectx); + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->cond, + temp_t, null_eblock) || + null_eblock_used; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp_t, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_t); + POP_FILTER_VAR(&ectx->filter_var_list); + ectx->eblock->next_else = *cont_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + /* Create an extra continue block because the value above might free the + * continue block with the assumption that it is not used.
But actually + * it is used + */ + cont_eblock2 = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(cont_eblock2, &capacity, EBPF_JMP_OFF()); + cont_eblock2->next = *cont_eblock; + *cont_eblock = cont_eblock2; + + return null_eblock_used; +} + +/* + * Special value holding the actual sockbuffer for FILTER sockbuffer list value + */ + +static bool noinline mptcp_rbs_value_skb_list_filter_skb_gen( + struct ebpf_ctx *ectx, + const struct mptcp_rbs_value_skb_list_filter_skb *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct filter_var *var; + + FOREACH_FILTER_VAR(&ectx->filter_var_list, var, { + if (var->progress == value->progress) { + add_instr_ectx(ectx, EBPF_MOV_REG(temp, var->temp)); + return false; + } + }); + + /* Not found */ + BUG_ON(true); + return false; +} + +/* + * .COUNT integer value + */ + +static bool noinline mptcp_rbs_value_skb_list_count_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_count *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_skb; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + + /* i = 0; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + + temp_skb = reserve(ectx); + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp_skb, + null_eblock, &break_eblock, &cont_eblock, &reserved_temps_map); + + /* ++i; */ + add_instr_ectx(ectx, EBPF_ALU32_IMM(BPF_ADD, temp, 1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = cont_eblock; + + dereserve(ectx, temp_skb); + dereserve_all(ectx, reserved_temps_map); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +/* + * .TOP sockbuffer value + */ + +static bool noinline mptcp_rbs_value_skb_list_top_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_top *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + + gen_list_value(ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock, &break_eblock, &cont_eblock, + &reserved_temps_map); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = break_eblock; + mptcp_rbs_ebpf_block_free(cont_eblock); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + /* Check if list was empty */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + dereserve_all(ectx, reserved_temps_map); + + return true; +} + +/* + * .GET sockbuffer value + */ + +static bool noinline mptcp_rbs_value_skb_list_get_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_get *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + printk("%s is not implemented yet for eBPF.", __func__); + BUG_ON(true); + return true; +} + +/** + * Generates the eBPF instructions for a value in the current block. 
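+ * A typical caller follows the reserve/gen/dereserve pattern used
+ * throughout this file (illustrative sketch; v and used are placeholder
+ * names):
+ *
+ *	temp = reserve(ectx);
+ *	used = gen_value(ectx, (const struct mptcp_rbs_value *) v, temp,
+ *			 null_eblock);
+ *	...instructions consuming temp...
+ *	dereserve(ectx, temp);
+ *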
+
+/**
+ * Generates the eBPF instructions for a value in the current block. For
+ * values returning lists, this function can only check for NULL
+ * @ectx: The generation context
+ * @value: The CFG value
+ * @temp: Temporary where the value should be stored
+ * @null_eblock: The eBPF block the code should jump to if this value is NULL
+ * @return: true if the null_eblock was referenced
+ */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic error "-Wswitch"
+static bool gen_value(struct ebpf_ctx *ectx,
+		      const struct mptcp_rbs_value *value, int temp,
+		      struct mptcp_rbs_ebpf_block *null_eblock)
+{
+#define APPLY_GEN_VALUE(ENUM, STRUCT)                                          \
+	case ENUM:                                                             \
+		return STRUCT##_gen(ectx, (const struct STRUCT *) value, temp, \
+				    null_eblock);
+
+	switch (value->kind) {
+	case VALUE_KIND_CONSTINT:
+		return mptcp_rbs_value_constint_gen(
+		    ectx, (const struct mptcp_rbs_value_constint *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_CONSTSTRING:
+		return mptcp_rbs_value_conststring_gen(
+		    ectx, (const struct mptcp_rbs_value_conststring *) value,
+		    temp, null_eblock);
+	case VALUE_KIND_NULL:
+		return mptcp_rbs_value_null_gen(
+		    ectx, (const struct mptcp_rbs_value_null *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_BOOL_VAR:
+		return mptcp_rbs_value_bool_var_gen(
+		    ectx, (const struct mptcp_rbs_value_bool_var *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_INT_VAR:
+		return mptcp_rbs_value_int_var_gen(
+		    ectx, (const struct mptcp_rbs_value_int_var *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_STRING_VAR:
+		return mptcp_rbs_value_string_var_gen(
+		    ectx, (const struct mptcp_rbs_value_string_var *) value,
+		    temp, null_eblock);
+	case VALUE_KIND_SBF_VAR:
+		return mptcp_rbs_value_sbf_var_gen(
+		    ectx, (const struct mptcp_rbs_value_sbf_var *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_SBFLIST_VAR:
+		return mptcp_rbs_value_sbf_list_var_gen(
+		    ectx, (const struct mptcp_rbs_value_sbf_list_var *) value,
+		    temp, null_eblock);
+	case VALUE_KIND_SKB_VAR:
+		return mptcp_rbs_value_skb_var_gen(
+		    ectx, (const struct mptcp_rbs_value_skb_var *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_SKBLIST_VAR:
+		return mptcp_rbs_value_skb_list_var_gen(
+		    ectx, (const struct mptcp_rbs_value_skb_list_var *) value,
+		    temp, null_eblock);
+	case VALUE_KIND_NOT:
+		return mptcp_rbs_value_not_gen(
+		    ectx, (const struct mptcp_rbs_value_not *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_EQUAL:
+		return mptcp_rbs_value_equal_gen(
+		    ectx, (const struct mptcp_rbs_value_equal *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_UNEQUAL:
+		return mptcp_rbs_value_unequal_gen(
+		    ectx, (const struct mptcp_rbs_value_unequal *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_LESS:
+		return mptcp_rbs_value_less_gen(
+		    ectx, (const struct mptcp_rbs_value_less *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_LESS_EQUAL:
+		return mptcp_rbs_value_less_equal_gen(
+		    ectx, (const struct mptcp_rbs_value_less_equal *) value,
+		    temp, null_eblock);
+	case VALUE_KIND_GREATER:
+		return mptcp_rbs_value_greater_gen(
+		    ectx, (const struct mptcp_rbs_value_greater *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_GREATER_EQUAL:
+		return mptcp_rbs_value_greater_equal_gen(
+		    ectx, (const struct mptcp_rbs_value_greater_equal *) value,
+		    temp, null_eblock);
+	case VALUE_KIND_AND:
+		return mptcp_rbs_value_and_gen(
+		    ectx, (const struct mptcp_rbs_value_and *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_OR:
+		return mptcp_rbs_value_or_gen(
+		    ectx, (const struct mptcp_rbs_value_or *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_ADD:
+		return mptcp_rbs_value_add_gen(
+		    ectx, (const struct mptcp_rbs_value_add *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_SUBTRACT:
+		return mptcp_rbs_value_subtract_gen(
+		    ectx, (const struct mptcp_rbs_value_subtract *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_MULTIPLY:
+		return mptcp_rbs_value_multiply_gen(
+		    ectx, (const struct mptcp_rbs_value_multiply *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_DIVIDE:
+		return mptcp_rbs_value_divide_gen(
+		    ectx, (const struct mptcp_rbs_value_divide *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_REMAINDER:
+		return mptcp_rbs_value_remainder_gen(
+		    ectx, (const struct mptcp_rbs_value_remainder *) value,
+		    temp, null_eblock);
+	case VALUE_KIND_IS_NULL:
+		return mptcp_rbs_value_is_null_gen(
+		    ectx, (const struct mptcp_rbs_value_is_null *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_IS_NOT_NULL:
+		return mptcp_rbs_value_is_not_null_gen(
+		    ectx, (const struct mptcp_rbs_value_is_not_null *) value,
+		    temp, null_eblock);
+	case VALUE_KIND_REG:
+		return mptcp_rbs_value_reg_gen(
+		    ectx, (const struct mptcp_rbs_value_reg *) value, temp,
+		    null_eblock);
+	case VALUE_KIND_SBFLIST_NEXT:
+		return mptcp_rbs_value_sbf_list_next_gen(
+		    ectx, (const struct mptcp_rbs_value_sbf_list_next *) value,
+		    temp, null_eblock);
+	case VALUE_KIND_SKBLIST_NEXT:
+		return mptcp_rbs_value_skb_list_next_gen(
+		    ectx, (const struct mptcp_rbs_value_skb_list_next *) value,
+		    temp, null_eblock);
+
+#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) APPLY_GEN_VALUE(ENUM, STRUCT)
+#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE)                        \
+	APPLY_GEN_VALUE(ENUM, STRUCT)
+#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE)                   \
+	APPLY_GEN_VALUE(ENUM, STRUCT)
+#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE)                        \
+	APPLY_GEN_VALUE(ENUM, STRUCT)
+#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE)                   \
+	APPLY_GEN_VALUE(ENUM, STRUCT)
+	MPTCP_RBS_VALUE_INFO
+#undef RBS_APPLY
+#undef RBS_APPLY_ON_SBF
+#undef RBS_APPLY_ON_SBF_LIST
+#undef RBS_APPLY_ON_SKB
+#undef RBS_APPLY_ON_SKB_LIST
+	}
+
+	return false;
+}
+#pragma GCC diagnostic pop
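+
+/*
+ * List values are generated as loops. A rough sketch of the block structure
+ * that gen_list_value (below) hands back to its caller:
+ *
+ *	start:	temp = <next item>	; branches to break_eblock when done
+ *	body:	...caller's code...	; current item in temp
+ *		goto cont_eblock	; cont_eblock jumps back to start
+ *	break_eblock:			; first block after the loop
+ *
+ * The caller appends its per-item code to the body and then resumes normal
+ * code generation in break_eblock.
+ */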
+
+/**
+ * Generates the eBPF instructions for a list value in the current block
+ * @ectx: The generation context
+ * @value: The CFG value
+ * @temp: Temporary where the item should be stored
+ * @null_eblock: The eBPF block the code should jump to if this value is NULL
+ * @break_eblock: Receives the eBPF block that can be jumped to in order to
+ * break out of the loop
+ * @cont_eblock: Receives the eBPF block the code can use to continue with the
+ * next loop iteration. Note that this block should directly jump to the start
+ * block of the loop without any blocks in between
+ * @reserved_temps_map: Bitmap of the temporaries that were reserved for the
+ * loop
+ * @return: true if the null_eblock was referenced
+ */
+static bool gen_list_value(struct ebpf_ctx *ectx,
+			   const struct mptcp_rbs_value *value, int temp,
+			   struct mptcp_rbs_ebpf_block *null_eblock,
+			   struct mptcp_rbs_ebpf_block **break_eblock,
+			   struct mptcp_rbs_ebpf_block **cont_eblock,
+			   u64 *reserved_temps_map)
+{
+#define APPLY_GEN2_VALUE_TYPE_KIND_BOOL(ENUM, STRUCT)
+#define APPLY_GEN2_VALUE_TYPE_KIND_INT(ENUM, STRUCT)
+#define APPLY_GEN2_VALUE_TYPE_KIND_STRING(ENUM, STRUCT)
+#define APPLY_GEN2_VALUE_TYPE_KIND_SBF(ENUM, STRUCT)
+#define APPLY_GEN2_VALUE_TYPE_KIND_SBFLIST(ENUM, STRUCT)                       \
+	case ENUM:                                                             \
+		return STRUCT##_gen2(ectx, (const struct STRUCT *) value,      \
+				     temp, null_eblock, break_eblock,          \
+				     cont_eblock, reserved_temps_map);
+#define APPLY_GEN2_VALUE_TYPE_KIND_SKB(ENUM, STRUCT)
+#define APPLY_GEN2_VALUE_TYPE_KIND_SKBLIST(ENUM, STRUCT)                       \
+	APPLY_GEN2_VALUE_TYPE_KIND_SBFLIST(ENUM, STRUCT)
+#define APPLY_GEN2_VALUE(ENUM, STR, STRUCT, RETURNTYPE)                        \
+	APPLY_GEN2_VALUE_##RETURNTYPE(ENUM, STRUCT)
+
+	switch (value->kind) {
+	case VALUE_KIND_SBFLIST_VAR:
+		return mptcp_rbs_value_sbf_list_var_gen2(
+		    ectx, (const struct mptcp_rbs_value_sbf_list_var *) value,
+		    temp, null_eblock, break_eblock, cont_eblock,
+		    reserved_temps_map);
+	case VALUE_KIND_SKBLIST_VAR:
+		return mptcp_rbs_value_skb_list_var_gen2(
+		    ectx, (const struct mptcp_rbs_value_skb_list_var *) value,
+		    temp, null_eblock, break_eblock, cont_eblock,
+		    reserved_temps_map);
+#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE)                               \
+	APPLY_GEN2_VALUE(ENUM, STR, STRUCT, RETURNTYPE)
+#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE)                        \
+	APPLY_GEN2_VALUE(ENUM, STR, STRUCT, RETURNTYPE)
+#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE)                   \
+	APPLY_GEN2_VALUE(ENUM, STR, STRUCT, RETURNTYPE)
+#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE)                        \
+	APPLY_GEN2_VALUE(ENUM, STR, STRUCT, RETURNTYPE)
+#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE)                   \
+	APPLY_GEN2_VALUE(ENUM, STR, STRUCT, RETURNTYPE)
+	MPTCP_RBS_VALUE_INFO
+#undef RBS_APPLY
+#undef RBS_APPLY_ON_SBF
+#undef RBS_APPLY_ON_SBF_LIST
+#undef RBS_APPLY_ON_SKB
+#undef RBS_APPLY_ON_SKB_LIST
+	default: {
+		BUG_ON(true);
+		return false;
+	}
+	}
+}
+
+static void gen_smt_drop(struct ebpf_ctx *ectx,
+			 const struct mptcp_rbs_smt_drop *smt)
+{
+	struct mptcp_rbs_ebpf_block *eblock;
+	int temp_skb;
+	int temp_reinject;
+
+	eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL);
+
+	temp_skb = reserve(ectx);
+	gen_value(ectx, (struct mptcp_rbs_value *) smt->skb, temp_skb, eblock);
+
+	temp_reinject = reserve(ectx);
+	add_instr_ectx(ectx, EBPF_MOV_IMM(temp_reinject, smt->skb->reinject));
+	add_instr_ectx(ectx, EBPF_CALL(ebpf_add_drop, CTX_TMP, temp_skb,
+				       temp_reinject, -1, -1, -1));
+	add_instr_ectx(ectx, EBPF_JMP_OFF());
+	dereserve(ectx, temp_reinject);
+	dereserve(ectx, temp_skb);
+
+	ectx->eblock->next = eblock;
+	ectx->eblock = eblock;
+	ectx->capacity = 0;
+}
+
+static void gen_smt_print(struct ebpf_ctx *ectx,
+			  const struct mptcp_rbs_smt_print *smt)
+{
+	struct mptcp_rbs_ebpf_block *eblock;
+	int temp_str;
+	int temp_arg = -1;
+
+	eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL);
+
+	temp_str = reserve(ectx);
+	gen_value(ectx, (struct mptcp_rbs_value *) smt->msg, temp_str, eblock);
+	if (smt->arg) {
+		temp_arg = reserve(ectx);
+		gen_value(ectx,
smt->arg, temp_arg, eblock); + } + add_instr_ectx( + ectx, EBPF_CALL(ebpf_printk, temp_str, temp_arg, -1, -1, -1, -1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + if (temp_arg != -1) + dereserve(ectx, temp_arg); + dereserve(ectx, temp_str); + + ectx->eblock->next = eblock; + ectx->eblock = eblock; + ectx->capacity = 0; +} + +static void gen_smt_push(struct ebpf_ctx *ectx, + const struct mptcp_rbs_smt_push *smt) +{ + struct mptcp_rbs_ebpf_block *eblock; + int temp_sbf; + int temp_skb; + int temp_reinject; + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + temp_sbf = reserve(ectx); + gen_value(ectx, (struct mptcp_rbs_value *) smt->sbf, temp_sbf, eblock); + temp_skb = reserve(ectx); + gen_value(ectx, (struct mptcp_rbs_value *) smt->skb, temp_skb, eblock); + + temp_reinject = reserve(ectx); + add_instr_ectx(ectx, EBPF_MOV_IMM(temp_reinject, smt->skb->reinject)); + add_instr_ectx(ectx, EBPF_CALL(ebpf_add_push, CTX_TMP, temp_sbf, + temp_skb, temp_reinject, -1, -1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_reinject); + dereserve(ectx, temp_skb); + dereserve(ectx, temp_sbf); + + ectx->eblock->next = eblock; + ectx->eblock = eblock; + ectx->capacity = 0; +} + +static void gen_smt_set(struct ebpf_ctx *ectx, + const struct mptcp_rbs_smt_set *smt) +{ + struct mptcp_rbs_ebpf_block *eblock; + int temp; + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + add_instr_ectx( + ectx, + EBPF_ST_MEM(bytes_to_bpf_size(sizeof(bool)), CTX_TMP, + offsetof(struct mptcp_rbs_eval_ctx, side_effects), 1)); + temp = reserve(ectx); + gen_value(ectx, (struct mptcp_rbs_value *) smt->value, temp, eblock); + + add_instr_ectx(ectx, + EBPF_STX_MEM(bytes_to_bpf_size(sizeof(unsigned int)), + REGS_TMP, temp, + smt->reg_number * sizeof(unsigned int))); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp); + + ectx->eblock->next = eblock; + ectx->eblock = eblock; + ectx->capacity = 0; +} + +static void gen_smt_list_var(struct ebpf_ctx *ectx, + const struct mptcp_rbs_smt_var *smt) +{ + struct mptcp_rbs_ebpf_block *null_eblock; + int temp_var; + int temp_item; + int temp_cur; + int temp; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + struct mptcp_rbs_ebpf_block *call_eblock; + int capacity; + + temp_var = reserve(ectx); + temp_item = reserve(ectx); + temp_cur = reserve(ectx); + + /* Prepare null block */ + null_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr( + null_eblock, &capacity, + EBPF_ST_MEM(bytes_to_bpf_size(sizeof(void *)), VARS_TMP, + sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, sbf_list_value), + 0)); + add_instr(null_eblock, &capacity, EBPF_JMP_OFF()); + + /* Get var pointer */ + add_instr_ectx(ectx, EBPF_MOV_REG(temp_var, VARS_TMP)); + add_instr_ectx( + ectx, EBPF_ALU_IMM(BPF_ADD, temp_var, + sizeof(struct mptcp_rbs_var) * smt->var_number)); + + /* cur = NULL; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp_cur, 0)); + add_instr_ectx(ectx, EBPF_CALL(ebpf_varlist_expand, temp_var, temp_cur, + -1, -1, -1, temp_cur)); + + if (gen_list_value(ectx, smt->value, temp_item, null_eblock, + &break_eblock, &cont_eblock, &reserved_temps_map)) + null_eblock->next = break_eblock; + else + mptcp_rbs_ebpf_block_free(null_eblock); + + /* Check if we have to allocate more space */ + temp = reserve(ectx); + add_instr_ectx(ectx, EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(void *)), + temp, temp_cur, 0)); + 
add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JNE, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Fill the call block */ + call_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(call_eblock, &capacity, + EBPF_CALL(ebpf_varlist_expand, temp_var, temp_cur, -1, -1, -1, + temp_cur)); + add_instr(call_eblock, &capacity, EBPF_JMP_OFF()); + call_eblock->next = ectx->eblock->next; + ectx->eblock->next_else = call_eblock; + + /* *cur = item; ++cur; */ + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_STX_MEM(bytes_to_bpf_size(sizeof(void *)), + temp_cur, temp_item, 0)); + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_ADD, temp_cur, sizeof(void *))); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = cont_eblock; + + ectx->eblock = break_eblock; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_ST_MEM(bytes_to_bpf_size(sizeof(void *)), + temp_cur, 0, 0)); + + dereserve(ectx, temp_cur); + dereserve(ectx, temp_item); + dereserve(ectx, temp_var); + dereserve_all(ectx, reserved_temps_map); +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +static void gen_smt_var(struct ebpf_ctx *ectx, + const struct mptcp_rbs_smt_var *smt) +{ + enum mptcp_rbs_type_kind type; + struct mptcp_rbs_ebpf_block *null_eblock; + int capacity; + int temp; + + /* var->type = type; */ + type = mptcp_rbs_value_get_type(smt->value->kind); + add_instr_ectx( + ectx, EBPF_ST_MEM( + bytes_to_bpf_size(sizeof(enum mptcp_rbs_type_kind)), + VARS_TMP, sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, type), + type)); + + /* We do not support lazy evaluation + * var->is_lazy = false; + */ + add_instr_ectx( + ectx, EBPF_ST_MEM(bytes_to_bpf_size(sizeof(bool)), VARS_TMP, + sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, is_lazy), + false)); + + switch (type) { + case TYPE_KIND_NULL: + break; + case TYPE_KIND_BOOL: { + temp = reserve(ectx); + + /* Prepare null block */ + null_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(null_eblock, &capacity, EBPF_MOV_IMM(temp, -1)); + add_instr(null_eblock, &capacity, EBPF_JMP_OFF()); + + if (!gen_value(ectx, smt->value, temp, null_eblock)) + mptcp_rbs_ebpf_block_free(null_eblock); + else { + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = kzalloc( + sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + null_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + } + + add_instr_ectx( + ectx, EBPF_STX_MEM( + bytes_to_bpf_size(sizeof(s32)), VARS_TMP, temp, + sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, bool_value))); + + dereserve(ectx, temp); + break; + } + case TYPE_KIND_INT: { + temp = reserve(ectx); + + /* Prepare null block */ + null_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(null_eblock, &capacity, EBPF_MOV_IMM(temp, -1)); + add_instr(null_eblock, &capacity, EBPF_JMP_OFF()); + + if (!gen_value(ectx, smt->value, temp, null_eblock)) + mptcp_rbs_ebpf_block_free(null_eblock); + else { + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = kzalloc( + sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + null_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + } + + add_instr_ectx( + ectx, 
EBPF_STX_MEM( + bytes_to_bpf_size(sizeof(s64)), VARS_TMP, temp, + sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, int_value))); + + dereserve(ectx, temp); + break; + } + case TYPE_KIND_STRING: { + temp = reserve(ectx); + + /* Prepare null block */ + null_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(null_eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(null_eblock, &capacity, EBPF_JMP_OFF()); + + if (!gen_value(ectx, smt->value, temp, null_eblock)) + mptcp_rbs_ebpf_block_free(null_eblock); + else { + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = kzalloc( + sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + null_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + } + + add_instr_ectx( + ectx, + EBPF_STX_MEM( + bytes_to_bpf_size(sizeof(char *)), VARS_TMP, temp, + sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, string_value))); + + dereserve(ectx, temp); + break; + } + case TYPE_KIND_SBF: { + temp = reserve(ectx); + + /* Prepare null block */ + null_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(null_eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(null_eblock, &capacity, EBPF_JMP_OFF()); + + if (!gen_value(ectx, smt->value, temp, null_eblock)) + mptcp_rbs_ebpf_block_free(null_eblock); + else { + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = kzalloc( + sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + if (smt->value->kind == VALUE_KIND_SBFLIST_NEXT) + ectx->next_var_null_eblock = null_eblock; + else + null_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + } + + add_instr_ectx( + ectx, + EBPF_STX_MEM( + bytes_to_bpf_size(sizeof(struct tcp_sock *)), VARS_TMP, + temp, sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, sbf_value))); + + dereserve(ectx, temp); + + /* Set next_var if the value was a *_NEXT call */ + if (smt->value->kind == VALUE_KIND_SBFLIST_NEXT) + ectx->next_var = smt->var_number; + break; + } + case TYPE_KIND_SBFLIST: + case TYPE_KIND_SKBLIST: { + gen_smt_list_var(ectx, smt); + break; + } + case TYPE_KIND_SKB: { + temp = reserve(ectx); + + /* Prepare null block */ + null_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(null_eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(null_eblock, &capacity, EBPF_JMP_OFF()); + + if (!gen_value(ectx, smt->value, temp, null_eblock)) + mptcp_rbs_ebpf_block_free(null_eblock); + else { + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = kzalloc( + sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + if (smt->value->kind == VALUE_KIND_SKBLIST_NEXT) + ectx->next_var_null_eblock = null_eblock; + else + null_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + } + + add_instr_ectx( + ectx, + EBPF_STX_MEM( + bytes_to_bpf_size(sizeof(struct sk_buff *)), VARS_TMP, + temp, sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, skb_value))); + dereserve(ectx, temp); + + /* Set next_var if the value was a *_NEXT call */ + if (smt->value->kind == VALUE_KIND_SKBLIST_NEXT) + ectx->next_var = smt->var_number; + break; + } + } +} +#pragma GCC diagnostic pop + +/** + * Generates the eBPF instructions for a statement in the current block + * @ectx: The generation context + * @smt: The CFG statement + */ 
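+/* Note that the switch below intentionally has no default case: together
+ * with the -Wswitch pragmas this turns an unhandled statement kind into a
+ * build error instead of silently skipped code.
+ */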
+#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +static void gen_smt(struct ebpf_ctx *ectx, const struct mptcp_rbs_smt *smt) +{ + switch (smt->kind) { + case SMT_KIND_DROP: { + gen_smt_drop(ectx, (const struct mptcp_rbs_smt_drop *) smt); + break; + } + case SMT_KIND_PRINT: { + gen_smt_print(ectx, (const struct mptcp_rbs_smt_print *) smt); + break; + } + case SMT_KIND_PUSH: { + gen_smt_push(ectx, (const struct mptcp_rbs_smt_push *) smt); + break; + } + case SMT_KIND_SET: { + gen_smt_set(ectx, (const struct mptcp_rbs_smt_set *) smt); + break; + } + case SMT_KIND_VAR: { + gen_smt_var(ectx, (const struct mptcp_rbs_smt_var *) smt); + break; + } + case SMT_KIND_VOID: { + /* We do not generate eBPF code for VOID */ + break; + } + case SMT_KIND_EBPF: { + /* Cannot generate eBPF code from eBPF */ + BUG_ON(true); + break; + } + case SMT_KIND_SET_USER: { + printk("eBPF for set user is not implemented yet\n"); + BUG_ON(true); + break; + } + } +} +#pragma GCC diagnostic pop + +static bool path_exists_helper(const struct mptcp_rbs_cfg_block *a, + const struct mptcp_rbs_cfg_block *b, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block; + FOREACH_BLOCK(list, block, { + if (a == block) + return false; + }); + ADD_BLOCK(list, (struct mptcp_rbs_cfg_block *) a); + + if (a == b) + return true; + if (a->next && path_exists_helper(a->next, b, list)) + return true; + if (a->next_else && path_exists_helper(a->next_else, b, list)) + return true; + + return false; +} + +static bool path_exists(const struct mptcp_rbs_cfg_block *a, + const struct mptcp_rbs_cfg_block *b) +{ + struct mptcp_rbs_cfg_block_list list; + bool found; + + INIT_BLOCK_LIST(&list); + found = path_exists_helper(a, b, &list); + FREE_BLOCK_LIST(&list); + + return found; +} + +/** + * Generates the eBPF instructions for a CFG block and its successors + * @ectx: The generation context + * @block: The CFG block + * @list: List with CFG blocks that were already processed + * @return: The generated eBPF block + */ +static struct mptcp_rbs_ebpf_block *gen_block( + struct ebpf_ctx *ectx, struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block2; + struct mptcp_rbs_cfg_block *old_block; + struct mptcp_rbs_ebpf_block *old_eblock; + int old_capacity; + int old_next_var; + struct mptcp_rbs_ebpf_block *eblock; + struct mptcp_rbs_ebpf_block *else_eblock; + struct mptcp_rbs_smt *smt; + int temp; + int capacity; + + /* Check if the block is already in the list */ + FOREACH_BLOCK(list, block2, { + if (block == block2) + return BLOCK_INFO(block)->eblock; + }); + ADD_BLOCK(list, block); + + /* Remember current block and its capacity */ + old_block = ectx->block; + old_eblock = ectx->eblock; + old_capacity = ectx->capacity; + old_next_var = ectx->next_var; + + /* Create eBPF block */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + block->tag = kzalloc(sizeof(struct block_info), GFP_KERNEL); + BLOCK_INFO(block)->eblock = eblock; + ectx->block = block; + ectx->eblock = eblock; + ectx->capacity = 0; + + /* Generate code of the statements */ + ectx->next_var = -1; + smt = block->first_smt; + while (smt) { + /* Make sure that no statement follows after a var statement + * with *_NEXT value + */ + BUG_ON(ectx->next_var != -1); + + gen_smt(ectx, smt); + smt = smt->next; + } + + /* Evaluate condition if there is one */ + if (block->condition) { + if (ectx->next_var != -1) { + /* Must be part of a foreach loop */ + BUG_ON(block->condition->kind != 
+ VALUE_KIND_IS_NOT_NULL); + BUG_ON(((const struct mptcp_rbs_value_is_not_null *) + block->condition) + ->operand->kind != VALUE_KIND_SBF_VAR && + ((const struct mptcp_rbs_value_is_not_null *) + block->condition) + ->operand->kind != VALUE_KIND_SKB_VAR); + + /* Release temporaries of foreach loop because the else + * branch is outside of it + */ + dereserve_all(ectx, + BLOCK_INFO(block)->reserved_temps_map); + + else_eblock = BLOCK_INFO(block)->break_eblock; + capacity = 0; + add_instr(else_eblock, &capacity, EBPF_JMP_OFF()); + + /* Set the jump target of the null block after the whole + * loop + */ + if (ectx->next_var_null_eblock) + ectx->next_var_null_eblock->next = else_eblock; + } else { + else_eblock = kzalloc( + sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(else_eblock, &capacity, EBPF_JMP_OFF()); + + temp = reserve(ectx); + gen_value(ectx, + (struct mptcp_rbs_value *) block->condition, + temp, else_eblock); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + dereserve(ectx, temp); + ectx->eblock->next_else = else_eblock; + } + + if (block->next_else) { + else_eblock->next = + gen_block(ectx, block->next_else, list); + } else { + else_eblock->next = kzalloc( + sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(else_eblock->next, &capacity, EBPF_EXIT()); + } + + if (ectx->next_var != -1) { + /* The next branch is in the foreach loop -> reserve the + * temporaries of the foreach loop again + */ + reserve_all(ectx, + BLOCK_INFO(block)->reserved_temps_map); + } + } else + BUG_ON(ectx->next_var != -1); + + /* Add jump to next block */ + if (block->next) { + /* Check if the jump target is a loop start */ + bool is_loop_start = false; + FOREACH_BLOCK(list, block2, { + if (block->next == block2) { + is_loop_start = + BLOCK_INFO(block2)->break_eblock != NULL; + break; + } + }); + + if (is_loop_start) { + /* If there exists a path from block->next to block + * without using the break block we have to use the + * continue block + */ + if (path_exists(block->next->next, block)) + ectx->eblock->next = + BLOCK_INFO(block->next)->cont_eblock; + else + ectx->eblock->next = + BLOCK_INFO(block->next)->eblock; + } else + ectx->eblock->next = gen_block(ectx, block->next, list); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + } else + add_instr_ectx(ectx, EBPF_EXIT()); + + /* Reset current block and its capacity */ + if (ectx->next_var != -1) { + /* Release temporaries of foreach loop because we are now + * outside of it + */ + dereserve_all(ectx, BLOCK_INFO(block)->reserved_temps_map); + } + + ectx->block = old_block; + ectx->eblock = old_eblock; + ectx->capacity = old_capacity; + ectx->next_var = old_next_var; + + return eblock; +} + +/** + * Generates the eBPF instructions for a CFG + * @ectx: The generation context + * @return: The first eBPF block + */ +static struct mptcp_rbs_ebpf_block *gen(struct ebpf_ctx *ectx) +{ + struct mptcp_rbs_cfg_block_list list; + struct mptcp_rbs_ebpf_block *first_eblock; + struct mptcp_rbs_cfg_block *block; + + /* Generate start block that puts values in fixed temporaries */ + first_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = first_eblock; + ectx->capacity = 0; + ectx->used_temps = FIXED_TMP_COUNT; + ectx->used_temps_map = ((u64) -1ll) >> (64 - FIXED_TMP_COUNT); + add_instr_ectx(ectx, EBPF_MOV_RAW_REG(CTX_TMP, BPF_REG_ARG1)); + add_instr_ectx(ectx, EBPF_MOV_REG(VARS_TMP, CTX_TMP)); + add_instr_ectx(ectx, + EBPF_ALU_IMM(BPF_ADD, VARS_TMP, + offsetof(struct 
mptcp_rbs_eval_ctx, vars))); + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(void *)), REGS_TMP, CTX_TMP, + offsetof(struct mptcp_rbs_eval_ctx, rbs_cb))); + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_ADD, REGS_TMP, + offsetof(struct mptcp_rbs_cb, regs))); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + INIT_BLOCK_LIST(&list); + first_eblock->next = + gen_block(ectx, ectx->ctx->variation->first_block, &list); + FOREACH_BLOCK(&list, block, kfree(block->tag)); + FREE_BLOCK_LIST(&list); + + return first_eblock; +} + +void mptcp_rbs_opt_ebpf(struct mptcp_rbs_opt_ctx *ctx) +{ + struct ebpf_ctx ectx; + struct bpf_prog *prog; + struct mptcp_rbs_cfg_block *block; + struct mptcp_rbs_ebpf_block *first_eblock; + + /* Generate code */ + memset(&ectx, 0, sizeof(struct ebpf_ctx)); + ectx.ctx = ctx; + INIT_FILTER_VAR_LIST(&ectx.filter_var_list); + first_eblock = gen(&ectx); + FREE_FILTER_VAR_LIST(&ectx.filter_var_list); + + /* Register functions that can be called from eBPF */ + prog = bpf_prog_alloc(bpf_prog_size(1), 0); + atomic_set(&prog->aux->refcnt, 1); + prog->gpl_compatible = true; + prog->aux->ops = &bpf_ops; + prog->type = BPF_PROG_TYPE_RBS; + + /* Run register allocator */ + prog = mptcp_rbs_ebpf_alloc_regs(first_eblock, ectx.used_temps, prog); + mptcp_rbs_ebpf_blocks_free(first_eblock); + + /* JIT the result */ + bpf_prog_select_runtime(prog); + + /* Create eBPF statement and replace whole CFG with it */ + mptcp_rbs_cfg_blocks_free(ctx->variation->first_block); + + block = kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + block->first_smt = (struct mptcp_rbs_smt *) mptcp_rbs_smt_ebpf_new( + prog, ectx.strs, ectx.strs_len); + ctx->variation->first_block = block; +} + +static struct bpf_prog_type_list rbs_bpf_tl = { + .ops = &bpf_ops, + .type = BPF_PROG_TYPE_RBS, +}; + +static int __init register_ebpf_prog_type(void) +{ + bpf_register_prog_type(&rbs_bpf_tl); + return 0; +} +module_init(register_ebpf_prog_type); diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf.h b/net/mptcp/mptcp_rbs_optimizer_ebpf.h new file mode 100644 index 0000000000000..988d0bc7ff52b --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_ebpf.h @@ -0,0 +1,13 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_EBPF_H +#define _MPTCP_RBS_OPTIMIZER_EBPF_H + +struct mptcp_rbs_opt_ctx; + +/** + * eBPF code generation: + * Generates eBPF code and replaces the existing CFG with it + * @ctx: The optimization context + */ +void mptcp_rbs_opt_ebpf(struct mptcp_rbs_opt_ctx *ctx); + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.c b/net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.c new file mode 100644 index 0000000000000..8ac5cd00b81eb --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.c @@ -0,0 +1,309 @@ +#include "mptcp_rbs_optimizer_ebpf_disasm.h" +#include "mptcp_rbs_parser.h" +#include +#include + +int mptcp_rbs_ebpf_dump(const struct bpf_prog *prog, char *buffer) +{ + int len = 0; + int i; + +#define PRINT(fmt, ...) len += sprintf_null(&buffer, fmt, ##__VA_ARGS__) +#define PRINT_ALU(op) \ + if (BPF_SRC(instr->code)) \ + PRINT(op "%s r%d, r%d\n", cl == BPF_ALU ? "w" : "", \ + instr->dst_reg, instr->src_reg); \ + else \ + PRINT(op "%s r%d, %d\n", cl == BPF_ALU ? 
"w" : "", \ + instr->dst_reg, instr->imm); +#define PRINT_JMP(op) \ + if (BPF_SRC(instr->code)) \ + PRINT(op " r%d, r%d, 0x%x\n", instr->dst_reg, instr->src_reg, \ + (i + instr->off + 1) * sizeof(struct bpf_insn)); \ + else \ + PRINT(op " r%d, %d, 0x%x\n", instr->dst_reg, instr->imm, \ + (i + instr->off + 1) * sizeof(struct bpf_insn)); +#define PRINT_LDX(op) \ + if (instr->off > 0) \ + PRINT(op " r%d, [r%d + %d]\n", instr->dst_reg, instr->src_reg, \ + instr->off); \ + else if (instr->off < 0) \ + PRINT(op " r%d, [r%d - %d]\n", instr->dst_reg, instr->src_reg, \ + instr->off * -1); \ + else \ + PRINT(op " r%d, [r%d]\n", instr->dst_reg, instr->src_reg); +#define PRINT_ST(op) \ + if (instr->off > 0) \ + PRINT(op " [r%d + %d], %d\n", instr->dst_reg, instr->off, \ + instr->imm); \ + else if (instr->off < 0) \ + PRINT(op " [r%d - %d], %d\n", instr->dst_reg, instr->off * -1, \ + instr->imm); \ + else \ + PRINT(op " [r%d], %d\n", instr->dst_reg, instr->imm); +#define PRINT_STX(op) \ + if (instr->off > 0) \ + PRINT(op " [r%d + %d], r%d\n", instr->dst_reg, instr->off, \ + instr->src_reg); \ + else if (instr->off < 0) \ + PRINT(op " [r%d - %d], r%d\n", instr->dst_reg, \ + instr->off * -1, instr->src_reg); \ + else \ + PRINT(op " [r%d], r%d\n", instr->dst_reg, instr->src_reg); + + PRINT("eBPF\n\n"); + for (i = 0; i < prog->len; ++i) { + const struct bpf_insn *instr = &prog->insnsi[i]; + int cl = BPF_CLASS(instr->code); + + PRINT("0x%06x ", i * sizeof(struct bpf_insn)); + + switch (cl) { + case BPF_ALU: + case BPF_ALU64: { + switch (BPF_OP(instr->code)) { + case BPF_ADD: { + PRINT_ALU("add"); + break; + } + case BPF_SUB: { + PRINT_ALU("sub"); + break; + } + case BPF_MUL: { + PRINT_ALU("mul"); + break; + } + case BPF_DIV: { + PRINT_ALU("div"); + break; + } + case BPF_OR: { + PRINT_ALU("or"); + break; + } + case BPF_AND: { + PRINT_ALU("and"); + break; + } + case BPF_LSH: { + PRINT_ALU("lsh"); + break; + } + case BPF_RSH: { + PRINT_ALU("rsh"); + break; + } + case BPF_NEG: { + PRINT("neg%s r%d\n", cl == BPF_ALU ? "w" : "", + instr->dst_reg); + break; + } + case BPF_MOD: { + PRINT_ALU("mod"); + break; + } + case BPF_XOR: { + PRINT_ALU("xor"); + break; + } + case BPF_MOV: { + PRINT_ALU("mov"); + break; + } + case BPF_ARSH: { + PRINT_ALU("arsh"); + break; + } + case BPF_END: { + PRINT("%s%d r%d\n", + BPF_SRC(instr->code) ? 
"be" : "le", + instr->imm, instr->dst_reg); + break; + } + default: { + PRINT("???\n"); + break; + } + } + break; + } + case BPF_JMP: { + switch (BPF_OP(instr->code)) { + case BPF_JA: { + PRINT("ja 0x%x\n", (i + instr->off + 1) * + sizeof(struct bpf_insn)); + break; + } + case BPF_JEQ: { + PRINT_JMP("jeq"); + break; + } + case BPF_JGT: { + PRINT_JMP("jgt"); + break; + } + case BPF_JGE: { + PRINT_JMP("jge"); + break; + } + case BPF_JSET: { + PRINT_JMP("jset"); + break; + } + case BPF_JNE: { + PRINT_JMP("jne"); + break; + } + case BPF_JSGT: { + PRINT_JMP("jsgt"); + break; + } + case BPF_JSGE: { + PRINT_JMP("jsge"); + break; + } + case BPF_CALL: { + PRINT("call %d\n", instr->imm); + break; + } + case BPF_EXIT: { + PRINT("exit\n"); + break; + } + default: { + PRINT("???\n"); + break; + } + } + break; + } + case BPF_LD: { + if (BPF_MODE(instr->code) == BPF_IMM && + BPF_SIZE(instr->code) == BPF_DW && + i + 1 < prog->len) { + ++i; + PRINT("ld r%d, %lld\n", instr->dst_reg, + instr->imm | + (((s64) prog->insnsi[i].imm) << 32)); + } else + PRINT("???\n"); + break; + } + case BPF_LDX: { + if (BPF_MODE(instr->code) == BPF_MEM) { + switch (BPF_SIZE(instr->code)) { + case BPF_W: { + PRINT_LDX("ldxw"); + break; + } + case BPF_H: { + PRINT_LDX("ldxh"); + break; + } + case BPF_B: { + PRINT_LDX("ldxb"); + break; + } + case BPF_DW: { + PRINT_LDX("ldx"); + break; + } + default: { + PRINT("???\n"); + break; + } + } + } else + PRINT("???\n"); + break; + } + case BPF_ST: { + if (BPF_MODE(instr->code) == BPF_MEM) { + switch (BPF_SIZE(instr->code)) { + case BPF_W: { + PRINT_ST("stw"); + break; + } + case BPF_H: { + PRINT_ST("sth"); + break; + } + case BPF_B: { + PRINT_ST("stb"); + break; + } + case BPF_DW: { + PRINT_ST("st"); + break; + } + default: { + PRINT("???\n"); + break; + } + } + } else + PRINT("???\n"); + break; + } + case BPF_STX: { + switch (BPF_MODE(instr->code)) { + case BPF_XADD: { + switch (BPF_SIZE(instr->code)) { + case BPF_W: { + PRINT_STX("xaddw"); + break; + } + case BPF_DW: { + PRINT_STX("xadd"); + break; + } + default: { + PRINT("???\n"); + break; + } + } + break; + } + case BPF_MEM: { + switch (BPF_SIZE(instr->code)) { + case BPF_W: { + PRINT_STX("stxw"); + break; + } + case BPF_H: { + PRINT_STX("stxh"); + break; + } + case BPF_B: { + PRINT_STX("stxb"); + break; + } + case BPF_DW: { + PRINT_STX("stx"); + break; + } + default: { + PRINT("???\n"); + break; + } + } + break; + } + default: { + PRINT("???\n"); + break; + } + } + + break; + } + default: { + PRINT("???\n"); + break; + } + } + } + + return len; +} diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.h b/net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.h new file mode 100644 index 0000000000000..057a1395a4cd4 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.h @@ -0,0 +1,14 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_EBPF_DISASM_H +#define _MPTCP_RBS_OPTIMIZER_EBPF_DISASM_H + +struct bpf_prog; + +/** + * Writes a string representation of an eBPF program to the given buffer + * @prog: The eBPF program + * @buffer: Pointer to the buffer where the string should be stored or NULL + * Return: Number of written characters + */ +int mptcp_rbs_ebpf_dump(const struct bpf_prog *prog, char *buffer); + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf_lse.c b/net/mptcp/mptcp_rbs_optimizer_ebpf_lse.c new file mode 100644 index 0000000000000..02245f450e907 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_ebpf_lse.c @@ -0,0 +1,413 @@ +#include "mptcp_rbs_optimizer_ebpf_lse.h" +#include +#include +#include + +struct instr_info { + u8 visited : 1, 
is_jump_target : 1, deleted : 1;
+	/* 0 = unused, 255 = multiple values */
+	u8 reg_states[__MAX_BPF_REG];
+};
+
+#define VAR_STATE 255
+
+static bool includes(const u8 *reg_states1, const u8 *reg_states2)
+{
+	int i;
+
+	for (i = 0; i < __MAX_BPF_REG; ++i) {
+		int reg_state1 = reg_states1[i];
+		int reg_state2 = reg_states2[i];
+
+		if (reg_state1 != reg_state2 && reg_state1 != VAR_STATE &&
+		    reg_state2)
+			return false;
+	}
+
+	return true;
+}
+
+static void unite(u8 *reg_states_dst, const u8 *reg_states_src)
+{
+	int i;
+
+	for (i = 0; i < __MAX_BPF_REG; ++i) {
+		int reg_state_dst = reg_states_dst[i];
+		int reg_state_src = reg_states_src[i];
+
+		if (reg_state_src && reg_state_dst != reg_state_src) {
+			if (!reg_state_dst)
+				reg_states_dst[i] = reg_state_src;
+			else
+				reg_states_dst[i] = VAR_STATE;
+		}
+	}
+}
+
+static void fill_infos(struct bpf_prog *prog, int pos, struct instr_info *infos,
+		       const u8 *start_reg_states)
+{
+	u8 reg_states[__MAX_BPF_REG];
+	if (start_reg_states)
+		memcpy(reg_states, start_reg_states, sizeof(reg_states));
+	else
+		memset(reg_states, 0, sizeof(reg_states));
+
+	for (; pos < prog->len; ++pos) {
+		struct bpf_insn *insn = &prog->insnsi[pos];
+		struct instr_info *info = &infos[pos];
+
+		info->visited = true;
+		unite(info->reg_states, reg_states);
+
+		switch (BPF_CLASS(insn->code)) {
+		case BPF_LD: {
+			reg_states[insn->dst_reg] = VAR_STATE;
+			break;
+		}
+		case BPF_LDX: {
+			if (insn->src_reg == BPF_REG_10)
+				reg_states[insn->dst_reg] =
+				    insn->off / -((int) sizeof(u64)) + 1;
+			else
+				reg_states[insn->dst_reg] = VAR_STATE;
+			break;
+		}
+		case BPF_ST:
+		case BPF_STX: {
+			if (insn->dst_reg == BPF_REG_10) {
+				int i;
+				int val = insn->off / -((int) sizeof(u64)) + 1;
+
+				for (i = 0; i < __MAX_BPF_REG; ++i) {
+					if (i != insn->src_reg &&
+					    reg_states[i] == val)
+						reg_states[i] = VAR_STATE;
+				}
+
+				if (BPF_CLASS(insn->code) == BPF_STX)
+					reg_states[insn->src_reg] = val;
+			}
+			break;
+		}
+		case BPF_ALU:
+		case BPF_ALU64: {
+			/* If the second operand is a constant, check for
+			 * typical values that won't change the result
+			 */
+			if (!BPF_SRC(insn->code)) {
+				switch (BPF_OP(insn->code)) {
+				case BPF_ADD:
+				case BPF_SUB:
+				case BPF_OR:
+				case BPF_LSH:
+				case BPF_RSH:
+				case BPF_XOR:
+				case BPF_ARSH: {
+					if (!insn->imm)
+						continue;
+					break;
+				}
+				case BPF_MUL:
+				case BPF_DIV: {
+					if (insn->imm == 1)
+						continue;
+					break;
+				}
+				case BPF_AND: {
+					if (insn->imm == -1)
+						continue;
+					break;
+				}
+				}
+			}
+
+			if (BPF_SRC(insn->code) &&
+			    BPF_OP(insn->code) == BPF_MOV)
+				reg_states[insn->dst_reg] =
+				    reg_states[insn->src_reg];
+			else
+				reg_states[insn->dst_reg] = VAR_STATE;
+			break;
+		}
+		case BPF_JMP: {
+			switch (BPF_OP(insn->code)) {
+			case BPF_JA: {
+				int target_pos = pos + insn->off + 1;
+				struct instr_info *target_info =
+				    &infos[target_pos];
+
+				target_info->is_jump_target = true;
+
+				if (target_info->visited &&
+				    includes(target_info->reg_states,
+					     reg_states))
+					return;
+
+				pos = target_pos - 1;
+				break;
+			}
+			case BPF_JEQ:
+			case BPF_JGT:
+			case BPF_JGE:
+			case BPF_JSET:
+			case BPF_JNE:
+			case BPF_JSGT:
+			case BPF_JSGE: {
+				int target_pos = pos + insn->off + 1;
+				struct instr_info *target_info =
+				    &infos[target_pos];
+
+				target_info->is_jump_target = true;
+
+				if (!target_info->visited ||
+				    !includes(target_info->reg_states,
+					      reg_states))
+					fill_infos(prog, target_pos, infos,
+						   reg_states);
+
+				break;
+			}
+			case BPF_CALL: {
+				reg_states[BPF_REG_0] = VAR_STATE;
+				reg_states[BPF_REG_1] = VAR_STATE;
+				reg_states[BPF_REG_2] = VAR_STATE;
+				reg_states[BPF_REG_3] = VAR_STATE;
+				reg_states[BPF_REG_4] = VAR_STATE;
+				reg_states[BPF_REG_5] = VAR_STATE;
+				break;
+			}
+			case BPF_EXIT: {
+				return;
+			}
+			}
+			break;
+		}
+		}
+	}
+}
+
+static void print_infos(const struct bpf_prog *prog,
+			const struct instr_info *infos)
+{
+	int i;
+	int j;
+
+	for (i = 0; i < prog->len; ++i) {
+		const struct instr_info *info = &infos[i];
+
+		printk("0x%6x v=%d,jt=%d,d=%d", i * 8, info->visited,
+		       info->is_jump_target, info->deleted);
+		for (j = 0; j < __MAX_BPF_REG; ++j) {
+			printk(",r%d=%d", j, info->reg_states[j]);
+		}
+		printk("\n");
+	}
+}
+
+static void find_deletable(struct bpf_prog *prog, struct instr_info *infos)
+{
+	int pos;
+	int count = 0;
+
+	for (pos = 0; pos < prog->len; ++pos) {
+		struct bpf_insn *insn = &prog->insnsi[pos];
+		struct instr_info *info = &infos[pos];
+
+		switch (BPF_CLASS(insn->code)) {
+		case BPF_LDX: {
+			if (insn->src_reg == BPF_REG_10 &&
+			    info->reg_states[insn->dst_reg] ==
+				insn->off / -((int) sizeof(u64)) + 1) {
+				info->deleted = true;
+				++count;
+			}
+			break;
+		}
+		case BPF_STX: {
+			if (insn->dst_reg == BPF_REG_10 &&
+			    info->reg_states[insn->src_reg] ==
+				insn->off / -((int) sizeof(u64)) + 1) {
+				info->deleted = true;
+				++count;
+			}
+			break;
+		}
+		case BPF_ALU:
+		case BPF_ALU64: {
+			/* If the second
operand is a constant check for typical + * values that won't change the result + */ + if (!BPF_SRC(insn->code)) { + switch (BPF_OP(insn->code)) { + case BPF_ADD: + case BPF_SUB: + case BPF_OR: + case BPF_LSH: + case BPF_RSH: + case BPF_XOR: + case BPF_ARSH: { + if (!insn->imm) { + info->deleted = true; + ++count; + } + break; + } + case BPF_MUL: + case BPF_DIV: { + if (insn->imm == 1) { + info->deleted = true; + ++count; + } + break; + } + case BPF_AND: { + if (insn->imm == -1) { + info->deleted = true; + ++count; + } + break; + } + } + } else if (BPF_OP(insn->code) == BPF_MOV) { + if (info->reg_states[insn->dst_reg] == + info->reg_states[insn->src_reg] && + info->reg_states[insn->dst_reg] != + VAR_STATE && + info->reg_states[insn->src_reg]) { + info->deleted = true; + ++count; + } + } + + break; + } + } + } + + for (pos = 0; pos < prog->len; ++pos) { + struct bpf_insn *insn = &prog->insnsi[pos]; + struct instr_info *info = &infos[pos]; + + if (BPF_CLASS(insn->code) == BPF_JMP && + BPF_OP(insn->code) >= BPF_JA && + BPF_OP(insn->code) <= BPF_JSGE) { + /* Find jump target that is not deleted and no + * unconditional jump + */ + int target_pos = pos + insn->off + 1; + int pos2; + bool found; + + while (true) { + struct bpf_insn *insn2 = + &prog->insnsi[target_pos]; + + if (BPF_CLASS(insn2->code) == BPF_JMP && + BPF_OP(insn2->code) == BPF_JA) + target_pos += insn2->off + 1; + else if (infos[target_pos].deleted) + ++target_pos; + else + break; + } + + /* If instruction jumps only to the following -> delete + */ + pos2 = min(pos + 1, target_pos); + found = false; + while (pos2 < max(pos + 1, target_pos)) { + if (!infos[pos2].deleted) { + found = true; + break; + } + + ++pos2; + } + + if (!found) { + /* Jump not necessary */ + info->deleted = true; + ++count; + } + } + } +} + +static void delete_deletable(struct bpf_prog *prog, struct instr_info *infos) +{ + struct bpf_insn *new_insns; + struct bpf_insn *new_insn; + int pos; + int len; + + new_insns = kmalloc(sizeof(struct bpf_insn) * prog->len, GFP_KERNEL); + new_insn = new_insns; + + for (pos = 0; pos < prog->len; ++pos) { + struct bpf_insn *insn = &prog->insnsi[pos]; + struct instr_info *info = &infos[pos]; + + if (info->deleted) + continue; + + *new_insn = *insn; + if (BPF_CLASS(new_insn->code) == BPF_JMP) { + /* Find jump target that is not deleted and no + * unconditional jump + */ + int target_pos = pos + new_insn->off + 1; + int pos2; + + while (true) { + insn = &prog->insnsi[target_pos]; + info = &infos[target_pos]; + + if (BPF_CLASS(insn->code) == BPF_JMP && + BPF_OP(insn->code) == BPF_JA) + target_pos += insn->off + 1; + else if (info->deleted) + ++target_pos; + else + break; + } + + /* Fix jump offset */ + new_insn->off = target_pos - pos - 1; + + if (new_insn->off >= 0) { + for (pos2 = pos + 1; pos2 < target_pos; + ++pos2) { + if (infos[pos2].deleted) + --new_insn->off; + } + } else { + for (pos2 = pos - 1; pos2 > target_pos; + --pos2) { + if (infos[pos2].deleted) + ++new_insn->off; + } + } + } + + ++new_insn; + } + + len = new_insn - new_insns; + bpf_prog_realloc(prog, bpf_prog_size(len), 0); + memcpy(prog->insnsi, new_insns, len * sizeof(struct bpf_insn)); + prog->len = len; + kfree(new_insns); +} + +void mptcp_rbs_optimize_ebpf_ld_sts(struct bpf_prog *prog) +{ + struct instr_info *infos; + + infos = kzalloc(sizeof(struct instr_info) * prog->len, GFP_KERNEL); + fill_infos(prog, 0, infos, NULL); + find_deletable(prog, infos); + delete_deletable(prog, infos); + + kfree(infos); +} diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf_lse.h 
b/net/mptcp/mptcp_rbs_optimizer_ebpf_lse.h
new file mode 100644
index 0000000000000..7d01e23f03f18
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_optimizer_ebpf_lse.h
@@ -0,0 +1,12 @@
+#ifndef _MPTCP_RBS_OPTIMIZER_EBPF_LSE_H
+#define _MPTCP_RBS_OPTIMIZER_EBPF_LSE_H
+
+struct bpf_prog;
+
+/**
+ * Removes unnecessary loads and stores inside an eBPF program
+ * @prog: Pointer to the eBPF program
+ */
+void mptcp_rbs_optimize_ebpf_ld_sts(struct bpf_prog *prog);
+
+#endif
diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.c b/net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.c
new file mode 100644
index 0000000000000..fa33012759c4e
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.c
@@ -0,0 +1,1169 @@
+#include "mptcp_rbs_optimizer_ebpf_regalloc.h"
+#include "mptcp_rbs_dynarray.h"
+#include "mptcp_rbs_optimizer_ebpf_lse.h"
+#include
+#include
+
+#define IS_OFF_JMP(insn) \
+	(BPF_CLASS((insn)->code) == BPF_JMP && BPF_OP((insn)->code) <= BPF_JSGE)
+#define TEMP_TO_BIT(temp) (1ull << (temp))
+#define BIT_DIFF(a, b) ((a) & ~(b))
+
+/*
+ * eBPF block lists
+ */
+
+DECL_DA(block_list, struct mptcp_rbs_ebpf_block *);
+
+#define INIT_BLOCK_LIST(list) INIT_DA(list)
+
+#define FREE_BLOCK_LIST(list) FREE_DA(list)
+
+#define ADD_BLOCK(list, block) ADD_DA_ITEM(list, block)
+
+#define GET_BLOCK_LIST_LEN(list) GET_DA_LEN(list)
+
+#define GET_BLOCK(list, index) GET_DA_ITEM(list, index)
+
+#define FOREACH_BLOCK(list, var, cmds) FOREACH_DA_ITEM(list, var, cmds)
+
+#define FOREACH_BLOCK_REV(list, var, cmds) FOREACH_DA_ITEM_REV(list, var, cmds)
+
+/*
+ * Critical edge block lists
+ */
+
+DECL_DA(edge_block_list, struct edge_block *);
+
+#define INIT_EDGE_BLOCK_LIST(list) INIT_DA(list)
+
+#define FREE_EDGE_BLOCK_LIST(list) FREE_DA(list)
+
+#define ADD_EDGE_BLOCK(list, block) ADD_DA_ITEM(list, block)
+
+#define GET_EDGE_BLOCK_LIST_LEN(list) GET_DA_LEN(list)
+
+#define GET_EDGE_BLOCK(list, index) GET_DA_ITEM(list, index)
+
+enum {
+	/** Temporary was not written yet */
+	TEMP_STATE_NONE,
+	/** Temporary is spilled */
+	TEMP_STATE_SPILLED,
+	/** Temporary is in register */
+	TEMP_STATE_REG
+};
+
+/** Macro to determine the offset on the stack where the temporary can be
+ * spilled
+ */
+#define TEMP_STACK_OFF(temp) ((temp + 1) * -8)
+
+/** The state of a temporary */
+struct temp_state {
+	/**
+	 * State of the temporary (see TEMP_STATE_*) and the register number it
+	 * is in
+	 */
+	u8 state : 2, reg : 6;
+};
+
+/** A single live range.
Organized in a linked list */ +struct live_range { + struct live_range *next; + /** + * Start position of the range and a bit determining if the temporary is + * written at the position + */ + u32 def : 1, start : 31; + /** End position of the range - 1 */ + u32 end; +}; + +/** Information about a temporary */ +struct temp_info { + /** Live interval of the temporary */ + struct live_range *live_interval; + /** The previous active live range */ + struct live_range *prev_range; +}; + +/** Context for register allocation */ +struct ctx { + /** The eBPF program where the instructions should be stored */ + struct bpf_prog *prog; + /** List with eBPF blocks in depth first order */ + struct block_list blocks; + /** List with critical edge blocks */ + struct edge_block_list edge_blocks; + /** The current block */ + struct mptcp_rbs_ebpf_block *block; + /** Information of the current block */ + // struct block_info *block_info; + /** Current eBPF instruction */ + struct mptcp_rbs_ebpf_instr *instr; + /** The ARE_CONSISTENT set */ + u64 A; + /** Position of current instruction */ + u32 pos; + /** Information about temporaries */ + struct { + /** Information about temporaries */ + struct temp_info *infos[MAX_TEMPS]; + /** State of temporaries */ + struct temp_state states[MAX_TEMPS]; + /** Number of used temporaries */ + u8 count; + } temps; + /** + * Array to map registers to the temporary that is currently placed in + + * 1. 0 means that no temporary is stored. R10 is reserved for the stack + * pointer + */ + u8 regs[MAX_BPF_REG - 1]; +}; + +/** Block that is inserted by the allocator on a critical edge */ +struct edge_block { + /** Absolute position of the first instruction of the block */ + u32 insn_pos; + /** + * Number of instructions in this block. This is important to calculate + * jump offsets + */ + u16 insn_count; + /** Index of the block in the edge block list */ + u16 idx; +}; + +/** Per block information for register allocation */ +struct block_info { + /** List with predecessor blocks */ + struct block_list preds; + /** USE set to calculate lifetimes */ + u64 use; + /** DEF set to calculate lifetimes */ + u64 def; + /** IN set for the data flow analysis */ + u64 in; + /** OUT set for the data flow analysis */ + u64 out; + /** Local copy of the ARE_CONSISTENT set */ + u64 A; + /** The KILL set */ + u64 K; + /** Absolute position of the first instruction of the block */ + u32 insn_pos; + /** + * Number of instructions in this block. This is important to calculate + * jump offsets + */ + u16 insn_count; + /** Index of the block in the list */ + u16 idx; + /** + * Critical edge block of the else edge of this block. If edge_block. 
+ * insn_count == 0 there is no critical edge block + */ + struct edge_block edge_block; + /** Position of the first instruction of the block */ + u16 pos; + /** true if jump is necessary on the end of the block */ + bool needs_jmp; + /** Information about temporaries */ + struct { + /** State of temporaries when the block is entered */ + struct temp_state entry_states[MAX_TEMPS]; + /** State of temporaries when the block is left */ + struct temp_state exit_states[MAX_TEMPS]; + } temps; +}; + +#define BLOCK_INFO(block) ((struct block_info *) (block)->tag) + +/** + * Traverses through eBPF blocks and stores them inside a list in depth first + * order + * @ctx: The allocator context + * @block: The current block + */ +static void traverse(struct ctx *ctx, struct mptcp_rbs_ebpf_block *block) +{ + struct mptcp_rbs_ebpf_block *block2; + struct block_info *info; + u8 code; + + /* Check if the block is already in the list */ + FOREACH_BLOCK(&ctx->blocks, block2, if (block == block2) return ); + + /* Check if every block has a jump instruction at the end */ + code = block->instrs[block->instr_count - 1].insn.code; + BUG_ON(BPF_CLASS(code) != BPF_JMP); + BUG_ON(BPF_OP(code) != BPF_JA && BPF_OP(code) != BPF_EXIT); + + info = kzalloc(sizeof(struct block_info), GFP_KERNEL); + INIT_BLOCK_LIST(&info->preds); + info->idx = GET_BLOCK_LIST_LEN(&ctx->blocks); + info->pos = ctx->pos; + block->tag = info; + ADD_BLOCK(&ctx->blocks, block); + + ctx->pos += block->instr_count; + + if (block->next) + traverse(ctx, block->next); + if (block->next_else) + traverse(ctx, block->next_else); +} + +/** + * Calculates the lifetimes of all used temporaries and stores them as struct + * lifetime_block_info in the blocks' tag fields + * @ctx: The allocator context + */ +static void calc_lifetimes(struct ctx *ctx) +{ + struct mptcp_rbs_ebpf_block *block; + bool changes; + + /* Calculate USE and DEF sets */ + FOREACH_BLOCK(&ctx->blocks, block, { + struct block_info *info = BLOCK_INFO(block); + struct mptcp_rbs_ebpf_instr *cur; + struct mptcp_rbs_ebpf_instr *end; + + cur = block->instrs; + end = cur + block->instr_count; + for (; cur != end; ++cur) { + u64 read = 0; + u64 write = 0; + int i; + + for (i = 0; i < MAX_ARGS; ++i) { + if (cur->read[i].used) + read |= TEMP_TO_BIT(cur->read[i].temp); + } + if (cur->write.used) + write = TEMP_TO_BIT(cur->write.temp); + + info->use |= ~info->def & read; + info->def |= ~info->use & write; + } + }); + + /* Iterate until no changes for IN(B) were found */ + do { + changes = false; + + FOREACH_BLOCK_REV(&ctx->blocks, block, { + struct block_info *info = BLOCK_INFO(block); + u64 old_in = info->in; + + /* OUT(B) = U IN(S) where S elementof succ(B) */ + info->out = + block->next ? 
BLOCK_INFO(block->next)->in : 0; + if (block->next_else) + info->out |= BLOCK_INFO(block->next_else)->in; + + /* IN(B) = use(B) U (OUT(B) - def(B)) */ + info->in = info->use | BIT_DIFF(info->out, info->def); + + changes = changes || old_in != info->in; + }); + } while (changes); +} + +/** + * Calculates the live ranges of all used temporaries and stores them in the + * context + * @ctx: The allocator context + */ +static void calc_liveranges(struct ctx *ctx) +{ + struct mptcp_rbs_ebpf_block *block; + struct live_range *last_live_ranges[MAX_TEMPS]; + u32 pos; + + /* Calculate the normal life times first */ + calc_lifetimes(ctx); + + /* Calculate the live ranges */ + memset(last_live_ranges, 0, sizeof(last_live_ranges)); + pos = 0; + FOREACH_BLOCK(&ctx->blocks, block, { + struct block_info *info = BLOCK_INFO(block); + struct mptcp_rbs_ebpf_instr *cur; + struct mptcp_rbs_ebpf_instr *end; + struct live_range *range; + int i; + + /* Create live ranges without end position for live temporaries + * at block entry + */ + for (i = 0; i < ctx->temps.count; ++i) { + if (info->in & TEMP_TO_BIT(i)) { + range = kzalloc(sizeof(struct live_range), + GFP_KERNEL); + range->start = pos; + if (last_live_ranges[i]) + last_live_ranges[i]->next = range; + else + ctx->temps.infos[i]->live_interval = + range; + last_live_ranges[i] = range; + } + } + + cur = block->instrs; + end = cur + block->instr_count; + for (; cur != end; ++cur, ++pos) { + for (i = 0; i < MAX_ARGS; ++i) { + if (cur->read[i].used) { + BUG_ON(!last_live_ranges[cur->read[i] + .temp]); + + last_live_ranges[cur->read[i].temp] + ->end = pos - 1; + } + } + + if (cur->write.used) { + /* Check if there is already an open live range + */ + i = cur->write.temp; + + range = last_live_ranges[i]; + if (range && !range->end) { + /* Reuse live range */ + } else { + range = + kzalloc(sizeof(struct live_range), + GFP_KERNEL); + if (last_live_ranges[i]) + last_live_ranges[i]->next = + range; + else + ctx->temps.infos[i] + ->live_interval = range; + last_live_ranges[i] = range; + } + + range->def = true; + range->start = pos; + } + } + + /* Set the end positions of ranges for temporaries that are live + * at the end of the block + */ + for (i = 0; i < ctx->temps.count; ++i) { + if (info->out & TEMP_TO_BIT(i)) { + BUG_ON(!last_live_ranges[i]); + + last_live_ranges[i]->end = pos - 1; + } + } + }); +} + +static void insert_insn(struct ctx *ctx, int idx, struct bpf_insn insn) +{ + if (bpf_prog_size(ctx->prog->len) == ctx->prog->pages * PAGE_SIZE) { + ctx->prog = bpf_prog_realloc( + ctx->prog, bpf_prog_size(ctx->prog->len + 10), 0); + } + + if (idx != ctx->prog->len) + memmove(&ctx->prog->insnsi[idx + 1], &ctx->prog->insnsi[idx], + (ctx->prog->len - idx) * sizeof(struct bpf_insn)); + ctx->prog->insnsi[idx] = insn; + ++ctx->prog->len; +} + +static inline void add_insn(struct ctx *ctx, struct bpf_insn insn) +{ + insert_insn(ctx, ctx->prog->len, insn); +} + +static struct live_range *find_next_range(struct ctx *ctx, int temp) +{ + struct live_range *prev_range = ctx->temps.infos[temp]->prev_range; + struct live_range *range; + + if (prev_range) + range = prev_range->next; + else + range = ctx->temps.infos[temp]->live_interval; + + while (range && range->end + 1 < ctx->pos) { + prev_range = range; + range = range->next; + } + + ctx->temps.infos[temp]->prev_range = prev_range; + return range; +} + +/** + * Spills a register if necessary + * @ctx: The allocation context + * @reg: The register to spill + */ +static void spill_reg(struct ctx *ctx, int reg) +{ + struct 
block_info *info = BLOCK_INFO(ctx->block); + int temp = ((int) ctx->regs[reg]) - 1; + struct live_range *range; + bool store = true; + + if (temp == -1) { + /* Register is empty */ + return; + } + + /* If the register is clean omit the store */ + if (ctx->A & TEMP_TO_BIT(temp)) + store = false; + else { + /* If the temporary's value won't be used in the future omit the + * store + */ + if (!(info->out & TEMP_TO_BIT(temp))) { + range = find_next_range(ctx, temp); + + if (!range || + range->start >= + info->pos + ctx->block->instr_count || + (range->def && range->start >= ctx->pos)) + store = false; + } + } + + if (store) { + add_insn(ctx, BPF_STX_MEM(BPF_DW, BPF_REG_FP, reg, + TEMP_STACK_OFF(temp))); + ++info->insn_count; + ctx->A |= TEMP_TO_BIT(temp); + } + + ctx->temps.states[temp].state = TEMP_STATE_SPILLED; + ctx->regs[reg] = 0; +} + +/** + * Loads a temporary in a certain register. If its state != TEMP_STATE_NONE the + * function will insert a load from the spill location + * @ctx: The allocation context + * @reg: Register that should contain the temporary + * @temp: The temporary that should be loaded into the register + */ +static void load_in_reg(struct ctx *ctx, int reg, int temp) +{ + struct temp_state *state = &ctx->temps.states[temp]; + + BUG_ON(state->state == TEMP_STATE_REG); + + /* Spill register if necessary */ + spill_reg(ctx, reg); + + if (state->state != TEMP_STATE_NONE) { + add_insn(ctx, BPF_LDX_MEM(BPF_DW, reg, BPF_REG_FP, + TEMP_STACK_OFF(temp))); + ++BLOCK_INFO(ctx->block)->insn_count; + ctx->A |= TEMP_TO_BIT(temp); + } else + ctx->A &= ~TEMP_TO_BIT(temp); + + /* Assign register to temporary */ + ctx->regs[reg] = temp + 1; + state->state = TEMP_STATE_REG; + state->reg = reg; +} + +/** + * The heuristic function of the allocator. 
This function decides which register + * to allocate for a temporary + * @ctx: The allocation context + * @temp: The temporary the function should find a register for + * @return: The register to allocate + */ +static int find_reg(struct ctx *ctx, int temp) +{ + struct mptcp_rbs_ebpf_instr *instr = ctx->instr; + int start; + int end; + int reg; + int temp2; + struct live_range *range; + int i; + int prop_reg = -1; + int prop_delta = -1; + int prop_reg2 = -1; + int prop_delta2 = -1; + bool used; + + /* Check the callee saved registers first */ + start = BPF_REG_6; + end = BPF_REG_9; + + while (true) { + for (reg = start; reg <= end; ++reg) { + temp2 = ((int) ctx->regs[reg]) - 1; + + if (temp2 == -1) { + /* Register is free */ + return reg; + } + + /* Check if register is already used by another operand + */ + used = false; + for (i = 0; i < MAX_ARGS; ++i) { + if (instr->read[i].used && + instr->read[i].temp == temp2) { + used = true; + break; + } + } + if (used) + continue; + + /* Check live ranges */ + range = find_next_range(ctx, temp2); + + if (!range) { + /* Best candidate found because the temporary + * won't be used again + */ + return reg; + } + if (range->start > ctx->pos) { + /* Very good candidate found */ + int delta = range->start - ctx->pos; + if (prop_delta < delta) { + prop_reg = reg; + prop_delta = range->start - ctx->pos; + } + } else { + /* Candidate found */ + int delta = range->end - ctx->pos; + if (prop_delta2 < delta) { + prop_reg2 = reg; + prop_delta2 = range->end - ctx->pos; + } + } + } + + if (start == BPF_REG_0) + break; + + /* Next check all other registers except R10 */ + start = BPF_REG_0; + end = BPF_REG_5; + } + + if (prop_reg != -1) + return prop_reg; + + BUG_ON(prop_reg2 == -1); + return prop_reg2; +} + +/** + * Generalized version of load_in_reg where the function decides the register. 
+ * This function ensures that other instruction operands are not spilled + * @ctx: The allocation context + * @temp: The temporary that should be loaded in a register + */ +static void load_in_any_reg(struct ctx *ctx, int temp) +{ + load_in_reg(ctx, find_reg(ctx, temp), temp); +} + +/** + * Allocates registers for the current call instruction + * @ctx: The allocation context + * @insn: The target instruction that will be added to the eBPF program + */ +static void alloc_call_instr_regs(struct ctx *ctx, struct bpf_insn *insn) +{ + struct block_info *info = BLOCK_INFO(ctx->block); + struct mptcp_rbs_ebpf_instr *instr = ctx->instr; + int temp; + struct temp_state *state; + int i; + + /* Parameters */ + for (i = 0; i < MAX_ARGS; ++i) { + if (instr->read[i].used) { + temp = instr->read[i].temp; + state = &ctx->temps.states[temp]; + + if (state->state == TEMP_STATE_REG) { + if (state->reg != BPF_REG_ARG1 + i) { + spill_reg(ctx, BPF_REG_ARG1 + i); + + /* Copy the value */ + add_insn(ctx, + BPF_MOV64_REG(BPF_REG_ARG1 + i, + state->reg)); + ++info->insn_count; + continue; + } + } else { + /* Load the value and spill it to ensure + * that it is not lost during the call + */ + load_in_reg(ctx, BPF_REG_ARG1 + i, temp); + } + } + + spill_reg(ctx, BPF_REG_ARG1 + i); + } + + /* Return value */ + if (instr->write.used) { + temp = instr->write.temp; + state = &ctx->temps.states[temp]; + + if (state->state == TEMP_STATE_REG) { + /* Discard register */ + ctx->regs[state->reg] = 0; + } + state->state = TEMP_STATE_NONE; + + load_in_reg(ctx, BPF_REG_0, temp); + + info->K |= TEMP_TO_BIT(temp); + } else + spill_reg(ctx, BPF_REG_0); +} + +/** + * Allocates registers for the current instruction + * @ctx: The allocation context + * @insn: The target instruction that will be added to the eBPF program + */ +static void alloc_instr_regs(struct ctx *ctx, struct bpf_insn *insn) +{ + struct block_info *info = BLOCK_INFO(ctx->block); + struct mptcp_rbs_ebpf_instr *instr = ctx->instr; + int temp; + int temp2; + struct temp_state *state; + struct temp_state *state2; + + if (BPF_CLASS(insn->code) == BPF_JMP && + BPF_OP(insn->code) == BPF_CALL) { + alloc_call_instr_regs(ctx, insn); + return; + } + + if (instr->read[0].used) { + temp = instr->read[0].temp; + state = &ctx->temps.states[temp]; + + if (instr->write.used && temp != instr->write.temp) { + temp2 = instr->write.temp; + state2 = &ctx->temps.states[temp2]; + + /* Get any register for the written temporary */ + if (state2->state != TEMP_STATE_REG) { + state2->state = TEMP_STATE_NONE; + load_in_any_reg(ctx, temp2); + } + + /* Load value of the read temporary to the written one + */ + if (state->state == TEMP_STATE_REG) + add_insn(ctx, BPF_MOV64_REG(state2->reg, + state->reg)); + else + add_insn(ctx, + BPF_LDX_MEM(BPF_DW, state2->reg, + BPF_REG_FP, + TEMP_STACK_OFF(temp))); + ++info->insn_count; + + insn->dst_reg = state2->reg; + } else { + if (state->state != TEMP_STATE_REG) + load_in_any_reg(ctx, temp); + + insn->dst_reg = state->reg; + } + } + + if (instr->write.used) { + temp = instr->write.temp; + + if (!instr->read[0].used) { + state = &ctx->temps.states[temp]; + + if (state->state != TEMP_STATE_REG) + load_in_any_reg(ctx, temp); + + insn->dst_reg = state->reg; + } + + ctx->A &= ~TEMP_TO_BIT(temp); + info->K |= TEMP_TO_BIT(temp); + } + + if (instr->read[1].used) { + state = &ctx->temps.states[instr->read[1].temp]; + + if (state->state != TEMP_STATE_REG) + load_in_any_reg(ctx, instr->read[1].temp); + + insn->src_reg = state->reg; + } +} + +/** + * Inserts a 
correction that was found during the resolution phase. This
+ * function might insert critical edge blocks if necessary
+ * @ctx: The allocation context
+ * @p: The predecessor block of the edge where the correction should be inserted
+ * @s: The successor block of the edge where the correction should be inserted
+ * @insn: The instruction that should be inserted
+ */
+static void insert_correction(struct ctx *ctx, struct mptcp_rbs_ebpf_block *p,
+			      struct mptcp_rbs_ebpf_block *s,
+			      struct bpf_insn insn)
+{
+	struct block_info *info = BLOCK_INFO(p);
+	struct block_info *info2 = BLOCK_INFO(s);
+	int edge_list_len = GET_EDGE_BLOCK_LIST_LEN(&ctx->edge_blocks);
+	int i;
+
+	if (p->next_else == s) {
+		/* Critical edge */
+		struct edge_block *edge = &info->edge_block;
+
+		BUG_ON(p->next == s);
+
+		if (!edge->insn_count) {
+			/* Create edge block */
+			struct bpf_insn *cur;
+			struct bpf_insn *end;
+
+			edge->insn_pos = ctx->prog->len;
+			edge->insn_count = 1;
+			edge->idx = edge_list_len;
+			ADD_EDGE_BLOCK(&ctx->edge_blocks, edge);
+			add_insn(ctx, BPF_JMP_OFF(info2->idx));
+
+			/* Find the jump in p to s and replace it with a jump
+			 * to the new edge block
+			 */
+			cur = &ctx->prog->insnsi[info->insn_pos +
+						 info->insn_count - 1];
+			end = cur - info->insn_count;
+
+			for (; cur != end; --cur) {
+				if (IS_OFF_JMP(cur) && cur->off == info2->idx) {
+					cur->off =
+					    GET_BLOCK_LIST_LEN(&ctx->blocks) +
+					    edge->idx;
+					break;
+				}
+			}
+		} else {
+			/* Correct following edge block positions */
+			for (i = edge->idx + 1; i < edge_list_len; ++i) {
+				++GET_EDGE_BLOCK(&ctx->edge_blocks, i)
+				      ->insn_pos;
+			}
+		}
+
+		insert_insn(ctx, edge->insn_pos + edge->insn_count - 1, insn);
+		++edge->insn_count;
+		return;
+	}
+
+	/* Correct following block and edge block positions */
+	for (i = info->idx + 1; i < GET_BLOCK_LIST_LEN(&ctx->blocks); ++i) {
+		++BLOCK_INFO(GET_BLOCK(&ctx->blocks, i))->insn_pos;
+	}
+
+	for (i = 0; i < edge_list_len; ++i) {
+		++GET_EDGE_BLOCK(&ctx->edge_blocks, i)->insn_pos;
+	}
+
+	insert_insn(ctx, info->insn_pos + info->insn_count -
+			     (info->needs_jmp ? 1 : 0),
+		    insn);
+	++info->insn_count;
+}
+
+/**
+ * Performs the resolution phase
+ * @ctx: The allocation context
+ */
+static void resolve(struct ctx *ctx)
+{
+	struct mptcp_rbs_ebpf_block *block;
+	bool changes;
+
+	/* Reset IN and OUT sets for resolution phase */
+	FOREACH_BLOCK(&ctx->blocks, block, {
+		struct block_info *info = BLOCK_INFO(block);
+		info->in = 0;
+		info->out = 0;
+	});
+
+	/* Perform the dataflow analysis */
+	do {
+		changes = false;
+
+		FOREACH_BLOCK_REV(&ctx->blocks, block, {
+			struct block_info *info = BLOCK_INFO(block);
+			u64 old_in = info->in;
+
+			/* OUT(B) = U IN(S) for each S in succ(B) */
+			info->out =
+			    block->next ? BLOCK_INFO(block->next)->in : 0;
+			if (block->next_else)
+				info->out |= BLOCK_INFO(block->next_else)->in;
+
+			/* IN(B) = (~K & A) U (OUT(B) - K) */
+			info->in =
+			    (~info->K & info->A) | BIT_DIFF(info->out, info->K);
+
+			changes = changes || old_in != info->in;
+		});
+	} while (changes);
+
+	/* Search for differing temporary storage locations across edges */
+	FOREACH_BLOCK(&ctx->blocks, block, {
+		struct block_info *info = BLOCK_INFO(block);
+		struct mptcp_rbs_ebpf_block *block2;
+
+		FOREACH_BLOCK(&info->preds, block2, {
+			struct block_info *info2 = BLOCK_INFO(block2);
+			u64 s = info->in & ~info2->A;
+			u64 stored_by_move;
+			int i;
+
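+			/* A hypothetical scenario, for illustration: if
+			 * temporary t sits in r6 at the exit of the
+			 * predecessor but is expected in r7 at the entry of
+			 * this block, step 2 below emits a move on the edge;
+			 * if r7 still holds another live temporary, that
+			 * value is first stored to its stack slot. If t was
+			 * only in memory at the predecessor's exit, step 3
+			 * reloads it instead.
+			 */
+			/* 0. 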
Fill ctx->regs with registers of block2 that are + * also in registers in block + */ + memset(&ctx->regs[0], 0, sizeof(ctx->regs)); + for (i = 0; i < ctx->temps.count; ++i) { + struct temp_state *state = + &info->temps.entry_states[i]; + struct temp_state *state2 = + &info2->temps.exit_states[i]; + + if (state2->state == TEMP_STATE_REG && + state->state == TEMP_STATE_REG) + ctx->regs[state2->reg] = i + 1; + } + + /* 1. Insert necessary stores */ + for (i = 0; i < ctx->temps.count; ++i) { + struct temp_state *state = + &info->temps.entry_states[i]; + struct temp_state *state2 = + &info2->temps.exit_states[i]; + + if (state2->state == TEMP_STATE_REG && + (state->state != TEMP_STATE_REG || + s & TEMP_TO_BIT(i)) && + !(info2->A & TEMP_TO_BIT(i))) { + /* Insert store */ + insert_correction( + ctx, block2, block, + BPF_STX_MEM(BPF_DW, BPF_REG_FP, + state2->reg, + TEMP_STACK_OFF(i))); + } + } + + /* 2. Insert necessary moves */ + stored_by_move = 0; + for (i = 0; i < ctx->temps.count; ++i) { + struct temp_state *state = + &info->temps.entry_states[i]; + struct temp_state *state2 = + &info2->temps.exit_states[i]; + + if (!(stored_by_move & TEMP_TO_BIT(i)) && + state2->state == TEMP_STATE_REG && + state->state == TEMP_STATE_REG && + state->reg != state2->reg) { + /* Insert move */ + int reg_temp = + ((int) ctx->regs[state->reg]) - 1; + if (reg_temp != -1) { + /* Other value in register */ + /* TODO We could use registers + * for swapping etc. For now we + * just store the value in + * memory + */ + if (!(info2->A & + TEMP_TO_BIT(reg_temp))) { + insert_correction( + ctx, block2, block, + BPF_STX_MEM( + BPF_DW, + BPF_REG_FP, + state->reg, + TEMP_STACK_OFF( + reg_temp))); + } + stored_by_move |= + TEMP_TO_BIT(reg_temp); + } + + insert_correction( + ctx, block2, block, + BPF_MOV64_REG(state->reg, + state2->reg)); + ctx->regs[state2->reg] = 0; + } + } + + /* 3. 
Insert necessary loads */
+			for (i = 0; i < ctx->temps.count; ++i) {
+				struct temp_state *state =
+				    &info->temps.entry_states[i];
+				struct temp_state *state2 =
+				    &info2->temps.exit_states[i];
+
+				if ((stored_by_move & TEMP_TO_BIT(i) ||
+				     state2->state != TEMP_STATE_REG) &&
+				    state->state == TEMP_STATE_REG) {
+					/* Insert load */
+					insert_correction(
+					    ctx, block2, block,
+					    BPF_LDX_MEM(BPF_DW, state->reg,
+							BPF_REG_FP,
+							TEMP_STACK_OFF(i)));
+				}
+			}
+		});
+	});
+}
+
+/**
+ * Performs the actual register allocation as described in "Quality and Speed in
+ * Linear-Scan Register Allocation" by Omri Traub
+ * @ctx: The allocation context
+ */
+static void alloc_regs(struct ctx *ctx)
+{
+	struct mptcp_rbs_ebpf_block **block_ptr;
+	struct mptcp_rbs_ebpf_block **block_end;
+	struct mptcp_rbs_ebpf_block *block;
+	u32 insn_pos;
+
+	/* Iterate over all instructions and allocate registers */
+	ctx->pos = 0;
+	insn_pos = 0;
+	block_ptr = ctx->blocks.items;
+	block_end = ctx->blocks.items + ctx->blocks.len;
+	for (; block_ptr != block_end; ++block_ptr) {
+		struct block_info *info;
+		struct mptcp_rbs_ebpf_instr *end;
+		struct bpf_insn insn;
+
+		block = *block_ptr;
+		ctx->block = block;
+		info = BLOCK_INFO(block);
+
+		/* Set position of block */
+		info->insn_pos = insn_pos;
+
+		/* Add block as predecessor */
+		if (block->next)
+			ADD_BLOCK(&BLOCK_INFO(block->next)->preds, block);
+		if (block->next_else)
+			ADD_BLOCK(&BLOCK_INFO(block->next_else)->preds, block);
+
+		/* Remember the temporary states on entry */
+		memcpy(&info->temps.entry_states, ctx->temps.states,
+		       sizeof(ctx->temps.states));
+
+		ctx->instr = block->instrs;
+		end = ctx->instr + block->instr_count;
+		for (; ctx->instr != end; ++ctx->instr, ++ctx->pos) {
+			insn = ctx->instr->insn;
+			alloc_instr_regs(ctx, &insn);
+
+			/* Set jump offsets to the block indexes in the ordered
+			 * list
+			 */
+			if (IS_OFF_JMP(&insn)) {
+				if (BPF_OP(insn.code) == BPF_JA) {
+					/* JA must be the last instruction in
+					 * the block
+					 */
+					BUG_ON(ctx->instr + 1 != end);
+					BUG_ON(!block->next);
+
+					insn.off = BLOCK_INFO(block->next)->idx;
+
+					/* Do not insert jumps if the next block
+					 * follows directly
+					 */
+					info->needs_jmp =
+					    block_ptr + 1 == block_end ||
+					    *(block_ptr + 1) != block->next;
+					if (!info->needs_jmp)
+						continue;
+				} else {
+					/* Jumps except JA must be the
+					 * second-to-last instruction
+					 */
+					BUG_ON(ctx->instr + 1 == end ||
+					       ctx->instr + 2 != end);
+					BUG_ON(!block->next_else);
+
+					insn.off =
+					    BLOCK_INFO(block->next_else)->idx;
+				}
+			}
+
+			add_insn(ctx, insn);
+			++info->insn_count;
+		}
+
+		/* Remember the temporary states on exit and store a local copy
+		 * of A
+		 */
+		memcpy(&info->temps.exit_states, ctx->temps.states,
+		       sizeof(ctx->temps.states));
+		info->A = ctx->A;
+
+		insn_pos += info->insn_count;
+	}
+
+	/* Resolution phase */
+	resolve(ctx);
+}
+
+/**
+ * Corrects jump instructions to point to the correct destination blocks
+ * @ctx: The allocation context
+ */
+static void correct_jmps(struct ctx *ctx)
+{
+	struct bpf_insn *cur;
+	struct bpf_insn *end;
+	int block_count = GET_BLOCK_LIST_LEN(&ctx->blocks);
+	u32 insn_pos;
+
+	insn_pos = 0;
+	cur = &ctx->prog->insnsi[0];
+	end = cur + ctx->prog->len;
+	for (; cur != end; ++cur, ++insn_pos) {
+		if (IS_OFF_JMP(cur)) {
+			u32 dst;
+
+			if (cur->off >= block_count)
+				dst = GET_EDGE_BLOCK(&ctx->edge_blocks,
+						     cur->off - block_count)
+					  ->insn_pos;
+			else
+				dst = BLOCK_INFO(
+					  GET_BLOCK(&ctx->blocks, cur->off))
+					  ->insn_pos;
+
+			cur->off = dst - insn_pos - 1;
+		}
+	}
+}
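+
+/* While code is generated, the off field of a jump holds the index of its
+ * destination block (or block_count + i for the i-th edge block);
+ * correct_jmps() rewrites this into the final relative offset. Sketch, for
+ * illustration only: a jump at instruction position i whose destination
+ * starts at position d ends up with off = d - i - 1, since eBPF offsets are
+ * relative to the instruction following the jump.
+ */
+
+struct bpf_prog 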
*mptcp_rbs_ebpf_alloc_regs(
+    struct mptcp_rbs_ebpf_block *first_block, int used_temps,
+    struct bpf_prog *prog)
+{
+	struct mptcp_rbs_ebpf_block *block;
+	struct ctx ctx;
+	int i;
+
+	/* Initialize the context */
+	memset(&ctx, 0, sizeof(struct ctx));
+	INIT_BLOCK_LIST(&ctx.blocks);
+	INIT_EDGE_BLOCK_LIST(&ctx.edge_blocks);
+
+	for (i = 0; i < used_temps; ++i) {
+		ctx.temps.infos[i] =
+		    kzalloc(sizeof(struct temp_info), GFP_KERNEL);
+	}
+	ctx.temps.count = used_temps;
+
+	/* Traverse CFG into list and set program size to total number of
+	 * instructions
+	 */
+	traverse(&ctx, first_block);
+	ctx.prog = bpf_prog_realloc(prog, bpf_prog_size(ctx.pos), 0);
+
+	/* Calculate live ranges of temporaries */
+	calc_liveranges(&ctx);
+
+	/* Perform the actual register allocation */
+	alloc_regs(&ctx);
+
+	/* Correct offsets of jump instructions */
+	correct_jmps(&ctx);
+
+	/* Release the context and the blocks' tag fields */
+	FOREACH_BLOCK(&ctx.blocks, block, {
+		struct block_info *info = BLOCK_INFO(block);
+		FREE_BLOCK_LIST(&info->preds);
+		kfree(info);
+	});
+	FREE_BLOCK_LIST(&ctx.blocks);
+	FREE_EDGE_BLOCK_LIST(&ctx.edge_blocks);
+	for (i = 0; i < used_temps; ++i) {
+		struct temp_info *info = ctx.temps.infos[i];
+
+		while (info->live_interval) {
+			struct live_range *range = info->live_interval;
+			info->live_interval = range->next;
+			kfree(range);
+		}
+
+		kfree(info);
+	}
+
+	mptcp_rbs_optimize_ebpf_ld_sts(ctx.prog);
+	return ctx.prog;
+}
+
+void mptcp_rbs_ebpf_block_free(struct mptcp_rbs_ebpf_block *block)
+{
+	kfree(block->instrs);
+	kfree(block);
+}
+
+static void simple_traverse(struct mptcp_rbs_ebpf_block *block,
+			    struct block_list *list)
+{
+	struct mptcp_rbs_ebpf_block *block2;
+
+	/* Check if the block is already in the list */
+	FOREACH_BLOCK(list, block2, if (block == block2) return );
+	ADD_BLOCK(list, block);
+
+	if (block->next)
+		simple_traverse(block->next, list);
+	if (block->next_else)
+		simple_traverse(block->next_else, list);
+}
+
+void mptcp_rbs_ebpf_blocks_free(struct mptcp_rbs_ebpf_block *first_block)
+{
+	struct block_list list;
+	struct mptcp_rbs_ebpf_block *block;
+
+	INIT_BLOCK_LIST(&list);
+	simple_traverse(first_block, &list);
+
+	FOREACH_BLOCK(&list, block, mptcp_rbs_ebpf_block_free(block));
+	FREE_BLOCK_LIST(&list);
+}
diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.h b/net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.h
new file mode 100644
index 0000000000000..83d25b01d2843
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.h
@@ -0,0 +1,219 @@
+#ifndef _MPTCP_RBS_OPTIMIZER_EBPF_REGALLOC_H
+#define _MPTCP_RBS_OPTIMIZER_EBPF_REGALLOC_H
+
+#include <linux/filter.h>
+#include <linux/types.h>
+
+#define MAX_ARGS 5
+#define MAX_TEMPS ((MAX_BPF_STACK / 8) > 64 ? 64 : (MAX_BPF_STACK / 8))
+
+/*
+ * Macros for common instructions
+ */
+
+/* Unconditional jumps, goto pc + off16 */
+#define BPF_JMP_OFF(OFF) \
+	((struct bpf_insn){.code = BPF_JMP | BPF_K, \
+			   .dst_reg = 0, \
+			   .src_reg = 0, \
+			   .off = OFF, \
+			   .imm = 0 })
+
+/** ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+#define EBPF_ALU_REG(OP, DST, SRC) \
+	((struct mptcp_rbs_ebpf_instr){ \
+	    .insn = BPF_ALU64_REG(OP, 0, 0), \
+	    .read = {[0] = {.used = 1, .temp = DST }, \
+		     [1] = {.used = 1, .temp = SRC }, \
+		     [2 ... 
4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ +#define EBPF_ALU32_REG(OP, DST, SRC) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_ALU32_REG(OP, 0, 0), \ + .read = {[0] = {.used = 1, .temp = DST }, \ + [1] = {.used = 1, .temp = SRC }, \ + [2 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ +#define EBPF_ALU_IMM(OP, DST, IMM) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_ALU64_IMM(OP, 0, IMM), \ + .read = {[0] = {.used = 1, .temp = DST }, \ + [1 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ +#define EBPF_ALU32_IMM(OP, DST, IMM) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_ALU32_IMM(OP, 0, IMM), \ + .read = {[0] = {.used = 1, .temp = DST }, \ + [1 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** Short form of mov, dst_reg = src_reg */ +#define EBPF_MOV_REG(DST, SRC) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_MOV64_REG(0, 0), \ + .read = {[0] = {.used = 0, .temp = 0 }, \ + [1] = {.used = 1, .temp = SRC }, \ + [2 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** Short form of mov, dst_reg = src_reg where src is a "real" BPF register */ +#define EBPF_MOV_RAW_REG(DST, SRC) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_MOV64_REG(0, SRC), \ + .read = {[0 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** Short form of mov, dst_reg = imm32 */ +#define EBPF_MOV_IMM(DST, IMM) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_MOV64_IMM(0, IMM), \ + .read = {[0 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** Memory load, dst_reg = *(uint *) (src_reg + off16) */ +#define EBPF_LDX_MEM(SIZE, DST, SRC, OFF) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_LDX_MEM(SIZE, 0, 0, OFF), \ + .read = {[0] = {.used = 0, .temp = 0 }, \ + [1] = {.used = 1, .temp = SRC }, \ + [2 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** Memory store, *(uint *) (dst_reg + off16) = src_reg */ +#define EBPF_STX_MEM(SIZE, DST, SRC, OFF) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_STX_MEM(SIZE, 0, 0, OFF), \ + .read = {[0] = {.used = 1, .temp = DST }, \ + [1] = {.used = 1, .temp = SRC }, \ + [2 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 0, .temp = 0 } }) + +/** Memory store, *(uint *) (dst_reg + off16) = imm32 */ +#define EBPF_ST_MEM(SIZE, DST, OFF, IMM) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_ST_MEM(SIZE, 0, OFF, IMM), \ + .read = {[0] = {.used = 1, .temp = DST }, \ + [1 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 0, .temp = 0 } }) + +/** + * Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + + * off16. Note that the jump target is given by the next/next_else fields of the + * owning eBPF block + */ +#define EBPF_JMP_REG(OP, DST, SRC) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_JMP_REG(OP, 0, 0, 0), \ + .read = {[0] = {.used = 1, .temp = DST }, \ + [1] = {.used = 1, .temp = SRC }, \ + [2 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 0, .temp = 0 } }) + +/** + * Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + + * off16. 
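+ * For example (illustration only), EBPF_JMP_IMM(BPF_JEQ, t, 0) branches
+ * when temporary t equals zero.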
Note that the jump target is given by the next/next_else fields of the
+ * owning eBPF block
+ */
+#define EBPF_JMP_IMM(OP, DST, IMM) \
+	((struct mptcp_rbs_ebpf_instr){ \
+	    .insn = BPF_JMP_IMM(OP, 0, IMM, 0), \
+	    .read = {[0] = {.used = 1, .temp = DST }, \
+		     [1 ... 4] = {.used = 0, .temp = 0 } }, \
+	    .write = {.used = 0, .temp = 0 } })
+
+/**
+ * Unconditional jumps, goto pc + off16. Note that the jump target is given by
+ * the next/next_else fields of the owning eBPF block
+ */
+#define EBPF_JMP_OFF() \
+	((struct mptcp_rbs_ebpf_instr){ \
+	    .insn = BPF_JMP_OFF(0), \
+	    .read = {[0 ... 4] = {.used = 0, .temp = 0 } }, \
+	    .write = {.used = 0, .temp = 0 } })
+
+/** Function call */
+#define EBPF_CALL(FUNC, ARG1, ARG2, ARG3, ARG4, ARG5, RES) \
+	EBPF_RAW_INSTR(BPF_EMIT_CALL(FUNC), ARG1, ARG2, ARG3, ARG4, ARG5, RES)
+
+/** Program exit */
+#define EBPF_EXIT() \
+	((struct mptcp_rbs_ebpf_instr){ \
+	    .insn = BPF_EXIT_INSN(), \
+	    .read = {[0 ... 4] = {.used = 0, .temp = 0 } }, \
+	    .write = {.used = 0, .temp = 0 } })
+
+/** Raw instruction whose temporary operands are given explicitly (-1 = unused) */
+#define EBPF_RAW_INSTR(INSN, R1, R2, R3, R4, R5, W) \
+	((struct mptcp_rbs_ebpf_instr){ \
+	    .insn = INSN, \
+	    .read = {[0] = {.used = R1 != -1, .temp = R1 }, \
+		     [1] = {.used = R2 != -1, .temp = R2 }, \
+		     [2] = {.used = R3 != -1, .temp = R3 }, \
+		     [3] = {.used = R4 != -1, .temp = R4 }, \
+		     [4] = {.used = R5 != -1, .temp = R5 } }, \
+	    .write = {.used = W != -1, .temp = W } })
+
+struct bpf_prog;
+
+/** Information about a used temporary */
+struct mptcp_rbs_ebpf_instr_temp_info {
+	u8 used : 1, temp : 7;
+};
+
+/** A single eBPF instruction using temporaries instead of "real" registers */
+struct mptcp_rbs_ebpf_instr {
+	struct bpf_insn insn;
+	struct mptcp_rbs_ebpf_instr_temp_info read[MAX_ARGS];
+	struct mptcp_rbs_ebpf_instr_temp_info write;
+};
+
+/** A single eBPF block */
+struct mptcp_rbs_ebpf_block {
+	/** Number of instructions in the block */
+	int instr_count;
+	/** Tag for various values during register allocation */
+	void *tag;
+	/** Array of instructions inside the block */
+	struct mptcp_rbs_ebpf_instr *instrs;
+	/**
+	 * Pointer to the next block or NULL. This field describes the offset
+	 * of a ja instruction
+	 */
+	struct mptcp_rbs_ebpf_block *next;
+	/**
+	 * Pointer to the next alternative block or NULL.
This field describes
+	 * the offset of all jump instructions with an offset except ja
+	 */
+	struct mptcp_rbs_ebpf_block *next_else;
+};
+
+/**
+ * Performs the register allocation
+ * @first_block: The first eBPF block
+ * @used_temps: Number of used temporaries
+ * @prog: eBPF program where the resulting code should be stored
+ * @return: The eBPF program with the resulting code
+ */
+struct bpf_prog *mptcp_rbs_ebpf_alloc_regs(
+    struct mptcp_rbs_ebpf_block *first_block, int used_temps,
+    struct bpf_prog *prog);
+
+/**
+ * Releases a single eBPF block
+ * @block: The eBPF block
+ */
+void mptcp_rbs_ebpf_block_free(struct mptcp_rbs_ebpf_block *block);
+
+/**
+ * Releases all eBPF blocks in a CFG
+ * @first_block: The first eBPF block
+ */
+void mptcp_rbs_ebpf_blocks_free(struct mptcp_rbs_ebpf_block *first_block);
+
+#endif
diff --git a/net/mptcp/mptcp_rbs_optimizer_lu.c b/net/mptcp/mptcp_rbs_optimizer_lu.c
new file mode 100644
index 0000000000000..ae4068d6145d4
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_optimizer_lu.c
@@ -0,0 +1,199 @@
+#include "mptcp_rbs_optimizer_lu.h"
+#include "mptcp_rbs_cfg.h"
+#include "mptcp_rbs_optimizer.h"
+#include "mptcp_rbs_scheduler.h"
+#include "mptcp_rbs_smt.h"
+#include "mptcp_rbs_value.h"
+
+static struct mptcp_rbs_smt_var *find_next(
+    struct mptcp_rbs_cfg_block *block, struct mptcp_rbs_cfg_block_list *list,
+    struct mptcp_rbs_cfg_block_list *done_list,
+    struct mptcp_rbs_cfg_block **found_block)
+{
+	struct mptcp_rbs_cfg_block *block2;
+	struct mptcp_rbs_smt_var *var_smt;
+	struct mptcp_rbs_smt *smt;
+	bool already_done = false;
+
+	/* Check if the block was already visited */
+	FOREACH_BLOCK(list, block2, if (block == block2) return NULL);
+	ADD_BLOCK(list, block);
+
+	/* Check if this block holds a NEXT value that was already processed */
+	FOREACH_BLOCK(done_list, block2, if (block == block2) {
+		already_done = true;
+		break;
+	});
+
+	if (!already_done) {
+		smt = block->first_smt;
+		while (smt) {
+			if (smt->kind == SMT_KIND_VAR) {
+				var_smt = (struct mptcp_rbs_smt_var *) smt;
+
+				if (var_smt->value->kind ==
+					VALUE_KIND_SBFLIST_NEXT &&
+				    ((struct mptcp_rbs_value_sbf_list_next *)
+					 var_smt->value)
+					    ->list->kind ==
+					VALUE_KIND_SBFLIST_VAR) {
+					ADD_BLOCK(done_list, block);
+					*found_block = block;
+					return var_smt;
+				}
+			}
+
+			smt = smt->next;
+		}
+	}
+
+	if (block->next) {
+		var_smt = find_next(block->next, list, done_list, found_block);
+		if (var_smt)
+			return var_smt;
+	}
+
+	if (block->next_else) {
+		var_smt =
+		    find_next(block->next_else, list, done_list, found_block);
+		if (var_smt)
+			return var_smt;
+	}
+
+	return NULL;
+}
+
+struct clone_ctx {
+	int var_number;
+	int list_var_number;
+	int i;
+};
+
+static struct mptcp_rbs_value *clone_user(void *user_ctx,
+					  const struct mptcp_rbs_value *value)
+{
+	struct clone_ctx *ctx = user_ctx;
+	const struct mptcp_rbs_value_sbf_var *var_value;
+
+	if (value->kind != VALUE_KIND_SBF_VAR)
+		return NULL;
+
+	var_value = (const struct mptcp_rbs_value_sbf_var *) value;
+	if (var_value->var_number != ctx->var_number)
+		return NULL;
+
+	return (struct mptcp_rbs_value *) mptcp_rbs_value_sbf_list_get_new(
+	    (struct mptcp_rbs_value_sbf_list *)
+		mptcp_rbs_value_sbf_list_var_new(ctx->list_var_number),
+	    (struct mptcp_rbs_value_int *) mptcp_rbs_value_constint_new(
+		ctx->i));
+}
+
+static void clone_smts(struct mptcp_rbs_cfg_block *block,
+		       const struct mptcp_rbs_smt *smt_template, int var_number,
+		       int list_var_number, int i)
+{
+	struct clone_ctx clone_ctx;
+
+	clone_ctx.var_number = var_number;
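+	/* With clone_user() above, every use of the loop variable (an
+	 * SBF_VAR with var_number) inside the template is rewritten to a
+	 * GET(i) on the subflow list, so iteration i of the unrolled loop
+	 * operates on the i-th subflow. For illustration (hypothetical
+	 * rule): the body of a FOREACH over SUBFLOWS is cloned once per
+	 * subflow, acting on SUBFLOWS.GET(0), SUBFLOWS.GET(1), and so on.
+	 */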
clone_ctx.list_var_number = list_var_number; + clone_ctx.i = i; + + while (smt_template) { + struct mptcp_rbs_smt *clone; + + clone = + mptcp_rbs_smt_clone(smt_template, &clone_ctx, clone_user); + + mptcp_rbs_cfg_block_append(block, clone); + smt_template = smt_template->next; + } +} + +static void unroll(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_smt_var *var_smt, + struct mptcp_rbs_cfg_block *block) +{ + int list_var_number; + struct mptcp_rbs_smt *smt_template; + struct mptcp_rbs_cfg_block *next_block = block->next; + int i; + + list_var_number = + ((struct mptcp_rbs_value_sbf_list_var + *) ((struct mptcp_rbs_value_sbf_list_next *) var_smt->value) + ->list) + ->var_number; + smt_template = next_block->first_smt; + next_block->first_smt = NULL; + + for (i = 0; i < ctx->variation->sbf_num; ++i) { + clone_smts(next_block, smt_template, var_smt->var_number, + list_var_number, i); + } + smt_template->free(smt_template); + + block->condition->free(block->condition); + block->condition = NULL; + block->next->next = block->next_else; + block->next_else = NULL; +} + +static bool find_loop(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_cfg_block_list *done_list) +{ + struct mptcp_rbs_cfg_block_list list; + struct mptcp_rbs_smt_var *var_smt; + struct mptcp_rbs_cfg_block *block; + + INIT_BLOCK_LIST(&list); + var_smt = + find_next(ctx->variation->first_block, &list, done_list, &block); + + if (var_smt && block->condition && + block->condition->kind == VALUE_KIND_IS_NOT_NULL) { + struct mptcp_rbs_value_is_not_null *cond = + (struct mptcp_rbs_value_is_not_null *) block->condition; + + if (cond->operand->kind == VALUE_KIND_SBF_VAR && + ((struct mptcp_rbs_value_sbf_var *) cond->operand) + ->var_number == var_smt->var_number) { + /* TODO Support unrolling of ifs etc. 
+ clone blocks until goto to loop block is found + + - Search for block with goto = loop block and + remember it + - Set the tag to a special value and the goto to NULL + - Find the tag in the cloned ones and set the goto + */ + + /* Check if the loop is short enough to be + * unrolled */ + if (block->next && block->next->next == block) { + /* Can be unrolled */ + unroll(ctx, var_smt, block); + } + } + } + + FREE_BLOCK_LIST(&list); + return var_smt != NULL; +} + +void mptcp_rbs_opt_lu(struct mptcp_rbs_opt_ctx *ctx) +{ + struct mptcp_rbs_cfg_block_list list; + + if (!ctx->variation->sbf_num) { + /* Loop unrolling is only possible with a fixed number of + * subflows + */ + return; + } + + INIT_BLOCK_LIST(&list); + while (find_loop(ctx, &list)) { + /* Do nothing */ + } + FREE_BLOCK_LIST(&list); +} diff --git a/net/mptcp/mptcp_rbs_optimizer_lu.h b/net/mptcp/mptcp_rbs_optimizer_lu.h new file mode 100644 index 0000000000000..bcb6b055f2e48 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_lu.h @@ -0,0 +1,13 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_LU_H +#define _MPTCP_RBS_OPTIMIZER_LU_H + +struct mptcp_rbs_opt_ctx; + +/** + * Loop Unrolling: + * Unrolls FOREACH loops over subflows if possible + * @ctx: The optimization context + */ +void mptcp_rbs_opt_lu(struct mptcp_rbs_opt_ctx *ctx); + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer_vi.c b/net/mptcp/mptcp_rbs_optimizer_vi.c new file mode 100644 index 0000000000000..4e3a545484b32 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_vi.c @@ -0,0 +1,462 @@ +#include "mptcp_rbs_optimizer_vi.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value.h" + +static void opt_value(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_value **value_ptr) +{ + struct mptcp_rbs_value *value = *value_ptr; + +#define APPLY_ON_BIN(val) \ + opt_value(ctx, (struct mptcp_rbs_value **) &(val)->left_operand); \ + opt_value(ctx, (struct mptcp_rbs_value **) &(val)->right_operand); \ + break; + + switch (value->kind) { + case VALUE_KIND_CONSTINT: + case VALUE_KIND_CONSTSTRING: + case VALUE_KIND_NULL: + break; + case VALUE_KIND_BOOL_VAR: + case VALUE_KIND_INT_VAR: + case VALUE_KIND_STRING_VAR: + case VALUE_KIND_SBF_VAR: + case VALUE_KIND_SKB_VAR: + case VALUE_KIND_SKBLIST_VAR: { + /* Right now we only support inlining of SUBFLOWS list values */ + break; + } + case VALUE_KIND_SBFLIST_VAR: { + int var_index = + ((struct mptcp_rbs_value_sbf_list_var *) value)->var_number; + struct mptcp_rbs_opt_var_info *info = + &ctx->var_infos[var_index]; + + if (info->smt && + info->smt->value->kind == VALUE_KIND_SUBFLOWS) { + *value_ptr = + mptcp_rbs_value_clone(info->smt->value, NULL, NULL); + value->free(value); + --info->usage; + } + + break; + } + case VALUE_KIND_NOT: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_not *) value) + ->operand); + break; + } + case VALUE_KIND_EQUAL: { + APPLY_ON_BIN((struct mptcp_rbs_value_equal *) value) + } + case VALUE_KIND_UNEQUAL: { + APPLY_ON_BIN((struct mptcp_rbs_value_unequal *) value) + } + case VALUE_KIND_LESS: { + APPLY_ON_BIN((struct mptcp_rbs_value_less *) value) + } + case VALUE_KIND_LESS_EQUAL: { + APPLY_ON_BIN((struct mptcp_rbs_value_less_equal *) value) + } + case VALUE_KIND_GREATER: { + APPLY_ON_BIN((struct mptcp_rbs_value_greater *) value) + } + case VALUE_KIND_GREATER_EQUAL: { + APPLY_ON_BIN((struct mptcp_rbs_value_greater_equal *) value) + } + case VALUE_KIND_AND: { + APPLY_ON_BIN((struct mptcp_rbs_value_and *) value) + 
} + case VALUE_KIND_OR: { + APPLY_ON_BIN((struct mptcp_rbs_value_or *) value) + } + case VALUE_KIND_ADD: { + APPLY_ON_BIN((struct mptcp_rbs_value_add *) value) + } + case VALUE_KIND_SUBTRACT: { + APPLY_ON_BIN((struct mptcp_rbs_value_subtract *) value) + } + case VALUE_KIND_MULTIPLY: { + APPLY_ON_BIN((struct mptcp_rbs_value_multiply *) value) + } + case VALUE_KIND_DIVIDE: { + APPLY_ON_BIN((struct mptcp_rbs_value_divide *) value) + } + case VALUE_KIND_REMAINDER: { + APPLY_ON_BIN((struct mptcp_rbs_value_remainder *) value) + } + case VALUE_KIND_IS_NULL: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_is_null *) value) + ->operand); + break; + } + case VALUE_KIND_IS_NOT_NULL: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_is_not_null *) value) + ->operand); + break; + } + case VALUE_KIND_REG: + case VALUE_KIND_Q: + case VALUE_KIND_QU: + case VALUE_KIND_RQ: + case VALUE_KIND_CURRENT_TIME_MS: + case VALUE_KIND_RANDOM: + case VALUE_KIND_SBFLIST_FILTER_SBF: + case VALUE_KIND_SKBLIST_FILTER_SKB: + case VALUE_KIND_SUBFLOWS: { + /* Cannot use variables */ + break; + } + case VALUE_KIND_SBF_RTT: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_rtt *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_IS_BACKUP: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_is_backup *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_CWND: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_cwnd *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_SKBS_IN_FLIGHT: { + opt_value( + ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_skbs_in_flight *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_LOST_SKBS: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_lost_skbs *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_HAS_WINDOW_FOR: { + opt_value( + ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_has_window_for *) value) + ->sbf); + opt_value( + ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_has_window_for *) value) + ->skb); + break; + } + case VALUE_KIND_SBF_ID: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_id *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_DELAY_IN: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_delay_in *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_DELAY_OUT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_delay_out *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_BW_OUT_SEND: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_bw_out_send *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_BW_OUT_ACK: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_bw_out_ack *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_SSTHRESH: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_ssthresh *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_THROTTLED: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_throttled *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_LOSSY: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_lossy *) value) + ->sbf); + break; + } + case VALUE_KIND_SBFLIST_NEXT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct 
mptcp_rbs_value_sbf_list_next *) value) + ->list); + break; + } + case VALUE_KIND_SBFLIST_EMPTY: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_empty *) value) + ->list); + break; + } + case VALUE_KIND_SBFLIST_FILTER: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_filter *) value) + ->list); + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_filter *) value) + ->cond); + break; + } + case VALUE_KIND_SBFLIST_MAX: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_max *) value) + ->list); + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_max *) value) + ->cond); + break; + } + case VALUE_KIND_SBFLIST_MIN: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_min *) value) + ->list); + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_min *) value) + ->cond); + break; + } + case VALUE_KIND_SBFLIST_GET: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_get *) value) + ->list); + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_get *) value) + ->index); + break; + } + case VALUE_KIND_SBFLIST_COUNT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_count *) value) + ->list); + break; + } + case VALUE_KIND_SBFLIST_SUM: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_sum *) value) + ->list); + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_sum *) value) + ->cond); + break; + } + case VALUE_KIND_SKB_SENT_ON: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_sent_on *) value) + ->skb); + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_sent_on *) value) + ->sbf); + break; + } + case VALUE_KIND_SKB_SENT_ON_ALL: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_sent_on *) value) + ->skb); + break; + } + case VALUE_KIND_SKB_USER: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_user *) value) + ->skb); + break; + } + case VALUE_KIND_SKBLIST_NEXT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_next *) value) + ->list); + break; + } + case VALUE_KIND_SKBLIST_EMPTY: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_empty *) value) + ->list); + break; + } + case VALUE_KIND_SKBLIST_POP: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_pop *) value) + ->list); + break; + } + case VALUE_KIND_SKBLIST_FILTER: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_filter *) value) + ->list); + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_filter *) value) + ->cond); + break; + } + case VALUE_KIND_SKBLIST_COUNT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_count *) value) + ->list); + break; + } + case VALUE_KIND_SKBLIST_TOP: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_top *) value) + ->list); + break; + } + default: + break; + } +} + +static void opt_smt(struct mptcp_rbs_opt_ctx *ctx, struct mptcp_rbs_smt *smt) +{ + switch (smt->kind) { + case SMT_KIND_DROP: { + struct mptcp_rbs_smt_drop *drop_smt = + 
(struct mptcp_rbs_smt_drop *) smt; + + opt_value(ctx, (struct mptcp_rbs_value **) &drop_smt->skb); + break; + } + case SMT_KIND_PRINT: { + struct mptcp_rbs_smt_print *print_smt = + (struct mptcp_rbs_smt_print *) smt; + + opt_value(ctx, (struct mptcp_rbs_value **) &print_smt->msg); + if (print_smt->arg) + opt_value(ctx, &print_smt->arg); + break; + } + case SMT_KIND_PUSH: { + struct mptcp_rbs_smt_push *push_smt = + (struct mptcp_rbs_smt_push *) smt; + + opt_value(ctx, (struct mptcp_rbs_value **) &push_smt->sbf); + opt_value(ctx, (struct mptcp_rbs_value **) &push_smt->skb); + break; + } + case SMT_KIND_SET: { + struct mptcp_rbs_smt_set *set_smt = + (struct mptcp_rbs_smt_set *) smt; + + opt_value(ctx, (struct mptcp_rbs_value **) &set_smt->value); + break; + } + case SMT_KIND_SET_USER: { + struct mptcp_rbs_smt_set_user *set_user_smt = + (struct mptcp_rbs_smt_set_user *) smt; + + opt_value(ctx, (struct mptcp_rbs_value **) &set_user_smt->value); + break; + } + case SMT_KIND_VAR: { + struct mptcp_rbs_smt_var *var_smt = + (struct mptcp_rbs_smt_var *) smt; + + ctx->var_infos[var_smt->var_number].smt = var_smt; + opt_value(ctx, &var_smt->value); + break; + } + case SMT_KIND_VOID: { + struct mptcp_rbs_smt_void *void_smt = + (struct mptcp_rbs_smt_void *) smt; + + if (void_smt->value) + opt_value(ctx, &void_smt->value); + break; + } + case SMT_KIND_EBPF: { + /* Cannot optimize */ + break; + } + } +} + +static void opt_block(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block2; + struct mptcp_rbs_smt *smt; + + /* Check if the block was already visited */ + FOREACH_BLOCK(list, block2, if (block == block2) return ); + ADD_BLOCK(list, block); + + smt = block->first_smt; + while (smt) { + opt_smt(ctx, smt); + smt = smt->next; + } + + if (block->condition) + opt_value(ctx, (struct mptcp_rbs_value **) &block->condition); + if (block->next) + opt_block(ctx, block->next, list); + if (block->next_else) + opt_block(ctx, block->next_else, list); +} + +void mptcp_rbs_opt_vi(struct mptcp_rbs_opt_ctx *ctx) +{ + struct mptcp_rbs_cfg_block_list list; + + INIT_BLOCK_LIST(&list); + opt_block(ctx, ctx->variation->first_block, &list); + FREE_BLOCK_LIST(&list); +} diff --git a/net/mptcp/mptcp_rbs_optimizer_vi.h b/net/mptcp/mptcp_rbs_optimizer_vi.h new file mode 100644 index 0000000000000..b449b7ba0cbc7 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_vi.h @@ -0,0 +1,13 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_VI_H +#define _MPTCP_RBS_OPTIMIZER_VI_H + +struct mptcp_rbs_opt_ctx; + +/** + * Variable inlining: + * Inlines the value of a variable directly where the variable is used + * @ctx: The optimization context + */ +void mptcp_rbs_opt_vi(struct mptcp_rbs_opt_ctx *ctx); + +#endif diff --git a/net/mptcp/mptcp_rbs_parser.c b/net/mptcp/mptcp_rbs_parser.c new file mode 100644 index 0000000000000..cde89b2299cf2 --- /dev/null +++ b/net/mptcp/mptcp_rbs_parser.c @@ -0,0 +1,1933 @@ +#include "mptcp_rbs_parser.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_ctx.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value_parser.h" + +/* Macro to get the name of the parse function of a value */ +#define PARSE_FUNC(STRUCT) STRUCT##_parse + +/* Macro to ignore a value */ +#define APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + +/* Macro to parse custom values without owner */ +#define APPLY_PARSE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) \ + if (!strcmp(str, STR)) { \ + struct STRUCT *value = 
PARSE_FUNC(STRUCT)(ctx); \ + if (!value) \ + return NULL; \ + return (struct mptcp_rbs_value *) value; \ + } + +/* Macro to parse custom values with a subflow owner */ +#define APPLY_PARSE_SBF_VALUE(ENUM, STR, STRUCT, RETURNTYPE) \ + if (!strcmp(str, STR)) { \ + struct STRUCT *value2 = PARSE_FUNC(STRUCT)( \ + ctx, (struct mptcp_rbs_value_sbf *) value); \ + if (!value2) { \ + value->free(value); \ + return NULL; \ + } \ + value = (struct mptcp_rbs_value *) value2; \ + break; \ + } + +/* Macro to parse custom values with a subflow list owner */ +#define APPLY_PARSE_SBF_LIST_VALUE(ENUM, STR, STRUCT, RETURNTYPE) \ + if (!strcmp(str, STR)) { \ + struct STRUCT *value2 = PARSE_FUNC(STRUCT)( \ + ctx, (struct mptcp_rbs_value_sbf_list *) value); \ + if (!value2) { \ + value->free(value); \ + return NULL; \ + } \ + value = (struct mptcp_rbs_value *) value2; \ + break; \ + } + +/* Macro to parse custom values with a sockbuffer owner */ +#define APPLY_PARSE_SKB_VALUE(ENUM, STR, STRUCT, RETURNTYPE) \ + if (!strcmp(str, STR)) { \ + struct STRUCT *value2 = PARSE_FUNC(STRUCT)( \ + ctx, (struct mptcp_rbs_value_skb *) value); \ + if (!value2) { \ + value->free(value); \ + return NULL; \ + } \ + value = (struct mptcp_rbs_value *) value2; \ + break; \ + } + +/* Macro to parse custom values with a sockbuffer list owner */ +#define APPLY_PARSE_SKB_LIST_VALUE(ENUM, STR, STRUCT, RETURNTYPE) \ + if (!strcmp(str, STR)) { \ + struct STRUCT *value2 = PARSE_FUNC(STRUCT)( \ + ctx, (struct mptcp_rbs_value_skb_list *) value); \ + if (!value2) { \ + value->free(value); \ + return NULL; \ + } \ + value = (struct mptcp_rbs_value *) value2; \ + break; \ + } + +static bool expect_token(struct parse_ctx *ctx, enum mptcp_rbs_token_kind kind, + struct mptcp_rbs_token *token) +{ + if (!mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + return false; + } + + if (token->kind != kind) { + char s1[TOKEN_BUFFER_LEN]; + char s2[TOKEN_BUFFER_LEN]; + + memset(s1, 0, TOKEN_BUFFER_LEN); + memset(s2, 0, TOKEN_BUFFER_LEN); + + mptcp_rbs_token_kind_to_string(kind, s1); + mptcp_rbs_token_to_string(token, s2); + + printk("%d: Token %s expected but %s found\n", token->position, + s1, s2); + return false; + } + + return true; +} + +static bool lookahead_token(struct parse_ctx *ctx, + struct mptcp_rbs_token *token) +{ + return mptcp_rbs_get_next_token_lookahead( + ctx->str, ctx->position, ctx->line, ctx->line_position, token); +} + +int sprintf_null(char **buf, const char *fmt, ...) 
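+/* Writes through *buf when a buffer is given (advancing *buf past the
+ * formatted text), and only measures the would-be length when buf or *buf is
+ * NULL. Typical two-pass use (a sketch, not from the source): first call
+ * sprintf_null(NULL, fmt, ...) to size the output, allocate len + 1 bytes,
+ * then call sprintf_null(&ptr, fmt, ...) to emit it.
+ */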
+{ + int n; + + va_list args; + va_start(args, fmt); + + if (buf && *buf) { + n = vsprintf(*buf, fmt, args); + *buf += n; + } else + n = vsnprintf(NULL, 0, fmt, args); + + va_end(args); + + return n; +} + +static struct mptcp_rbs_value *parse_value(struct parse_ctx *ctx, + int *value_position); + +static struct mptcp_rbs_value *parse_value_base(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_token token; + + if (!mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + return NULL; + } + + *value_position = token.position; + + switch (token.kind) { + case TOKEN_KIND_NUMBER: + return (struct mptcp_rbs_value *) mptcp_rbs_value_constint_new( + token.number); + case TOKEN_KIND_STRING: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_conststring_new(strclone(token.string)); + case TOKEN_KIND_NULL: + return (struct mptcp_rbs_value *) mptcp_rbs_value_null_new(); + case TOKEN_KIND_IDENT: { + const char *str = token.string; + struct repl *repl; + struct var *var; + + /* Might be register */ + if (str[0] == 'R' && str[1] >= '0' && str[1] <= '9' && + str[2] == 0) { + if (str[1] < '1' || + str[1] > ('0' + MPTCP_RBS_REG_COUNT)) { + printk("%d: Register name %s is invalid\n", + token.position, token.string); + return NULL; + } + + return (struct mptcp_rbs_value *) + mptcp_rbs_value_reg_new(str[1] - '1'); + } + +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_PARSE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + + /* Might be replacement */ + FOREACH_REPL(&ctx->repls, repl, if (!strcmp(str, repl->name)) { + return repl->new_value(repl->tag); + }); + + /* Might be variable */ + FOREACH_STACK_VAR( + &ctx->var_stack, var, if (!strcmp(str, var->name)) { + switch (var->type) { + case TYPE_KIND_NULL: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_null_new(); + case TYPE_KIND_BOOL: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_bool_var_new( + var->var_number); + case TYPE_KIND_INT: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_int_var_new( + var->var_number); + case TYPE_KIND_STRING: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_string_var_new( + var->var_number); + case TYPE_KIND_SBF: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_sbf_var_new( + var->var_number); + case TYPE_KIND_SBFLIST: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_sbf_list_var_new( + var->var_number); + case TYPE_KIND_SKB: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_skb_var_new( + var->var_number, var->reinject); + case TYPE_KIND_SKBLIST: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_skb_list_var_new( + var->var_number, + var->underlying_queue_kind); + } + }); + + /* Not found */ + printk("%d:%d (%d): Unknown function/property %s\n", token.line, + token.line_position, token.position, str); + return NULL; + } + case TOKEN_KIND_OPEN_BRACKET: { + struct mptcp_rbs_value 
*inner = + parse_value(ctx, value_position); + if (!inner) + return NULL; + + *value_position = token.position; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + inner->free(inner); + return NULL; + } + + return inner; + } + default: { + char s1[TOKEN_BUFFER_LEN]; + + memset(s1, 0, TOKEN_BUFFER_LEN); + mptcp_rbs_token_to_string(&token, s1); + + printk("%d: Value expected but %s found\n", token.position, s1); + return NULL; + } + } +} + +static struct mptcp_rbs_value *parse_value_dot(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value *value; + + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + return NULL; + } + + value = parse_value_base(ctx, value_position); + if (!value) + return NULL; + + while (true) { + /* . might follow */ + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + value->free(value); + return NULL; + } + + if (token.kind != TOKEN_KIND_DOT) + break; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* Identifier, SET_USER or PUSH must follow */ + if (!lookahead_token(ctx, &token)) { + value->free(value); + return NULL; + } + + if (token.kind == TOKEN_KIND_PUSH || token.kind == TOKEN_KIND_SET_USER) { + if (mptcp_rbs_value_get_type(value->kind) != + TYPE_KIND_SBF) { + printk( + "%d: Unknown function/property %s for %s\n", + token.position, token.string, + mptcp_rbs_type_get_name( + mptcp_rbs_value_get_type(value->kind))); + value->free(value); + return NULL; + } + + return value; + } + + if (!expect_token(ctx, TOKEN_KIND_IDENT, &token)) { + value->free(value); + return NULL; + } + + switch (mptcp_rbs_value_get_type(value->kind)) { + case TYPE_KIND_SBF: { + const char *str = token.string; + +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_PARSE_SBF_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + + /* Not found */ + printk("%d: Unknown function/property %s for subflow\n", + token.position, str); + value->free(value); + return NULL; + } + case TYPE_KIND_SBFLIST: { + const char *str = token.string; + +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_PARSE_SBF_LIST_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + + /* Not found */ + printk("%d:%d (%d): Unknown function/property 
%s for " + "subflow " + "list\n", + token.line, token.line_position, token.position, + str); + value->free(value); + return NULL; + } + case TYPE_KIND_SKB: { + const char *str = token.string; + +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_PARSE_SKB_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + + /* Not found */ + printk( + "%d: Unknown function/property %s for sockbuffer\n", + token.position, str); + value->free(value); + return NULL; + } + case TYPE_KIND_SKBLIST: { + const char *str = token.string; + +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_PARSE_SKB_LIST_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + + /* Not found */ + printk("%d: Unknown function/property %s for " + "sockbuffer list\n", + token.position, str); + value->free(value); + return NULL; + } + default: { + printk("%d: Unknown function/property %s for %s\n", + token.position, token.string, + mptcp_rbs_type_get_name( + mptcp_rbs_value_get_type(value->kind))); + value->free(value); + return NULL; + } + } + } + + return value; +} + +static struct mptcp_rbs_value *parse_value_not(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_value *inner; + struct mptcp_rbs_token token; + bool negate; + + /* ! might follow */ + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + return NULL; + } + + negate = token.kind == TOKEN_KIND_NOT; + if (negate) + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + inner = parse_value_dot(ctx, value_position); + if (!inner) + return NULL; + + if (negate) { + enum mptcp_rbs_type_kind type = + mptcp_rbs_value_get_type(inner->kind); + + if (type != TYPE_KIND_BOOL) { + printk("%d: ! 
operator cannot be applied on %s\n", + token.position, mptcp_rbs_type_get_name(type)); + inner->free(inner); + return NULL; + } + + return (struct mptcp_rbs_value *) mptcp_rbs_value_not_new( + (struct mptcp_rbs_value_bool *) inner); + } + + return inner; +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch" +static struct mptcp_rbs_value *parse_value_multiply(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_value *left_value; + struct mptcp_rbs_value *right_value; + enum mptcp_rbs_type_kind left_type; + enum mptcp_rbs_type_kind right_type; + int right_value_position; + struct mptcp_rbs_token token; + + left_value = parse_value_not(ctx, value_position); + if (!left_value) + return NULL; + + while (true) { + /* *, / or % might follow */ + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + left_value->free(left_value); + return NULL; + } + if (token.kind != TOKEN_KIND_MUL && + token.kind != TOKEN_KIND_DIV && + token.kind != TOKEN_KIND_REM) + break; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + right_value = parse_value_not(ctx, &right_value_position); + if (!right_value) { + left_value->free(left_value); + return NULL; + } + + left_type = mptcp_rbs_value_get_type(left_value->kind); + right_type = mptcp_rbs_value_get_type(right_value->kind); + + if (left_type != TYPE_KIND_INT || right_type != TYPE_KIND_INT) { + switch (token.kind) { + case TOKEN_KIND_MUL: { + printk("%d: * operator cannot be applied on %s " + "and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + break; + } + case TOKEN_KIND_DIV: { + printk("%d: / operator cannot be applied on %s " + "and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + break; + } + case TOKEN_KIND_REM: { + printk("%d: %% operator cannot be applied on " + "%s and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + break; + } + } + + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + switch (token.kind) { + case TOKEN_KIND_MUL: { + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_multiply_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_DIV: { + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_divide_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_REM: { + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_remainder_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + } + } + + return left_value; +} +#pragma GCC diagnostic pop + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch" +static struct mptcp_rbs_value *parse_value_add(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_value *left_value; + struct mptcp_rbs_value *right_value; + enum mptcp_rbs_type_kind left_type; + enum mptcp_rbs_type_kind right_type; + int right_value_position; + struct mptcp_rbs_token token; + + left_value = parse_value_multiply(ctx, value_position); + if (!left_value) + return NULL; + + while (true) { + /* + or - might follow */ + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + left_value->free(left_value); + return NULL; + } + if (token.kind != 
TOKEN_KIND_ADD && + token.kind != TOKEN_KIND_SUB) + break; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + right_value = parse_value_multiply(ctx, &right_value_position); + if (!right_value) { + left_value->free(left_value); + return NULL; + } + + left_type = mptcp_rbs_value_get_type(left_value->kind); + right_type = mptcp_rbs_value_get_type(right_value->kind); + + if (left_type != TYPE_KIND_INT || right_type != TYPE_KIND_INT) { + switch (token.kind) { + case TOKEN_KIND_ADD: { + printk("%d: + operator cannot be applied on %s " + "and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + break; + } + case TOKEN_KIND_SUB: { + printk("%d: - operator cannot be applied on %s " + "and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + break; + } + } + + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + switch (token.kind) { + case TOKEN_KIND_ADD: { + left_value = + (struct mptcp_rbs_value *) mptcp_rbs_value_add_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_SUB: { + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_subtract_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + } + } + + return left_value; +} +#pragma GCC diagnostic pop + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch" +static struct mptcp_rbs_value *parse_value_cmp(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_value *left_value; + struct mptcp_rbs_value *right_value; + enum mptcp_rbs_type_kind left_type; + enum mptcp_rbs_type_kind right_type; + int right_value_position; + struct mptcp_rbs_token token; + + left_value = parse_value_add(ctx, value_position); + if (!left_value) + return NULL; + + while (true) { + /* =, !=, <, <=, > or >= might follow */ + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + left_value->free(left_value); + return NULL; + } + if (token.kind != TOKEN_KIND_EQUAL && + token.kind != TOKEN_KIND_UNEQUAL && + token.kind != TOKEN_KIND_LESS && + token.kind != TOKEN_KIND_LESS_EQUAL && + token.kind != TOKEN_KIND_GREATER && + token.kind != TOKEN_KIND_GREATER_EQUAL) + break; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + right_value = parse_value_add(ctx, &right_value_position); + if (!right_value) { + left_value->free(left_value); + return NULL; + } + + left_type = mptcp_rbs_value_get_type(left_value->kind); + right_type = mptcp_rbs_value_get_type(right_value->kind); + + switch (token.kind) { + case TOKEN_KIND_EQUAL: { + if (right_type == TYPE_KIND_NULL) { + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_is_null_new(left_value); + + right_value->free(right_value); + break; + } + + if (left_type != TYPE_KIND_INT || + right_type != TYPE_KIND_INT) { + printk("%d: = operator cannot be applied on %s " + "and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_equal_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_UNEQUAL: { + if (right_type == TYPE_KIND_NULL) { + 
left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_is_not_null_new(left_value); + + right_value->free(right_value); + break; + } + + if (left_type != TYPE_KIND_INT || + right_type != TYPE_KIND_INT) { + printk("%d: != operator cannot be applied on " + "%s and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_unequal_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_LESS: { + if (left_type != TYPE_KIND_INT || + right_type != TYPE_KIND_INT) { + printk("%d: < operator cannot be applied on %s " + "and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = + (struct mptcp_rbs_value *) mptcp_rbs_value_less_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_LESS_EQUAL: { + if (left_type != TYPE_KIND_INT || + right_type != TYPE_KIND_INT) { + printk("%d: <= operator cannot be applied on " + "%s and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_less_equal_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_GREATER: { + if (left_type != TYPE_KIND_INT || + right_type != TYPE_KIND_INT) { + printk("%d: > operator cannot be applied on %s " + "and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_greater_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_GREATER_EQUAL: { + if (left_type != TYPE_KIND_INT || + right_type != TYPE_KIND_INT) { + printk("%d: >= operator cannot be applied on " + "%s and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_greater_equal_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + } + } + + return left_value; +} +#pragma GCC diagnostic pop + +static struct mptcp_rbs_value *parse_value_and(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_value *left_value; + struct mptcp_rbs_value *right_value; + enum mptcp_rbs_type_kind left_type; + enum mptcp_rbs_type_kind right_type; + int right_value_position; + struct mptcp_rbs_token token; + + left_value = parse_value_cmp(ctx, value_position); + if (!left_value) + return NULL; + + while (true) { + /* OR might follow */ + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + left_value->free(left_value); + return NULL; + } + if (token.kind != TOKEN_KIND_AND) + break; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + right_value = 
parse_value_cmp(ctx, &right_value_position); + if (!right_value) { + left_value->free(left_value); + return NULL; + } + + left_type = mptcp_rbs_value_get_type(left_value->kind); + right_type = mptcp_rbs_value_get_type(right_value->kind); + + if (left_type != TYPE_KIND_BOOL || + right_type != TYPE_KIND_BOOL) { + printk( + "%d: AND operator cannot be applied on %s and %s\n", + token.position, mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = (struct mptcp_rbs_value *) mptcp_rbs_value_and_new( + (struct mptcp_rbs_value_bool *) left_value, + (struct mptcp_rbs_value_bool *) right_value); + } + + return left_value; +} + +static struct mptcp_rbs_value *parse_value_allow_pop(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_value *left_value; + struct mptcp_rbs_value *right_value; + enum mptcp_rbs_type_kind left_type; + enum mptcp_rbs_type_kind right_type; + int right_value_position; + struct mptcp_rbs_token token; + + left_value = parse_value_and(ctx, value_position); + if (!left_value) + return NULL; + + while (true) { + /* OR might follow */ + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + left_value->free(left_value); + return NULL; + } + if (token.kind != TOKEN_KIND_OR) + break; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + right_value = parse_value_and(ctx, &right_value_position); + if (!right_value) { + left_value->free(left_value); + return NULL; + } + + left_type = mptcp_rbs_value_get_type(left_value->kind); + right_type = mptcp_rbs_value_get_type(right_value->kind); + + if (left_type != TYPE_KIND_BOOL || + right_type != TYPE_KIND_BOOL) { + printk( + "%d: OR operator cannot be applied on %s and %s\n", + token.position, mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = (struct mptcp_rbs_value *) mptcp_rbs_value_or_new( + (struct mptcp_rbs_value_bool *) left_value, + (struct mptcp_rbs_value_bool *) right_value); + } + + return left_value; +} + +static struct mptcp_rbs_value *parse_value(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_value *value; + + value = parse_value_allow_pop(ctx, value_position); + if (value && value->kind == VALUE_KIND_SKBLIST_POP) { + printk("%d: POP can only be used inside DROP or PUSH\n", + *value_position); + value->free(value); + return NULL; + } + + return value; +} + +static struct mptcp_rbs_value_bool *parse_value_bool(struct parse_ctx *ctx) +{ + struct mptcp_rbs_value *result; + enum mptcp_rbs_type_kind type; + int value_position; + + result = parse_value(ctx, &value_position); + if (!result) + return NULL; + + type = mptcp_rbs_value_get_type(result->kind); + + if (type != TYPE_KIND_BOOL) { + printk("%d: Boolean value expected but %s found\n", + value_position, mptcp_rbs_type_get_name(type)); + result->free(result); + return NULL; + } + + return (struct mptcp_rbs_value_bool *) result; +} + +static struct mptcp_rbs_value_int *parse_value_int(struct parse_ctx *ctx) +{ + struct mptcp_rbs_value *result; + enum mptcp_rbs_type_kind type; + int value_position; + + result = parse_value(ctx, &value_position); + if (!result) + return NULL; + + type = mptcp_rbs_value_get_type(result->kind); + + if (type != TYPE_KIND_INT) { + printk("%d: Integer value expected but %s found\n", + 
value_position, mptcp_rbs_type_get_name(type)); + result->free(result); + return NULL; + } + + return (struct mptcp_rbs_value_int *) result; +} + +static struct mptcp_rbs_value_string *parse_value_string(struct parse_ctx *ctx) +{ + struct mptcp_rbs_value *result; + enum mptcp_rbs_type_kind type; + int value_position; + + result = parse_value(ctx, &value_position); + if (!result) + return NULL; + + type = mptcp_rbs_value_get_type(result->kind); + + if (type != TYPE_KIND_STRING) { + printk("%d: String value expected but %s found\n", + value_position, mptcp_rbs_type_get_name(type)); + result->free(result); + return NULL; + } + + return (struct mptcp_rbs_value_string *) result; +} + +static struct mptcp_rbs_value_sbf *parse_value_sbf(struct parse_ctx *ctx) +{ + struct mptcp_rbs_value *result; + enum mptcp_rbs_type_kind type; + int value_position; + + result = parse_value(ctx, &value_position); + if (!result) + return NULL; + + type = mptcp_rbs_value_get_type(result->kind); + + if (type != TYPE_KIND_SBF) { + printk("%d: Subflow value expected but %s found\n", + value_position, mptcp_rbs_type_get_name(type)); + result->free(result); + return NULL; + } + + return (struct mptcp_rbs_value_sbf *) result; +} + +static struct mptcp_rbs_value_skb *parse_value_skb(struct parse_ctx *ctx) +{ + struct mptcp_rbs_value *result; + enum mptcp_rbs_type_kind type; + int value_position; + + result = parse_value(ctx, &value_position); + if (!result) + return NULL; + + type = mptcp_rbs_value_get_type(result->kind); + + if (type != TYPE_KIND_SKB) { + printk("%d: Sockbuffer value expected but %s found\n", + value_position, mptcp_rbs_type_get_name(type)); + result->free(result); + return NULL; + } + + return (struct mptcp_rbs_value_skb *) result; +} + +static struct mptcp_rbs_value_skb *parse_value_skb_allow_pop( + struct parse_ctx *ctx) +{ + struct mptcp_rbs_value *result; + enum mptcp_rbs_type_kind type; + int value_position; + + result = parse_value_allow_pop(ctx, &value_position); + if (!result) + return NULL; + + type = mptcp_rbs_value_get_type(result->kind); + + if (type != TYPE_KIND_SKB) { + printk("%d: Sockbuffer value expected but %s found\n", + value_position, mptcp_rbs_type_get_name(type)); + result->free(result); + return NULL; + } + + return (struct mptcp_rbs_value_skb *) result; +} + +static bool parse_smt(struct parse_ctx *ctx, struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt, bool *return_found); + +static bool parse_smt_drop(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value_skb *value; + struct mptcp_rbs_smt_drop *smt; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return false; + + /* SKB value must follow */ + value = parse_value_skb_allow_pop(ctx); + if (!value) + return false; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + return false; + } + + /* ; must follow */ + if (!expect_token(ctx, TOKEN_KIND_SEMICOLON, &token)) { + value->free(value); + return false; + } + + smt = mptcp_rbs_smt_drop_new(value); + if (*last_smt) + (*last_smt)->next = (struct mptcp_rbs_smt *) smt; + else + (*block)->first_smt = (struct mptcp_rbs_smt *) smt; + *last_smt = (struct mptcp_rbs_smt *) smt; + + return true; +} + +static bool parse_smts(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block 
**block, char *var_name, + int var_number, enum mptcp_rbs_type_kind var_type, + bool reinject, bool *return_found) +{ + bool result = true; + struct var *var; + struct var_list vars; + struct mptcp_rbs_token token; + struct mptcp_rbs_smt *last_smt = NULL; + + /* { must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_CURLY, &token)) + return false; + + INIT_VAR_LIST(&vars); + PUSH_VAR_LIST(&ctx->var_stack, &vars); + if (var_name) { + var = var_new(var_name, var_number, var_type, &reinject, NULL); + ADD_VAR(&vars, var); + } + + while (true) { + if (!parse_smt(ctx, block, &last_smt, return_found)) { + result = false; + break; + } + + /* } might follow */ + if (!lookahead_token(ctx, &token)) { + result = false; + break; + } + if (token.kind == TOKEN_KIND_CLOSE_CURLY || *return_found) { + /* } must have followed */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_CURLY, &token)) + result = false; + break; + } + } + + POP_VAR_LIST(&ctx->var_stack); + FOREACH_VAR(&vars, var, var_free(var)); + FREE_VAR_LIST(&vars); + return result; +} + +/* + * FOREACH loops are translated into ifs and gotos as follows: + * + * FOREACH (VAR x IN y) { + * z; + * } + * + * --> + * + * b0: + * VAR v1 = y; + * GOTO b1; + * + * b1: + * VAR x = v1.NEXT(); + * if (x != NULL) GOTO b2 ELSE GOTO b3; + * + * b2: + * z; + * GOTO b1; + * + * b3: + * ... + */ +static bool parse_smt_foreach(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt, + bool *return_found) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_token ident_token; + int value_position; + struct mptcp_rbs_value *value; + enum mptcp_rbs_type_kind type; + int var_number; + struct mptcp_rbs_smt_var *var_smt; + struct mptcp_rbs_cfg_block *next_block; + struct mptcp_rbs_cfg_block *last_block; + enum mptcp_rbs_value_kind underlying_queue_kind; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return false; + + /* VAR must follow */ + if (!expect_token(ctx, TOKEN_KIND_VAR, &token)) + return false; + + /* Identifier must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &ident_token)) + return false; + + /* IN must follow */ + if (!expect_token(ctx, TOKEN_KIND_IN, &token)) + return false; + + /* Subflow or sockbuffer list must follow */ + value = parse_value(ctx, &value_position); + if (!value) + return false; + underlying_queue_kind = ctx->underlying_queue_kind; + + type = mptcp_rbs_value_get_type(value->kind); + if (type != TYPE_KIND_SBFLIST && type != TYPE_KIND_SKBLIST) { + printk("%d: List value expected but %s found\n", value_position, + mptcp_rbs_type_get_name(type)); + value->free(value); + return false; + } + + /* Create VAR v1 = y; */ + var_number = ctx->var_index; + var_smt = mptcp_rbs_smt_var_new(var_number, false, value); + ++ctx->var_index; + if (*last_smt) + (*last_smt)->next = (struct mptcp_rbs_smt *) var_smt; + else + (*block)->first_smt = (struct mptcp_rbs_smt *) var_smt; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) + return false; + + /* b1: */ + next_block = kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + (*block)->next = next_block; + *block = next_block; + + /* VAR x = v1.NEXT(); */ + if (type == TYPE_KIND_SBFLIST) { + struct mptcp_rbs_value_sbf_list_var *var_value = + mptcp_rbs_value_sbf_list_var_new(var_number); + + value = (struct mptcp_rbs_value *) + mptcp_rbs_value_sbf_list_next_new( + (struct mptcp_rbs_value_sbf_list 
*) var_value); + } else { + struct mptcp_rbs_value_skb_list_var *var_value = + mptcp_rbs_value_skb_list_var_new(var_number, + underlying_queue_kind); + + value = (struct mptcp_rbs_value *) + mptcp_rbs_value_skb_list_next_new( + (struct mptcp_rbs_value_skb_list *) var_value); + } + + var_number = ctx->var_index; + var_smt = mptcp_rbs_smt_var_new(var_number, false, value); + ++ctx->var_index; + (*block)->first_smt = (struct mptcp_rbs_smt *) var_smt; + + /* if (x != NULL) GOTO b2 ELSE GOTO b3; */ + if (type == TYPE_KIND_SBFLIST) { + value = (struct mptcp_rbs_value *) mptcp_rbs_value_sbf_var_new( + var_number); + type = TYPE_KIND_SBF; + } else { + value = (struct mptcp_rbs_value *) mptcp_rbs_value_skb_var_new( + var_number, underlying_queue_kind == VALUE_KIND_RQ); + type = TYPE_KIND_SKB; + } + + (*block)->condition = + (struct mptcp_rbs_value_bool *) mptcp_rbs_value_is_not_null_new( + value); + + /* b2: */ + next_block = kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + (*block)->next = next_block; + + /* Statements must follow */ + if (!parse_smts(ctx, &next_block, ident_token.string, var_number, type, + underlying_queue_kind == VALUE_KIND_RQ, return_found)) + return false; + + /* b3: */ + last_block = kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + (*block)->next_else = last_block; + + if (*return_found) { + /* GOTO b3; */ + next_block->next = last_block; + } else { + /* GOTO b1; */ + next_block->next = *block; + } + + *block = last_block; + *last_smt = NULL; + return true; +} + +static bool parse_smt_if(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt, bool *return_found) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value_bool *value; + struct mptcp_rbs_cfg_block *branch_block; + struct mptcp_rbs_cfg_block *next_block = NULL; + bool if_return_found; + bool else_return_found = false; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return false; + + /* Boolean value must follow */ + value = parse_value_bool(ctx); + if (!value) + return false; + (*block)->condition = value; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) + return false; + + branch_block = kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + (*block)->next = branch_block; + + /* Statements must follow */ + if (!parse_smts(ctx, &branch_block, NULL, 0, TYPE_KIND_NULL, false, + &if_return_found)) + return false; + + if (!if_return_found) { + next_block = + kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + branch_block->next = next_block; + } + + /* else might follow */ + if (!lookahead_token(ctx, &token)) + return false; + + if (token.kind == TOKEN_KIND_ELSE) { + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* if might follow */ + if (!lookahead_token(ctx, &token)) + return false; + + branch_block = + kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + (*block)->next_else = branch_block; + + if (token.kind == TOKEN_KIND_IF) { + if (!parse_smt_if(ctx, &branch_block, last_smt, + &else_return_found)) + return false; + } else { + /* Statements must follow */ + if (!parse_smts(ctx, &branch_block, NULL, 0, + TYPE_KIND_NULL, false, + &else_return_found)) + return false; + } + + if (!else_return_found) { + if (!next_block) + next_block = + kzalloc(sizeof(struct mptcp_rbs_cfg_block), + GFP_KERNEL); + branch_block->next = 
next_block; + } + } else { + if (!next_block) + next_block = kzalloc(sizeof(struct mptcp_rbs_cfg_block), + GFP_KERNEL); + + (*block)->next_else = next_block; + } + + *block = next_block; + *last_smt = NULL; + *return_found = if_return_found && else_return_found; + return true; +} + +static bool parse_smt_print(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value_string *value; + struct mptcp_rbs_value *arg_value = NULL; + struct mptcp_rbs_smt_print *smt; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return false; + + /* String value must follow */ + value = parse_value_string(ctx); + if (!value) + return false; + + /* , might follow */ + if (!lookahead_token(ctx, &token)) { + value->free(value); + return false; + } + + if (token.kind == TOKEN_KIND_COMMA) { + int dummy1; + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* Value must follow */ + arg_value = parse_value(ctx, &dummy1); + if (!arg_value) { + value->free(value); + return false; + } + } + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + if (arg_value) + arg_value->free(arg_value); + return false; + } + + /* ; must follow */ + if (!expect_token(ctx, TOKEN_KIND_SEMICOLON, &token)) { + value->free(value); + if (arg_value) + arg_value->free(arg_value); + return false; + } + + smt = mptcp_rbs_smt_print_new(value, arg_value); + if (*last_smt) + (*last_smt)->next = (struct mptcp_rbs_smt *) smt; + else + (*block)->first_smt = (struct mptcp_rbs_smt *) smt; + *last_smt = (struct mptcp_rbs_smt *) smt; + + return true; +} + +static bool parse_smt_return(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block) +{ + struct mptcp_rbs_token token; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ; must follow */ + if (!expect_token(ctx, TOKEN_KIND_SEMICOLON, &token)) + return false; + + /* Do nothing because the next pointer of the block is + * already set to NULL + */ + return true; +} + +static bool parse_smt_set(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt) +{ + struct mptcp_rbs_token token; + int reg_number; + struct mptcp_rbs_value_int *value; + struct mptcp_rbs_smt_set *smt; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return false; + + /* R1 - R6 must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &token)) + return false; + if (strlen(token.string) != 2 || token.string[0] != 'R' || + token.string[1] < '1' || + token.string[1] > ('0' + MPTCP_RBS_REG_COUNT)) { + printk("%d: Register name %s is invalid\n", token.position, + token.string); + return false; + } + reg_number = token.string[1] - '1'; + + /* , must follow */ + if (!expect_token(ctx, TOKEN_KIND_COMMA, &token)) + return false; + + /* Integer value must follow */ + value = parse_value_int(ctx); + if (!value) + return false; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + return false; + } + + /* ; must follow */ + if (!expect_token(ctx, TOKEN_KIND_SEMICOLON, &token)) { + value->free(value); + return false; + } + + smt = 
mptcp_rbs_smt_set_new(reg_number, value); + if (*last_smt) + (*last_smt)->next = (struct mptcp_rbs_smt *) smt; + else + (*block)->first_smt = (struct mptcp_rbs_smt *) smt; + *last_smt = (struct mptcp_rbs_smt *) smt; + + return true; +} + +static bool parse_smt_var(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt) +{ + struct mptcp_rbs_token token; + struct var_list *vars; + struct var *var; + struct mptcp_rbs_value *value; + int dummy1; + struct mptcp_rbs_smt_var *smt; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* Identifier must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &token)) + return false; + + /* Check if identifier is already used */ + vars = GET_VAR_LIST_STACK_TOP(&ctx->var_stack); + FOREACH_VAR(vars, var, if (!strcmp(token.string, var->name)) { + printk("%d: Variable %s is already declared\n", token.position, + token.string); + return false; + }); + + var = var_new(token.string, ctx->var_index, TYPE_KIND_NULL, NULL, NULL); + ADD_VAR(vars, var); + ++ctx->var_index; + + /* = must follow */ + if (!expect_token(ctx, TOKEN_KIND_ASSIGN, &token)) + return false; + + /* Value must follow */ + value = parse_value(ctx, &dummy1); + if (!value) + return false; + + var->type = mptcp_rbs_value_get_type(value->kind); + if (var->type == TYPE_KIND_SKB) + var->reinject = + ((struct mptcp_rbs_value_skb *) value)->reinject; + else if (var->type == TYPE_KIND_SKBLIST) + var->underlying_queue_kind = ctx->underlying_queue_kind; + + /* ; must follow */ + if (!expect_token(ctx, TOKEN_KIND_SEMICOLON, &token)) { + value->free(value); + return false; + } + + smt = mptcp_rbs_smt_var_new(var->var_number, false, value); + if (*last_smt) + (*last_smt)->next = (struct mptcp_rbs_smt *) smt; + else + (*block)->first_smt = (struct mptcp_rbs_smt *) smt; + *last_smt = (struct mptcp_rbs_smt *) smt; + + return true; +} + +static bool parse_smt_void(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value *value = NULL; + int dummy1; + struct mptcp_rbs_smt_void *smt; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return false; + + /* Value might follow */ + if (!lookahead_token(ctx, &token)) + return false; + if (token.kind != TOKEN_KIND_CLOSE_BRACKET) { + value = parse_value(ctx, &dummy1); + if (!value) + return false; + } + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + if (value) + value->free(value); + return false; + } + + /* ; must follow */ + if (!expect_token(ctx, TOKEN_KIND_SEMICOLON, &token)) { + if (value) + value->free(value); + return false; + } + + smt = mptcp_rbs_smt_void_new(value); + if (*last_smt) + (*last_smt)->next = (struct mptcp_rbs_smt *) smt; + else + (*block)->first_smt = (struct mptcp_rbs_smt *) smt; + *last_smt = (struct mptcp_rbs_smt *) smt; + + return true; +} + +static bool parse_smt_other(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt) +{ + struct mptcp_rbs_token token; + int value_position; + struct mptcp_rbs_value *value = parse_value(ctx, &value_position); + struct mptcp_rbs_smt *smt; + + if (!value) + return false; + + /* PUSH might follow. In this case the previous . 
was + * already parsed + */ + if (!lookahead_token(ctx, &token)) { + value->free(value); + return false; + } + + if (token.kind == TOKEN_KIND_PUSH) { + struct mptcp_rbs_value_skb *skb_value; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) { + value->free(value); + return false; + } + + /* SKB value must follow */ + skb_value = parse_value_skb_allow_pop(ctx); + if (!skb_value) { + value->free(value); + return false; + } + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + skb_value->free(skb_value); + return false; + } + + smt = (struct mptcp_rbs_smt *) mptcp_rbs_smt_push_new( + (struct mptcp_rbs_value_sbf *) value, skb_value); + } else if (token.kind == TOKEN_KIND_SET_USER) { + struct mptcp_rbs_value_int *int_value; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) { + value->free(value); + return false; + } + + /* Integer value must follow */ + int_value = parse_value_int(ctx); + if (!int_value) { + value->free(value); + return false; + } + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + int_value->free(int_value); + return false; + } + + smt = (struct mptcp_rbs_smt *) mptcp_rbs_smt_set_user_new( + (struct mptcp_rbs_value_sbf *) value, int_value); + } else { + printk("%d: Values cannot stand alone\n", value_position); + value->free(value); + return false; + } + + /* ; must follow */ + if (!expect_token(ctx, TOKEN_KIND_SEMICOLON, &token)) { + smt->free(smt); + return false; + } + + if (*last_smt) + (*last_smt)->next = smt; + else + (*block)->first_smt = smt; + *last_smt = smt; + + return true; +} + +static bool parse_smt(struct parse_ctx *ctx, struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt, bool *return_found) +{ + struct mptcp_rbs_token token; + + if (!lookahead_token(ctx, &token)) + return false; + + switch (token.kind) { + case TOKEN_KIND_DROP: { + *return_found = false; + return parse_smt_drop(ctx, block, last_smt); + } + case TOKEN_KIND_FOREACH: + return parse_smt_foreach(ctx, block, last_smt, return_found); + case TOKEN_KIND_IF: + return parse_smt_if(ctx, block, last_smt, return_found); + case TOKEN_KIND_PRINT: { + *return_found = false; + return parse_smt_print(ctx, block, last_smt); + } + case TOKEN_KIND_RETURN: { + *return_found = true; + return parse_smt_return(ctx, block); + } + case TOKEN_KIND_SET: { + *return_found = false; + return parse_smt_set(ctx, block, last_smt); + } + case TOKEN_KIND_VAR: { + *return_found = false; + return parse_smt_var(ctx, block, last_smt); + } +#ifdef MPTCP_RBS_MEASURE + case TOKEN_KIND_VOID: { + *return_found = false; + return parse_smt_void(ctx, block, last_smt); + } +#endif + default: { + *return_found = false; + return parse_smt_other(ctx, block, last_smt); + } + } +} + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_parse(const char *str) +{ + struct mptcp_rbs_scheduler *scheduler = + kzalloc(sizeof(struct mptcp_rbs_scheduler), GFP_KERNEL); + struct mptcp_rbs_cfg_block *block = + kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + struct mptcp_rbs_smt *last_smt = NULL; + struct mptcp_rbs_token token; + struct var *var; + struct var_list vars; + struct parse_ctx ctx; + ctx.str = str; + ctx.position = 0; + ctx.line = 0; + 
ctx.line_position = 0;
+	ctx.var_index = 0;
+	scheduler->variations[0].first_block = block;
+
+	/* SCHEDULER must follow */
+	if (!expect_token(&ctx, TOKEN_KIND_SCHEDULER, &token)) {
+		mptcp_rbs_scheduler_free(scheduler);
+		return NULL;
+	}
+
+	/* Identifier must follow */
+	if (!expect_token(&ctx, TOKEN_KIND_IDENT, &token)) {
+		mptcp_rbs_scheduler_free(scheduler);
+		return NULL;
+	}
+	scheduler->name = strclone(token.string);
+	/* ; must follow */
+	if (!expect_token(&ctx, TOKEN_KIND_SEMICOLON, &token)) {
+		mptcp_rbs_scheduler_free(scheduler);
+		return NULL;
+	}
+
+	INIT_REPL_STACK(&ctx.repls);
+	INIT_VAR_LIST(&vars);
+	INIT_VAR_LIST_STACK(&ctx.var_stack);
+	PUSH_VAR_LIST(&ctx.var_stack, &vars);
+
+	while (true) {
+		bool return_found;
+
+		/* Check if end is found */
+		if (!lookahead_token(&ctx, &token)) {
+			printk("%s\n", mptcp_rbs_get_last_error());
+			mptcp_rbs_scheduler_free(scheduler);
+			scheduler = NULL;
+			break;
+		}
+		if (token.kind == TOKEN_KIND_EOD)
+			break;
+
+		/* Statement must follow */
+		if (!parse_smt(&ctx, &block, &last_smt, &return_found)) {
+			mptcp_rbs_scheduler_free(scheduler);
+			scheduler = NULL;
+			break;
+		}
+
+		if (return_found) {
+			/* End found */
+			if (!expect_token(&ctx, TOKEN_KIND_EOD, &token)) {
+				mptcp_rbs_scheduler_free(scheduler);
+				scheduler = NULL;
+			}
+			break;
+		}
+	}
+
+	FREE_REPL_STACK(&ctx.repls);
+	FOREACH_VAR(&vars, var, var_free(var));
+	FREE_VAR_LIST(&vars);
+	FREE_VAR_LIST_STACK(&ctx.var_stack);
+
+	if (scheduler) {
+		scheduler->variations[0].used_vars = ctx.var_index;
+		if (ctx.var_index > MPTCP_RBS_MAX_VAR_COUNT) {
+			printk("Scheduler cannot be parsed because too many "
+			       "variables are used\n");
+			mptcp_rbs_scheduler_free(scheduler);
+			scheduler = NULL;
+		}
+	}
+
+	return scheduler;
+}
diff --git a/net/mptcp/mptcp_rbs_parser.h b/net/mptcp/mptcp_rbs_parser.h
new file mode 100644
index 0000000000000..6467cb85ddbd3
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_parser.h
@@ -0,0 +1,23 @@
+#ifndef _MPTCP_RBS_PARSER_H
+#define _MPTCP_RBS_PARSER_H
+
+/* Define this macro to enable VOID statements */
+#define MPTCP_RBS_MEASURE
+
+struct mptcp_rbs_scheduler;
+
+/*
+ * Formats a given string with arguments, stores it inside the given
+ * buffer and returns the number of written characters. The buffer pointer
+ * is advanced to the first character after the last written one. If NULL
+ * or a pointer to NULL is passed, the function only calculates the length.
+ */
+int sprintf_null(char **buf, const char *fmt, ...);
+
+/*
+ * Tries to build a scheduler from a string
+ * @return: The parsed scheduler or NULL
+ */
+struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_parse(const char *str);
+
+#endif
diff --git a/net/mptcp/mptcp_rbs_queue.c b/net/mptcp/mptcp_rbs_queue.c
new file mode 100644
index 0000000000000..2b795fd3a68a0
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_queue.c
@@ -0,0 +1,72 @@
+#include "mptcp_rbs_queue.h"
+#include
+
+/*
+ * Some helpers for the queue structures
+ */
+
+void mptcp_rbs_advance_send_head(struct sock *sk, struct sk_buff **skb)
+{
+	if (tcp_skb_is_last(sk, *skb))
+		*skb = NULL;
+	else {
+		/* we have to reset mptcp_rbs_in_queue as it will NOW be in QU */
+		TCP_SKB_CB(*skb)->mptcp_rbs.flags_not_in_queue = 0;
+
+		*skb = tcp_write_queue_next(sk, *skb);
+	}
+}
+
+unsigned int mptcp_rbs_q_size(struct sock *sk, struct sk_buff *queue_position)
+{
+	struct sk_buff *initial_qp = queue_position;
+	unsigned int i = 0;
+
+	while (queue_position) {
+		i++;
+
+		if (tcp_skb_is_last(sk, queue_position))
+			break;
+		queue_position = queue_position->next;
+
+		if (i > 1000) {
+			printk("## rbs_q_size for sk %p and queue position %p "
+			       "with sk_write_queue %p of size %u aborted with "
+			       "more than 1000 elements... might be an "
+			       "infinite loop\n",
+			       sk, initial_qp, &sk->sk_write_queue,
+			       sk->sk_write_queue.qlen);
+			break;
+		}
+	}
+	return i;
+}
+
+/*
+ * Prints up to 10 queue entries and returns the number of entries seen
+ */
+unsigned int mptcp_rbs_print_queue(struct sock *sk, struct sk_buff *skb)
+{
+	unsigned int i;
+
+	for (i = 0; i < 10; i++) {
+		if (!skb)
+			return i;
+
+		mptcp_debug("sk_buff queue %p seq %10u and end_seq "
+			    "%10u and len %10u\n",
+			    skb, TCP_SKB_CB(skb)->seq,
+			    TCP_SKB_CB(skb)->end_seq, skb->len);
+
+		if (tcp_skb_is_last(sk, skb))
+			return i + 1;
+
+		skb = tcp_write_queue_next(sk, skb);
+	}
+	return i;
+}
diff --git a/net/mptcp/mptcp_rbs_queue.h b/net/mptcp/mptcp_rbs_queue.h
new file mode 100644
index 0000000000000..ef29c65886907
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_queue.h
@@ -0,0 +1,12 @@
+#ifndef _MPTCP_RBS_QUEUE_H
+#define _MPTCP_RBS_QUEUE_H
+
+#include
+
+void mptcp_rbs_advance_send_head(struct sock *sk, struct sk_buff **skb);
+
+unsigned int mptcp_rbs_q_size(struct sock *sk, struct sk_buff *queue_position);
+
+unsigned int mptcp_rbs_print_queue(struct sock *sk, struct sk_buff *skb);
+
+#endif
diff --git a/net/mptcp/mptcp_rbs_sched.c b/net/mptcp/mptcp_rbs_sched.c
new file mode 100644
index 0000000000000..3c5931647ca62
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_sched.c
@@ -0,0 +1,914 @@
+/* Rule-based MPTCP Scheduler */
+
+#include "mptcp_rbs_sched.h"
+#include "mptcp_rbs_cfg.h"
+#include "mptcp_rbs_ctx.h"
+#include "mptcp_rbs_exec.h"
+#include "mptcp_rbs_optimizer.h"
+#include "mptcp_rbs_parser.h"
+#include "mptcp_rbs_queue.h"
+#include "mptcp_rbs_scheduler.h"
+#include "mptcp_rbs_user.h"
+
+#include
+#include
+#include
+#include // required for after(...)
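+/*
+ * after(a, b), from net/tcp.h, is the wrap-around-safe sequence-number
+ * comparison: it is true iff the 32-bit sequence number a comes after b
+ * in modular arithmetic, e.g. after(2, 0xfffffffe) holds across the wrap.
+ */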
+#include +#include + +static const char *default_rules = "SCHEDULER simple;\n" + "VAR sbfCandidates = SUBFLOWS.FILTER(sbf => sbf.CWND > \n" + " sbf.SKBS_IN_FLIGHT + sbf.QUEUED AND !sbf.THROTTLED AND !sbf.LOSSY);\n" + "IF (sbfCandidates.EMPTY) { RETURN; }\n" + "IF (!RQ.EMPTY) {\n" + " sbfCandidates.GET(0).PUSH(RQ.POP());\n" + " RETURN;\n" + "}\n" + "IF (!Q.EMPTY) {\n" + " sbfCandidates.GET(0).PUSH(Q.POP());\n" + "}"; + +/* Linked list with all schedulers */ +static struct mptcp_rbs_scheduler *schedulers; +/* The default scheduler */ +static struct mptcp_rbs_scheduler *default_scheduler; + +/* Parameters to control scheduler */ +bool mptcp_rbs_extended_msgs __read_mostly = false; +module_param(mptcp_rbs_extended_msgs, bool, 0644); +MODULE_PARM_DESC(mptcp_rbs_extended_msgs, "Should we give a bit more dmesg's?"); + +/* Parameters to control the advanced ooo receive ops */ +bool mptcp_ooo_opt __read_mostly = false; +module_param(mptcp_ooo_opt, bool, 0644); +MODULE_PARM_DESC(mptcp_ooo_opt, "Should we run the advanced ooo receive ops?"); + +/* Parameters to turn off CWND */ +bool ignoreSbfCwndConfig __read_mostly = false; +module_param(ignoreSbfCwndConfig, bool, 0644); +MODULE_PARM_DESC(ignoreSbfCwndConfig, "Ignore congestion control."); + +static bool mptcp_rbs_clean_reinject_queue __read_mostly = true; +module_param(mptcp_rbs_clean_reinject_queue, bool, 0644); +MODULE_PARM_DESC(mptcp_rbs_clean_reinject_queue, + "Should the reinjection queue be cleaned?"); + +static bool mptcp_rbs_check_for_gaps_in_seq __read_mostly = false; +module_param(mptcp_rbs_check_for_gaps_in_seq, bool, 0644); +MODULE_PARM_DESC( + mptcp_rbs_check_for_gaps_in_seq, + "Should the scheduler check if there is a gap in the sequence numbers?"); + +static bool mptcp_rbs_check_for_work_conservingness __read_mostly = false; +module_param(mptcp_rbs_check_for_work_conservingness, bool, 0644); +MODULE_PARM_DESC( + mptcp_rbs_check_for_work_conservingness, + "Should the scheduler check if the program is work conserving?"); + +u32 mptcp_ooo_number_matches = 0; + +static bool mptcp_rbs_is_available(const struct sock *sk, + const struct sk_buff *skb, + bool zero_wnd_test, bool cwnd_test) +{ + const struct tcp_sock *tp = tcp_sk(sk); + + /* Set of states for which we are allowed to send data */ + if (!mptcp_sk_can_send(sk)) + return false; + + /* We do not send data on this subflow unless it is + * fully established, i.e. the 4th ack has been received. + */ + if (tp->mptcp->pre_established) + return false; + + if (tp->pf) + return false; + + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) { + /* If SACK is disabled, and we got a loss, TCP does not exit + * the loss-state until something above high_seq has been acked. + * (see tcp_try_undo_recovery) + * + * high_seq is the snd_nxt at the moment of the RTO. As soon + * as we have an RTO, we won't push data on the subflow. + * Thus, snd_una can never go beyond high_seq. + */ + if (!tcp_is_reno(tp)) + return false; + else if (tp->snd_una != tp->high_seq) + return false; + } + + if (!tp->mptcp->fully_established) { + /* Make sure that we send in-order data */ + if (skb && tp->mptcp->second_packet && + tp->mptcp->last_end_data_seq != TCP_SKB_CB(skb)->seq) + return false; + } + return true; +} + +/* Are we not allowed to reinject this skb on tp? */ +static int mptcp_rbs_dont_reinject_skb(const struct tcp_sock *tp, + const struct sk_buff *skb) +{ + /* If the skb has already been enqueued in this sk, try to find + * another one. 
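+	 * mptcp_pi_to_flag() maps a subflow's path_index i to the bit
+	 * 1 << (i - 1), so the test below is nonzero exactly when this
+	 * subflow's bit is already set in the skb's path_mask.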
+ */ + return skb && + /* Has the skb already been enqueued into this subsocket? */ + mptcp_pi_to_flag(tp->mptcp->path_index) & + TCP_SKB_CB(skb)->path_mask; +} + +static struct sock *mptcp_rbs_get_available_subflow(struct sock *meta_sk, + struct sk_buff *skb, + bool zero_wnd_test) +{ + const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; + struct sock *sk, *bestsk = NULL, *backupsk = NULL; + + mptcp_rbs_debug( + "### asked for available subflow for meta_sk %p and skb %p with " + "zerownd %u called from %pS\n", + meta_sk, skb, zero_wnd_test, __builtin_return_address(0)); + + /* Answer data_fin on same subflow!!! */ + if (meta_sk->sk_shutdown & RCV_SHUTDOWN && skb && + mptcp_is_data_fin(skb)) { + mptcp_for_each_sk(mpcb, sk) + { + if (tcp_sk(sk)->mptcp->path_index == + mpcb->dfin_path_index && + mptcp_rbs_is_available(sk, skb, zero_wnd_test, + true)) + return sk; + } + } + + /* First, find the best subflow */ + mptcp_for_each_sk(mpcb, sk) + { + struct tcp_sock *tp = tcp_sk(sk); + + if (!mptcp_rbs_is_available(sk, skb, zero_wnd_test, true)) + continue; + + if (mptcp_rbs_dont_reinject_skb(tp, skb)) { + backupsk = sk; + continue; + } + + bestsk = sk; + } + + if (bestsk) { + sk = bestsk; + } else if (backupsk) { + /* It has been sent on all subflows once - let's give it a + * chance again by restarting its pathmask. + */ + if (skb) + TCP_SKB_CB(skb)->path_mask = 0; + sk = backupsk; + } + + mptcp_rbs_debug("returning with sk %p", sk); + return sk; +} + +/* only call this if open action is empty, as we might otherwise free packets + * which are still in open_action */ +static void clean_up_reinjection_queue(struct tcp_sock *meta_tp) +{ + struct mptcp_cb *mpcb = meta_tp->mpcb; + struct sk_buff *skb = skb_peek(&mpcb->reinject_queue); + u32 counter = 0; + mptcp_rbs_debug("%s for rq %p with peek %p and size %u\n", __func__, + &mpcb->reinject_queue, skb, mpcb->reinject_queue.qlen); + + while (skb) { + struct sk_buff *tmp = skb; + counter++; + + if (counter == 1000) { + printk("%s found more than %u packets in rq with qlen " + "%u\n", + __func__, counter, mpcb->reinject_queue.qlen); + } else if (counter > 10000) { + printk("%s finished it with %u packets in rq and qlen " + "%u\n", + __func__, counter, mpcb->reinject_queue.qlen); + break; + } + + if (skb_queue_is_last(&mpcb->reinject_queue, skb)) { + skb = NULL; + mptcp_rbs_debug("%s for rq %p next in rq is NULL\n", + __func__, &mpcb->reinject_queue); + } else { + skb = skb_queue_next(&mpcb->reinject_queue, skb); + mptcp_rbs_debug("%s for rq %p next in rq is %p\n", + __func__, &mpcb->reinject_queue, skb); + } + + if (after(meta_tp->snd_una, TCP_SKB_CB(tmp)->end_seq) || + (TCP_SKB_CB(tmp)->mptcp_rbs.flags_to_unlink && + TCP_SKB_CB(tmp)->mptcp_rbs.flags_to_free)) { + + /* Segment already reached the peer, remove it */ + mptcp_rbs_debug( + "rbs_clean_up_reinjection queue removes skb %p " + "with end_seq %u seq %u and snd_una %u with next " + "skb %p with to_unlink %u and with to_free %u and " + "not_in_queue %u\n", + tmp, TCP_SKB_CB(tmp)->end_seq, TCP_SKB_CB(tmp)->seq, + meta_tp->snd_una, skb, + TCP_SKB_CB(tmp)->mptcp_rbs.flags_to_unlink, + TCP_SKB_CB(tmp)->mptcp_rbs.flags_to_free, + TCP_SKB_CB(tmp)->mptcp_rbs.flags_not_in_queue); + + __skb_unlink(tmp, &mpcb->reinject_queue); + __kfree_skb(tmp); + } + } +} + +u32 get_number_of_available_subflows(struct tcp_sock *meta_tp) +{ + struct mptcp_cb *mpcb = meta_tp->mpcb; + struct sock *sk; + u32 result = 0; + + mptcp_for_each_sk(mpcb, sk) + { + struct tcp_sock *tp = (struct tcp_sock *) sk; + if 
(mptcp_rbs_sbf_is_available(tp)) {
+			result++;
+		}
+	}
+
+	return result;
+}
+
+u32 get_number_of_available_subflows_with_cwnd(struct tcp_sock *meta_tp)
+{
+	struct mptcp_cb *mpcb = meta_tp->mpcb;
+	struct sock *sk;
+	u32 result = 0;
+
+	mptcp_for_each_sk(mpcb, sk)
+	{
+		struct tcp_sock *tp = (struct tcp_sock *) sk;
+		if (mptcp_rbs_sbf_is_available(tp) &&
+		    tp->packets_out < tp->snd_cwnd)
+			result++;
+	}
+
+	return result;
+}
+
+static struct sk_buff *process_actions(struct tcp_sock *meta_tp, int *reinject,
+				       struct sock **sbf)
+{
+	struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(meta_tp);
+	enum mptcp_rbs_action_kind kind;
+	struct sk_buff *skb;
+	unsigned int end_seq;
+
+	FOREACH_ACTION(
+	    rbs_cb->open_actions, kind, *((struct tcp_sock **) sbf), skb,
+	    end_seq, *reinject, {
+		    if (kind == ACTION_KIND_PUSH) {
+			    mptcp_rbs_debug("Answer with OPEN PUSH ACTION with "
+					    "skb %p on sbf %p (reinjection "
+					    "%i)\n",
+					    skb, *sbf, *reinject);
+
+			    /*
+			     * The packet might already be acknowledged, so
+			     * we check the sequence numbers
+			     */
+			    if (!after(end_seq, meta_tp->snd_una)) {
+				    mptcp_rbs_debug("rbs recovered from "
						    "acknowledged seq without "
						    "touching skb\n");
+				    continue;
+			    }
+
+			    return skb;
+		    } else if (kind == ACTION_KIND_DROP) {
+			    mptcp_rbs_debug("Execute OPEN DROP ACTION with skb "
+					    "%p (reinjection %i)\n",
+					    skb, *reinject);
+			    /* nothing to do... TODO: remove from open action
+			     * table
+			     */
+		    } else
+			    BUG_ON(true);
+	    });
+
+	*reinject = false;
+	*sbf = NULL;
+	return NULL;
+}
+
+#ifdef CONFIG_MPTCP_RBSMEASURE
+
+/* Functions for measurements with SystemTap */
+
+void noinline mptcp_rbs_scheduler_opt(const char *sched_name, int sbf_num,
+				      int status)
+{
+	asm("nop");
+}
+
+void noinline mptcp_rbs_scheduler_switch(const struct sock *meta_sk,
+					 const char *sched_name,
+					 int old_sbf_num, int sbf_num)
+{
+	asm("nop");
+}
+
+#endif
+
+/* Determines if a scheduler is currently being optimized.
+ * 0 = no
+ * 1 = yes - the following static variables are currently filled with data
+ * 2 = yes - the following static variables hold valid data
+ */
+static atomic_t optimizing = ATOMIC_INIT(0);
+/* The scheduler to optimize */
+static struct mptcp_rbs_scheduler *opt_scheduler;
+/* Number of subflows the scheduler should be optimized for */
+static u32 opt_sbf_num;
+/* Index of the variation where the optimized code should be stored */
+static int opt_variation_idx;
+
+/* Number of calls necessary to start a subflow-number-dependent optimization.
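+ * An optimization run is requested from mptcp_rbs_next_segment(): it moves
+ * "optimizing" from 0 to 1 with atomic_cmpxchg(), fills opt_scheduler,
+ * opt_sbf_num and opt_variation_idx, and then increments the flag to 2.
+ * opt_thread_func() below only starts once it reads 2 and resets the flag
+ * to 0 after publishing the optimized variation.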
+ * If the number of subflows changes the counter should restart.
+ */
+#define MIN_CALLS_TO_OPT 10
+
+static int opt_thread_func(void *data)
+{
+	struct mptcp_rbs_scheduler_variation variation;
+
+	while (true) {
+		if (atomic_read(&optimizing) != 2) {
+			msleep(2);
+			continue;
+		}
+
+		barrier();
+
+		mptcp_rbs_debug("optimizing scheduler %s for %d subflows\n",
+				opt_scheduler->name, opt_sbf_num);
+
+#ifdef CONFIG_MPTCP_RBSMEASURE
+		mptcp_rbs_scheduler_opt(opt_scheduler->name, opt_sbf_num, 1);
+#endif
+
+		/* Copy the default variation */
+		variation.first_block = mptcp_rbs_cfg_blocks_clone(
+		    opt_scheduler->variations[0].first_block, NULL, NULL);
+		variation.used_vars = opt_scheduler->variations[0].used_vars;
+		variation.sbf_num = opt_sbf_num;
+
+		/* Apply optimizations */
+		mptcp_rbs_optimize(&variation, &opt_scheduler->del, opt_sbf_num,
+				   mptcp_rbs_opts_enabled == 2);
+
+		/* "Publish" the optimized variation */
+		opt_scheduler->variations[opt_variation_idx].used_vars =
+		    variation.used_vars;
+		opt_scheduler->variations[opt_variation_idx].sbf_num =
+		    variation.sbf_num;
+		opt_scheduler->variations[opt_variation_idx].first_block =
+		    variation.first_block;
+
+#ifdef CONFIG_MPTCP_RBSMEASURE
+		mptcp_rbs_scheduler_opt(opt_scheduler->name, opt_sbf_num, 0);
+#endif
+
+		mptcp_rbs_debug("scheduler %s was optimized for %d subflows\n",
+				opt_scheduler->name, opt_sbf_num);
+
+		atomic_set(&optimizing, 0);
+	}
+
+	return 0;
+}
+
+struct sk_buff *mptcp_rbs_next_segment(struct sock *meta_sk, int *reinject,
+				       struct sock **subsk, unsigned int *limit)
+{
+	struct tcp_sock *meta_tp = tcp_sk(meta_sk);
+	struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(meta_tp);
+	struct mptcp_rbs_eval_ctx ctx;
+	struct sk_buff *skb;
+	u32 number_of_subflows;
+	u64 begin_time;
+	u64 begin_time2;
+	int i;
+	unsigned int number_of_evaluations = 0;
+	struct mptcp_rbs_scheduler_variation *old_variation;
+
+	mptcp_rbs_debug("rbs meta_sk->send_head = %p, own queue = %p, "
+			"packets in flight %i, cwnd %i, wnd %i and q size %u "
+			"packets_out %u\n",
+			tcp_send_head(meta_sk), rbs_cb->queue_position,
+			meta_tp->packets_out, meta_tp->snd_cwnd,
+			(tcp_wnd_end(meta_tp) - meta_tp->write_seq),
+			mptcp_rbs_q_size(meta_sk, rbs_cb->queue_position),
+			meta_tp->packets_out);
+
+	// TODO check that we have at least window?
+
+	if (!rbs_cb || !rbs_cb->scheduler) {
+		mptcp_rbs_debug("rbs_cb or scheduler invalid\n");
+		return NULL;
+	}
+
+	if (rbs_cb->execution_bucket == 0) {
+		printk("%s: WARNING: Execution bucket exceeded for scheduler "
+		       "%s. Now aborting new call.\n",
+		       __func__, rbs_cb->scheduler->name);
+		return NULL;
+	}
+
+#ifdef CONFIG_MPTCP_RBSMEASURE
+	/* first time measurement */
+	begin_time = __native_read_tsc();
+#endif
+
+	/*
+	 * If we still have open actions from previous rule evaluations,
+	 * simply return their results in order
+	 */
+	skb = process_actions(meta_tp, reinject, subsk);
+	if (skb) {
+#ifdef CONFIG_MPTCP_RBSMEASURE
+		rbs_cb->scheduler->total_time_oa_skb +=
+		    __native_read_tsc() - begin_time;
+		rbs_cb->scheduler->total_count_oa_skb++;
+#endif
+
+		/* we want to run all existing checks after the rule execution
+		 * to ensure that we do not miss a packet for the gap test */
+		goto after_rbs_exec;
+	}
+
+	mptcp_rbs_debug("rbs scheduler no open actions\n");
+
+	/* only clean the queue if we are sure there is no open action which
+	 * might use it */
+	if (mptcp_rbs_clean_reinject_queue)
+		clean_up_reinjection_queue(meta_tp);
+
+	number_of_subflows = get_number_of_available_subflows(meta_tp);
+	old_variation = rbs_cb->variation;
+	rbs_cb->variation = &rbs_cb->scheduler->variations[0];
+
+	if (mptcp_rbs_opts_enabled) {
+		/* Check if there is a specific variation */
+		for (i = 1; i < MPTCP_RBS_VARIATION_COUNT; ++i) {
+			if (!rbs_cb->scheduler->variations[i].first_block)
+				break;
+
+			if (rbs_cb->scheduler->variations[i].sbf_num ==
+			    number_of_subflows) {
+				rbs_cb->variation =
+				    &rbs_cb->scheduler->variations[i];
+				break;
+			}
+		}
+
+		/* If there is room for another optimized variation */
+		if (i != MPTCP_RBS_VARIATION_COUNT) {
+			if (rbs_cb->last_number_of_subflows !=
+			    number_of_subflows) {
+				rbs_cb->last_number_of_subflows =
+				    number_of_subflows;
+				rbs_cb->calls_since_sbf_change = 0;
+			} else if (rbs_cb->calls_since_sbf_change >
+				       MIN_CALLS_TO_OPT &&
+				   number_of_subflows &&
+				   rbs_cb->variation ==
+				       &rbs_cb->scheduler->variations[0]) {
+				/* We should optimize for this number of
+				 * subflows
+				 */
+				if (!atomic_cmpxchg(&optimizing, 0, 1)) {
+					opt_scheduler = rbs_cb->scheduler;
+					opt_sbf_num = number_of_subflows;
+					opt_variation_idx = i;
+					barrier();
+					atomic_inc(&optimizing);
+				}
+				rbs_cb->calls_since_sbf_change = 0;
+			} else
+				++rbs_cb->calls_since_sbf_change;
+		}
+	}
+
+	if (old_variation != rbs_cb->variation) {
+#ifdef CONFIG_MPTCP_RBSMEASURE
+		mptcp_rbs_scheduler_switch(meta_sk, rbs_cb->scheduler->name,
+					   old_variation->sbf_num,
+					   rbs_cb->variation->sbf_num);
+#endif
+
+		mptcp_rbs_debug(
+		    "switching for %p to scheduler optimized for %d subflows\n",
+		    meta_sk, rbs_cb->variation->sbf_num);
+	}
+
+	/*
+	 * We repeat the execution till it returns a packet in case
+	 * the rule execution had side effects.
+	 *
+	 * A side effect is the change of a register or the execution of
+	 * a POP operation.
+	 *
+	 * This is repeated at most five times, to ensure that
+	 * we terminate in case of unsuitable schedulers.
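+	 * A SET statement, for example, modifies a register without
+	 * necessarily producing a packet, so a second pass over the rules
+	 * may take a different branch and return one.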
+	 */
+	do {
+		/* Prepare context */
+		memset(&ctx, 0, sizeof(struct mptcp_rbs_eval_ctx));
+		ctx.meta_sk = meta_sk;
+		ctx.mpcb = meta_tp->mpcb;
+		ctx.rbs_cb = rbs_cb;
+		ctx.side_effects = 0;
+
+		/* Increase execution counter */
+		++rbs_cb->exec_count;
+
+#ifdef CONFIG_MPTCP_RBSMEASURE
+		begin_time2 = __native_read_tsc();
+#endif
+
+		/* Execute the rules and apply new actions if there are any */
+		mptcp_rbs_exec(&ctx);
+		skb = process_actions(meta_tp, reinject, subsk);
+
+#ifdef CONFIG_MPTCP_RBSMEASURE
+		if (skb) {
+			rbs_cb->scheduler->total_exec_count_skb++;
+			rbs_cb->scheduler->total_exec_time_skb +=
+			    __native_read_tsc() - begin_time2;
+		} else {
+			rbs_cb->scheduler->total_exec_count_no_skb++;
+			rbs_cb->scheduler->total_exec_time_no_skb +=
+			    __native_read_tsc() - begin_time2;
+		}
+#endif
+
+		number_of_evaluations++;
+
+		if (number_of_evaluations >= 5) {
+			printk("%s: WARNING: Exceeded 5 evaluations for "
+			       "scheduler %s. Now aborting.\n",
+			       __func__, rbs_cb->scheduler->name);
+			break;
+		}
+	} while (!skb && ctx.side_effects);
+
+#ifdef CONFIG_MPTCP_RBSMEASURE
+	if (skb) {
+		rbs_cb->scheduler->total_time_noa_skb +=
+		    __native_read_tsc() - begin_time;
+		rbs_cb->scheduler->total_count_noa_skb++;
+	} else {
+		rbs_cb->scheduler->total_time_noa_no_skb +=
+		    __native_read_tsc() - begin_time;
+		rbs_cb->scheduler->total_count_noa_no_skb++;
+	}
+#endif
+
+after_rbs_exec:
+
+	if (mptcp_rbs_check_for_gaps_in_seq && skb) {
+		/*
+		 * is there a gap between the beginning of
+		 * this packet and the highest seq without a gap?
+		 */
+		if (rbs_cb->highest_seq + 1 < TCP_SKB_CB(skb)->seq) {
+			printk("RBS GAP CHECK found gap for meta_sk %p with "
+			       "current packet %p and its seq %u and current "
+			       "highest_seq %u\n",
+			       meta_tp, skb, TCP_SKB_CB(skb)->seq,
+			       rbs_cb->highest_seq);
+		}
+
+		/* we mention every gap only once, increase highest_seq? */
+		if (rbs_cb->highest_seq > TCP_SKB_CB(skb)->end_seq) {
+			// still the highest, nothing to do
+		} else {
+			rbs_cb->highest_seq = TCP_SKB_CB(skb)->end_seq;
+		}
+	}
+
+	/*
+	 * if the skb is NULL but there are still packets in one of the
+	 * queues, print a warning.
+	 * Note that the queue might have changed during rule execution, but
+	 * the state at the end is sufficient.
+	 */
+	if (mptcp_rbs_check_for_work_conservingness && !skb &&
+	    (rbs_cb->queue_position || meta_tp->mpcb->reinject_queue.qlen)) {
+		u32 number_of_subflows_with_cwnd =
+		    get_number_of_available_subflows_with_cwnd(meta_tp);
+
+		if (number_of_subflows_with_cwnd) {
+			printk("%s: RBS WORK CONSERVINGNESS CHECK found "
+			       "problem for meta_sk %p with Q.TOP %p and "
+			       "RQ.COUNT %u and number_of_evaluations %u and "
+			       "number of available sbf %u with cwnd %u\n",
+			       __func__, meta_sk, rbs_cb->queue_position,
+			       meta_tp->mpcb->reinject_queue.qlen,
+			       number_of_evaluations, number_of_subflows,
+			       number_of_subflows_with_cwnd);
+		}
+	}
+
+	if (skb) {
+		rbs_cb->execution_bucket--;
+		if (rbs_cb->execution_bucket == 0) {
+			printk("%s: WARNING: Execution bucket exceeded for "
+			       "scheduler %s. Now aborting.\n",
+			       __func__, rbs_cb->scheduler->name);
+		}
+	}
+
+	return skb;
+}
+
+void mptcp_rbs_sbf_bw_init(struct mptcp_rbs_sbf_cb *sbf_cb)
+{
+	sbf_cb->bw_out_bytes = 0;
+	sbf_cb->bw_out_last_update_ns = 0;
+	sbf_cb->bw_ack_bytes = 0;
+	sbf_cb->bw_ack_last_update_ns = 0;
+}
+
+static void mptcp_rbs_init(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sock *meta_sk = tp->mpcb->meta_sk;
+	struct tcp_sock *meta_tp = tcp_sk(meta_sk);
+	struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(meta_tp);
+
+	if (!rbs_cb->open_actions)
+		rbs_cb->open_actions =
+		    kzalloc(sizeof(struct mptcp_rbs_actions), GFP_KERNEL);
+
+	if (!rbs_cb->scheduler) {
+		mptcp_rbs_debug("mptcp_rbs_init for sk %p with meta_sk %p\n",
+				sk, meta_sk);
+		rbs_cb->scheduler = default_scheduler;
+		rbs_cb->variation = &default_scheduler->variations[0];
+		++default_scheduler->usage;
+
+		rbs_cb->highest_seq = meta_tp->snd_una;
+
+		rbs_cb->execution_bucket = 1000; // initial bucket
+		mptcp_debug("%s init highest seq with last una %u\n",
+			    __func__, rbs_cb->highest_seq);
+
+		if (!meta_tp->nonagle)
+			// during development, this is REALLY important, so
+			// don't disable it
+			printk("Warning: Nagle could cause performance "
+			       "issues in combination with RBS\n");
+	} else {
+		mptcp_rbs_debug("mptcp_rbs_init for sk %p with meta_sk %p "
+				"already has a scheduler\n",
+				sk, meta_sk);
+	}
+
+	if (meta_sk != sk) {
+		mptcp_rbs_debug("mptcp_rbs_init for sbf %p with meta_sk %p\n",
+				sk, meta_sk);
+		mptcp_rbs_sbf_bw_init(
+		    (struct mptcp_rbs_sbf_cb *) &tp->mptcp->mptcp_sched[0]);
+	}
+
+	mptcp_debug("%s for sk %p with meta_sk %p\n", __func__, sk, meta_sk);
+}
+
+static void mptcp_rbs_release(struct sock *sk)
+{
+	if (is_meta_sk(sk)) {
+		struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(tcp_sk(sk));
+
+		/* If the meta socket is released the scheduler is no longer
+		 * used
+		 */
+		--mptcp_rbs_get_cb(tcp_sk(sk))->scheduler->usage;
+
+		kfree(rbs_cb->open_actions);
+	}
+
+	printk("Releasing %p with is_meta=%d\n", sk, is_meta_sk(sk));
+}
+
+static void mptcp_rbs_recover_skb(struct sock *meta_sk, struct sock *subsk,
+				  struct sk_buff *skb, bool reinject)
+{
+	struct tcp_sock *meta_tp = tcp_sk(meta_sk);
+	struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(meta_tp);
+
+	mptcp_rbs_debug("rbs scheduler recover with rejected skb %p on sbf "
+			"%p with existing open action %p\n",
+			skb, subsk, rbs_cb->open_actions);
+
+	/* skb has highest prio, insert at the beginning */
+	mptcp_rbs_action_new(rbs_cb->open_actions, true, ACTION_KIND_PUSH,
+			     tcp_sk(subsk), skb, reinject);
+
+	// TODO Should this really be the default CFG?
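+	// Since the action is inserted at the head of the open-action list,
+	// process_actions() will hand this skb out first on the next
+	// mptcp_rbs_next_segment() call.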
+ // default_rule_set->recovered_count++; +} + +/* type 0 = out, 1 = ack, 2 = ack on skb to get options */ +static void mptcp_rbs_update_stats(struct sock *sk, const struct sk_buff *skb, + unsigned int bytes, unsigned int type) +{ + struct tcp_sock *tp = tcp_sk(sk); + + switch (type) { + case 0: { + struct sock *meta_sk = tp->mpcb->meta_sk; + struct tcp_sock *meta_tp = tcp_sk(meta_sk); + struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(meta_tp); + rbs_cb->scheduler->total_bytes_sent += bytes; + + mptcp_debug("rbs adds %u bytes for sk %p on bw out\n", bytes, sk); + mptcp_rbs_sbf_bw_send_add(tp, bytes); + break; + } + case 1: { + mptcp_debug("rbs adds %u bytes for sk %p on bw ack\n", bytes, sk); + mptcp_rbs_sbf_bw_ack_add(tp, bytes); + break; + } + case 2: { + mptcp_rbs_sbf_delay_update(tp, skb); + break; + } + } +} + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_get_default(void) +{ + return default_scheduler; +} + +void mptcp_rbs_scheduler_set_default(struct mptcp_rbs_scheduler *scheduler) +{ + default_scheduler = scheduler; +} + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_get_registered(void) +{ + return schedulers; +} + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_find(const char *name) +{ + struct mptcp_rbs_scheduler *tmp = schedulers; + + while (tmp) { + if (!strcmp(tmp->name, name)) + return tmp; + tmp = tmp->next; + } + + return NULL; +} + +bool mptcp_rbs_scheduler_register(struct mptcp_rbs_scheduler *scheduler) +{ + struct mptcp_rbs_scheduler *tmp; + + /* Check if a scheduler with the same name is already registered */ + tmp = schedulers; + while (tmp) { + if (!strcmp(tmp->name, scheduler->name)) + return false; + tmp = tmp->next; + } + + scheduler->next = schedulers; + schedulers = scheduler; + return true; +} + +void mptcp_rbs_scheduler_unregister(struct mptcp_rbs_scheduler *scheduler) +{ + struct mptcp_rbs_scheduler *cur; + struct mptcp_rbs_scheduler *next; + + if (!schedulers) + return; + if (schedulers == scheduler) { + schedulers = schedulers->next; + return; + } + + cur = schedulers; + next = cur->next; + while (next) { + if (next == scheduler) { + cur->next = scheduler->next; + break; + } + + cur = next; + next = cur->next; + } +} + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_get(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(tp); + + return rbs_cb->scheduler; +} + +bool mptcp_rbs_scheduler_set(struct sock *sk, const char *name) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(tp); + struct mptcp_rbs_scheduler *scheduler; + + if (name) + scheduler = mptcp_rbs_scheduler_find(name); + else + scheduler = default_scheduler; + + if (!scheduler) + return false; + + ++scheduler->usage; + --rbs_cb->scheduler->usage; + rbs_cb->scheduler = scheduler; + rbs_cb->variation = &scheduler->variations[0]; + return true; +} + +static struct mptcp_sched_ops mptcp_sched_rbs = { + .get_subflow = mptcp_rbs_get_available_subflow, + .next_segment = mptcp_rbs_next_segment, + .init = mptcp_rbs_init, + .release = mptcp_rbs_release, + .name = "rbs", + .owner = THIS_MODULE, + .recover_skb = mptcp_rbs_recover_skb, + .update_stats = mptcp_rbs_update_stats, +}; + +static int __init rbs_register(void) +{ + BUILD_BUG_ON(sizeof(struct mptcp_rbs_cb) > MPTCP_SCHED_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct mptcp_rbs_sbf_cb) > MPTCP_SCHED_SIZE); + + if (mptcp_register_scheduler(&mptcp_sched_rbs)) + return -1; + + /* Load default scheduler */ + default_scheduler = mptcp_rbs_scheduler_parse(default_rules); 
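+    /* The built-in default_rules string is compiled in, so a parse
+     * failure here is a programming error rather than a runtime
+     * condition; hence the BUG_ON below instead of an error return.
+     */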
+ BUG_ON(!default_scheduler); + mptcp_rbs_scheduler_register(default_scheduler); + + /* Register proc for stats */ + mptcp_rbs_user_interface_init(); + +#ifdef CONFIG_MPTCP_RBSOPT + /* Start optimize thread */ + kthread_run(&opt_thread_func, NULL, "mptcp_rbs_opt"); +#endif + + return 0; +} + +static void rbs_unregister(void) +{ + /* Release all schedulers */ + while (schedulers) { + struct mptcp_rbs_scheduler *tmp = schedulers; + schedulers = schedulers->next; + mptcp_rbs_scheduler_free(tmp); + } + + mptcp_unregister_scheduler(&mptcp_sched_rbs); +} + +module_init(rbs_register); +module_exit(rbs_unregister); + +MODULE_AUTHOR("Alexander Froemmgen, Tobias Erbshaeusser"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Rule-based MPTCP Scheduler"); +MODULE_VERSION("0.89"); diff --git a/net/mptcp/mptcp_rbs_sched.h b/net/mptcp/mptcp_rbs_sched.h new file mode 100644 index 0000000000000..5224fa5cb0e51 --- /dev/null +++ b/net/mptcp/mptcp_rbs_sched.h @@ -0,0 +1,44 @@ +#ifndef _MPTCP_RBS_SCHED_H +#define _MPTCP_RBS_SCHED_H + +#include "mptcp_rbs_ctx.h" + +struct mptcp_rbs_scheduler; + +extern bool mptcp_rbs_extended_msgs; + +extern bool mptcp_ooo_opt; + +extern u32 mptcp_ooo_number_matches; + +extern bool ignoreSbfCwndConfig; + +#define mptcp_rbs_debug(fmt, args...) \ + do { \ + if (mptcp_rbs_extended_msgs) \ + mptcp_debug(fmt, ##args); \ + } while (0) + +struct sk_buff *mptcp_rbs_next_segment(struct sock *meta_sk, int *reinject, + struct sock **subsk, + unsigned int *limit); + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_get_default(void); + +void mptcp_rbs_scheduler_set_default(struct mptcp_rbs_scheduler *scheduler); + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_get_registered(void); + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_find(const char *name); + +bool mptcp_rbs_scheduler_register(struct mptcp_rbs_scheduler *scheduler); + +void mptcp_rbs_scheduler_unregister(struct mptcp_rbs_scheduler *scheduler); + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_get(struct sock *sk); + +bool mptcp_rbs_scheduler_set(struct sock *sk, const char *name); + +bool mptcp_rbs_sbf_is_available(struct tcp_sock* sbf); + +#endif diff --git a/net/mptcp/mptcp_rbs_scheduler.c b/net/mptcp/mptcp_rbs_scheduler.c new file mode 100644 index 0000000000000..ce8b0b1883ba5 --- /dev/null +++ b/net/mptcp/mptcp_rbs_scheduler.c @@ -0,0 +1,41 @@ +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_parser.h" +#include + +void mptcp_rbs_scheduler_variation_free( + struct mptcp_rbs_scheduler_variation *variation) +{ + if (variation->first_block) + mptcp_rbs_cfg_blocks_free(variation->first_block); +} + +void mptcp_rbs_scheduler_free(struct mptcp_rbs_scheduler *scheduler) +{ + int i; + + for (i = 0; i < MPTCP_RBS_VARIATION_COUNT; ++i) { + if (!scheduler->variations[i].first_block) + break; + + mptcp_rbs_scheduler_variation_free(&scheduler->variations[i]); + } + + kfree(scheduler->name); + kfree(scheduler); +} + +int mptcp_rbs_scheduler_print(const struct mptcp_rbs_scheduler *scheduler, + int variation, char *buffer) +{ + int len; + + BUG_ON(variation < 0 || variation >= MPTCP_RBS_VARIATION_COUNT); + BUG_ON(!scheduler->variations[variation].first_block); + + len = sprintf_null(&buffer, "SCHEDULER %s;\n\n", scheduler->name); + len += mptcp_rbs_cfg_blocks_print( + scheduler->variations[variation].first_block, buffer); + + return len; +} diff --git a/net/mptcp/mptcp_rbs_scheduler.h b/net/mptcp/mptcp_rbs_scheduler.h new file mode 100644 index 0000000000000..7b7b3be3f2520 --- /dev/null +++ 
b/net/mptcp/mptcp_rbs_scheduler.h @@ -0,0 +1,76 @@ +#ifndef _MPTCP_RBS_SCHEDULER_H +#define _MPTCP_RBS_SCHEDULER_H + +#include + +struct mptcp_rbs_cfg_block; + +#define MPTCP_RBS_VARIATION_COUNT 8 + +/* Struct for RBS scheduler variations that are created by the optimizer */ +struct mptcp_rbs_scheduler_variation { + /* The first block of the CFG or NULL if the variation is not used yet + */ + struct mptcp_rbs_cfg_block *first_block; + /* Number of total used variables */ + u8 used_vars; + /* Determines for how many subflows this variation is optimized. Might + * be 0 if the variation is not optimized for subflows + */ + u8 sbf_num; + +#ifdef CONFIG_MPTCP_RBSMEASURE + /* Number of executions */ + u64 exec_count; + /* Total execution time */ + u64 total_time; +#endif +}; + +/* Struct for RBS schedulers */ +struct mptcp_rbs_scheduler { + /* The next scheduler or NULL */ + struct mptcp_rbs_scheduler *next; + /* Name of the scheduler */ + char *name; + /* Array with different variations of the scheduler. The first entry is + * not optimized for a certain number of subflows + */ + struct mptcp_rbs_scheduler_variation + variations[MPTCP_RBS_VARIATION_COUNT]; + /* Number of usages */ + int usage; + /* Determines if the scheduler should be deleted */ + bool del; + +#ifdef CONFIG_MPTCP_RBSMEASURE + u64 total_count_noa_no_skb; + u64 total_time_noa_no_skb; + + u64 total_count_oa_skb; + u64 total_time_oa_skb; + + u64 total_count_noa_skb; + u64 total_time_noa_skb; + + u64 total_exec_count_no_skb; + u64 total_exec_time_no_skb; + + u64 total_exec_count_skb; + u64 total_exec_time_skb; +#endif + /* Total bytes pushed by the scheduler. Used to analyse the + * overhead of redundancy. + */ + u64 total_bytes_sent; +}; + +void mptcp_rbs_scheduler_variation_free( + struct mptcp_rbs_scheduler_variation *variation); + +void mptcp_rbs_scheduler_free(struct mptcp_rbs_scheduler *scheduler); + +int mptcp_rbs_scheduler_print(const struct mptcp_rbs_scheduler *scheduler, + int variation, char *buffer); + +#endif diff --git a/net/mptcp/mptcp_rbs_smt.c b/net/mptcp/mptcp_rbs_smt.c new file mode 100644 index 0000000000000..08ae504e1d7b7 --- /dev/null +++ b/net/mptcp/mptcp_rbs_smt.c @@ -0,0 +1,887 @@ +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_ctx.h" +#include "mptcp_rbs_optimizer_ebpf_disasm.h" +#include "mptcp_rbs_parser.h" +#include "mptcp_rbs_sched.h" +#include "mptcp_rbs_value.h" +#include +#include + +struct mptcp_rbs_smt_drop *mptcp_rbs_smt_drop_new( + struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_smt_drop *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_drop), GFP_KERNEL); + smt->kind = SMT_KIND_DROP; + smt->free = mptcp_rbs_smt_drop_free; + smt->execute = mptcp_rbs_smt_drop_execute; + smt->skb = skb; + + return smt; +} + +void mptcp_rbs_smt_drop_free(struct mptcp_rbs_smt_drop *self) +{ + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +void mptcp_rbs_smt_drop_execute(struct mptcp_rbs_smt_drop *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return; + + ctx->rbs_cb->execution_bucket += 5; + + mptcp_rbs_action_new(ctx->rbs_cb->open_actions, false, ACTION_KIND_DROP, + NULL, skb, self->skb->reinject); +} + +struct mptcp_rbs_smt_drop *mptcp_rbs_smt_drop_clone( + const struct mptcp_rbs_smt_drop *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_smt_drop *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_smt_drop), GFP_KERNEL); + *clone = *smt; + clone->next = NULL; + clone->skb = (struct 
mptcp_rbs_value_skb *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->skb, user_ctx, user_func); + + return clone; +} + +struct mptcp_rbs_smt_print *mptcp_rbs_smt_print_new( + struct mptcp_rbs_value_string *msg, struct mptcp_rbs_value *arg) +{ + struct mptcp_rbs_smt_print *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_print), GFP_KERNEL); + smt->kind = SMT_KIND_PRINT; + smt->free = mptcp_rbs_smt_print_free; + smt->execute = mptcp_rbs_smt_print_execute; + smt->msg = msg; + smt->arg = arg; + + return smt; +} + +void mptcp_rbs_smt_print_free(struct mptcp_rbs_smt_print *self) +{ + MPTCP_RBS_VALUE_FREE(self->msg); + MPTCP_RBS_VALUE_FREE(self->arg); + kfree(self); +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +void mptcp_rbs_smt_print_execute(struct mptcp_rbs_smt_print *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + char *msg = self->msg->execute(self->msg, ctx); + char str[512]; + struct inet_sock *isk = inet_sk(ctx->meta_sk); + + if (!msg) + return; + if (!self->arg) { + printk("ProgMP %p %08X:%04X %08X:%04X: %s\n", ctx->meta_sk, + isk->inet_rcv_saddr, + ntohs(isk->inet_sport), + isk->inet_daddr, + ntohs(isk->inet_dport), + msg); + return; + } + + /* build prefix */ + memset(str, 0, sizeof(str)); + snprintf(str, sizeof(str), "ProgMP %p %08X:%04X %08X:%04X: %s\n", ctx->meta_sk, + isk->inet_rcv_saddr, + ntohs(isk->inet_sport), + isk->inet_daddr, + ntohs(isk->inet_dport), + msg); + msg = str; + + switch (mptcp_rbs_value_get_type(self->arg->kind)) { + case TYPE_KIND_BOOL: { + struct mptcp_rbs_value_bool *arg = + (struct mptcp_rbs_value_bool *) self->arg; + s32 value = arg->execute(arg, ctx); + if (value != -1) + printk(msg, value != 0); + break; + } + case TYPE_KIND_INT: { + struct mptcp_rbs_value_int *arg = + (struct mptcp_rbs_value_int *) self->arg; + s64 value = arg->execute(arg, ctx); + if (value != -1) + printk(msg, (unsigned int) value); + break; + } + case TYPE_KIND_STRING: { + struct mptcp_rbs_value_string *arg = + (struct mptcp_rbs_value_string *) self->arg; + char *value = arg->execute(arg, ctx); + if (value) + printk(msg, value); + break; + } + case TYPE_KIND_NULL: { + printk(msg, NULL); + break; + } + case TYPE_KIND_SBF: { + struct mptcp_rbs_value_sbf *arg = + (struct mptcp_rbs_value_sbf *) self->arg; + struct tcp_sock *value = arg->execute(arg, ctx); + if (value) { + printk(msg, value, value->mptcp->sbf_id); + } else { + printk(msg, 0, 0); + } + break; + } + case TYPE_KIND_SKB: { + struct mptcp_rbs_value_skb *arg = + (struct mptcp_rbs_value_skb *) self->arg; + struct sk_buff *value = arg->execute(arg, ctx); + if (value) { + printk(msg, value, TCP_SKB_CB(value)->seq, + TCP_SKB_CB(value)->end_seq); + } else { + printk(msg, 0, 0, 0); + } + break; + } + case TYPE_KIND_SBFLIST: + case TYPE_KIND_SKBLIST: + /* Not possible */ + break; + } +} +#pragma GCC diagnostic pop + +struct mptcp_rbs_smt_print *mptcp_rbs_smt_print_clone( + const struct mptcp_rbs_smt_print *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_smt_print *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_smt_print), GFP_KERNEL); + *clone = *smt; + clone->next = NULL; + clone->msg = (struct mptcp_rbs_value_string *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->msg, user_ctx, user_func); + if (clone->arg) + clone->arg = + mptcp_rbs_value_clone(clone->arg, user_ctx, user_func); + + return clone; +} + +struct mptcp_rbs_smt_push *mptcp_rbs_smt_push_new( + struct mptcp_rbs_value_sbf *sbf, struct mptcp_rbs_value_skb *skb) +{ + struct 
mptcp_rbs_smt_push *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_push), GFP_KERNEL); + smt->kind = SMT_KIND_PUSH; + smt->free = mptcp_rbs_smt_push_free; + smt->execute = mptcp_rbs_smt_push_execute; + smt->sbf = sbf; + smt->skb = skb; + + return smt; +} + +void mptcp_rbs_smt_push_free(struct mptcp_rbs_smt_push *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +void mptcp_rbs_smt_push_execute(struct mptcp_rbs_smt_push *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + struct sk_buff *skb; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return; + + ctx->rbs_cb->execution_bucket += 5; + + mptcp_rbs_action_new(ctx->rbs_cb->open_actions, false, ACTION_KIND_PUSH, + sbf, skb, self->skb->reinject); +} + +struct mptcp_rbs_smt_push *mptcp_rbs_smt_push_clone( + const struct mptcp_rbs_smt_push *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_smt_push *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_smt_push), GFP_KERNEL); + *clone = *smt; + clone->next = NULL; + clone->sbf = (struct mptcp_rbs_value_sbf *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->sbf, user_ctx, user_func); + clone->skb = (struct mptcp_rbs_value_skb *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->skb, user_ctx, user_func); + + return clone; +} + +struct mptcp_rbs_smt_set_user *mptcp_rbs_smt_set_user_new( + struct mptcp_rbs_value_sbf *sbf, struct mptcp_rbs_value_int *value) +{ + struct mptcp_rbs_smt_set_user *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_set_user), GFP_KERNEL); + smt->kind = SMT_KIND_SET_USER; + smt->free = mptcp_rbs_smt_set_user_free; + smt->execute = mptcp_rbs_smt_set_user_execute; + smt->sbf = sbf; + smt->value = value; + + return smt; +} + +void mptcp_rbs_smt_set_user_free(struct mptcp_rbs_smt_set_user *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + MPTCP_RBS_VALUE_FREE(self->value); + kfree(self); +} + +void mptcp_rbs_smt_set_user_execute(struct mptcp_rbs_smt_set_user *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + s64 val; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return; + + val = self->value->execute(self->value, ctx); + + /* even eval to null is a side effect */ + ctx->side_effects = 1; + + if (val != -1) { +// *((unsigned long*)&sbf->mptcp->mptcp_sched[0]) = val; + mptcp_rbs_get_sbf_cb(sbf)->user = val; + } +} + +struct mptcp_rbs_smt_set_user *mptcp_rbs_smt_set_user_clone( + const struct mptcp_rbs_smt_set_user *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_smt_set_user *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_smt_set_user), GFP_KERNEL); + *clone = *smt; + clone->next = NULL; + clone->sbf = (struct mptcp_rbs_value_sbf *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->sbf, user_ctx, user_func); + clone->value = (struct mptcp_rbs_value_int *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->value, user_ctx, user_func); + + return clone; +} + +struct mptcp_rbs_smt_set *mptcp_rbs_smt_set_new( + int reg_number, struct mptcp_rbs_value_int *value) +{ + struct mptcp_rbs_smt_set *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_set), GFP_KERNEL); + smt->kind = SMT_KIND_SET; + smt->free = mptcp_rbs_smt_set_free; + smt->execute = mptcp_rbs_smt_set_execute; + smt->reg_number = reg_number; + smt->value = value; + + return smt; +} + +void mptcp_rbs_smt_set_free(struct 
mptcp_rbs_smt_set *self) +{ + MPTCP_RBS_VALUE_FREE(self->value); + kfree(self); +} + +void mptcp_rbs_smt_set_execute(struct mptcp_rbs_smt_set *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 val = self->value->execute(self->value, ctx); + + /* even eval to null is a side effect */ + ctx->side_effects = 1; + + if (val != -1) + ctx->rbs_cb->regs[self->reg_number] = val; +} + +struct mptcp_rbs_smt_set *mptcp_rbs_smt_set_clone( + const struct mptcp_rbs_smt_set *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_smt_set *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_smt_set), GFP_KERNEL); + *clone = *smt; + clone->next = NULL; + clone->value = (struct mptcp_rbs_value_int *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->value, user_ctx, user_func); + + return clone; +} + +struct mptcp_rbs_smt_var *mptcp_rbs_smt_var_new(int var_number, bool is_lazy, + struct mptcp_rbs_value *value) +{ + struct mptcp_rbs_smt_var *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_var), GFP_KERNEL); + smt->kind = SMT_KIND_VAR; + smt->free = mptcp_rbs_smt_var_free; + smt->execute = mptcp_rbs_smt_var_execute; + smt->var_number = var_number; + smt->is_lazy = is_lazy; + smt->value = value; + + return smt; +} + +void mptcp_rbs_smt_var_free(struct mptcp_rbs_smt_var *self) +{ + MPTCP_RBS_VALUE_FREE(self->value); + kfree(self); +} + +static struct tcp_sock **sbf_list_store(struct mptcp_rbs_value_sbf_list *value, + struct mptcp_rbs_eval_ctx *ctx) +{ + int len = 0; + bool is_null; + void *prev = NULL; + struct tcp_sock *sbf; + struct tcp_sock **list; + struct tcp_sock **tmp; + + sbf = value->execute(value, ctx, &prev, &is_null); + if (is_null) + return NULL; + + while (sbf) { + ++len; + sbf = value->execute(value, ctx, &prev, &is_null); + } + + list = kmalloc(sizeof(struct tcp_sock *) * (len + 1), GFP_ATOMIC); + if (!list) { + mptcp_rbs_debug("WARNING: Cannot allocate %zu bytes to store a " + "subflow list inside a variable. Setting " + "variable value to NULL\n", + sizeof(struct tcp_sock *) * (len + 1)); + return NULL; + } + + tmp = list; + prev = NULL; + sbf = value->execute(value, ctx, &prev, &is_null); + while (sbf) { + *tmp = sbf; + ++tmp; + sbf = value->execute(value, ctx, &prev, &is_null); + } + *tmp = NULL; /* implicit end with NULL */ + +if(len == 0) { + printk("%s allocates space for %d subflows\n", __func__, len); +} else if(len == 1) { + printk("%s allocates space for %d subflows, with first one %p\n", __func__, len, *list); +} else if(len > 1) { + printk("%s allocates space for %d subflows, with first one %p, second %p\n", __func__, len, *list, *(list + 1)); +} + + return list; +} + +static struct sk_buff **skb_list_store(struct mptcp_rbs_value_skb_list *value, + struct mptcp_rbs_eval_ctx *ctx) +{ + int len = 0; + bool is_null; + void *prev = NULL; + struct sk_buff *skb; + struct sk_buff **list; + struct sk_buff **tmp; + + skb = value->execute(value, ctx, &prev, &is_null); + if (is_null) + return NULL; + + while (skb) { + ++len; + skb = value->execute(value, ctx, &prev, &is_null); + } + + list = kmalloc(sizeof(struct sk_buff *) * (len + 1), GFP_ATOMIC); + if (!list) { + mptcp_rbs_debug("WARNING: Cannot allocate %zu bytes to store a " + "sockbuffer list inside a variable. 
Setting " + "variable value to NULL\n", + sizeof(struct sk_buff *) * (len + 1)); + return NULL; + } + + tmp = list; + prev = NULL; + skb = value->execute(value, ctx, &prev, &is_null); + while (skb) { + *tmp = skb; + ++tmp; + skb = value->execute(value, ctx, &prev, &is_null); + } + *tmp = NULL; + + return list; +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +void mptcp_rbs_smt_var_execute(struct mptcp_rbs_smt_var *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct mptcp_rbs_var *var = &ctx->vars[self->var_number]; + enum mptcp_rbs_type_kind type = + mptcp_rbs_value_get_type(self->value->kind); + + var->type = type; + var->is_lazy = self->is_lazy; + + if (self->is_lazy) + var->lazy_value = self->value; + else { + switch (type) { + case TYPE_KIND_NULL: + break; + case TYPE_KIND_BOOL: { + struct mptcp_rbs_value_bool *bool_value = + (struct mptcp_rbs_value_bool *) self->value; + + var->bool_value = bool_value->execute(bool_value, ctx); + break; + } + case TYPE_KIND_INT: { + struct mptcp_rbs_value_int *int_value = + (struct mptcp_rbs_value_int *) self->value; + + var->int_value = int_value->execute(int_value, ctx); + break; + } + case TYPE_KIND_STRING: { + struct mptcp_rbs_value_string *string_value = + (struct mptcp_rbs_value_string *) self->value; + + var->string_value = + string_value->execute(string_value, ctx); + break; + } + case TYPE_KIND_SBF: { + struct mptcp_rbs_value_sbf *sbf_value = + (struct mptcp_rbs_value_sbf *) self->value; + + var->sbf_value = sbf_value->execute(sbf_value, ctx); + break; + } + case TYPE_KIND_SBFLIST: { + struct mptcp_rbs_value_sbf_list *sbf_list_value = + (struct mptcp_rbs_value_sbf_list *) self->value; +printk("%s for meta_sk %p\n", __func__, ctx->mpcb->meta_sk); + var->sbf_list_value = + sbf_list_store(sbf_list_value, ctx); + break; + } + case TYPE_KIND_SKB: { + struct mptcp_rbs_value_skb *skb_value = + (struct mptcp_rbs_value_skb *) self->value; + + var->skb_value = skb_value->execute(skb_value, ctx); + break; + } + case TYPE_KIND_SKBLIST: { + struct mptcp_rbs_value_skb_list *skb_list_value = + (struct mptcp_rbs_value_skb_list *) self->value; + + var->skb_list_value = + skb_list_store(skb_list_value, ctx); + break; + } + } + } +} +#pragma GCC diagnostic pop + +struct mptcp_rbs_smt_var *mptcp_rbs_smt_var_clone( + const struct mptcp_rbs_smt_var *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_smt_var *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_smt_var), GFP_KERNEL); + *clone = *smt; + clone->next = NULL; + clone->value = mptcp_rbs_value_clone(clone->value, user_ctx, user_func); + + return clone; +} + +struct mptcp_rbs_smt_void *mptcp_rbs_smt_void_new(struct mptcp_rbs_value *value) +{ + struct mptcp_rbs_smt_void *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_void), GFP_KERNEL); + smt->kind = SMT_KIND_VOID; + smt->free = mptcp_rbs_smt_void_free; + smt->execute = mptcp_rbs_smt_void_execute; + smt->value = value; + + return smt; +} + +void mptcp_rbs_smt_void_free(struct mptcp_rbs_smt_void *self) +{ + MPTCP_RBS_VALUE_FREE(self->value); + kfree(self); +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +void mptcp_rbs_smt_void_execute(struct mptcp_rbs_smt_void *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + if (!self->value) + return; + + switch (mptcp_rbs_value_get_type(self->value->kind)) { + case TYPE_KIND_NULL: { + /* Do nothing */ + break; + } + case TYPE_KIND_BOOL: { + struct mptcp_rbs_value_bool *value = + (struct mptcp_rbs_value_bool *) self->value; + + 
value->execute(value, ctx); + break; + } + case TYPE_KIND_INT: { + struct mptcp_rbs_value_int *value = + (struct mptcp_rbs_value_int *) self->value; + + value->execute(value, ctx); + break; + } + case TYPE_KIND_STRING: { + struct mptcp_rbs_value_string *value = + (struct mptcp_rbs_value_string *) self->value; + + value->execute(value, ctx); + break; + } + case TYPE_KIND_SBF: { + struct mptcp_rbs_value_sbf *value = + (struct mptcp_rbs_value_sbf *) self->value; + + value->execute(value, ctx); + break; + } + case TYPE_KIND_SBFLIST: { + struct mptcp_rbs_value_sbf_list *value = + (struct mptcp_rbs_value_sbf_list *) self->value; + void *prev = NULL; + bool is_null; + + value->execute(value, ctx, &prev, &is_null); + break; + } + case TYPE_KIND_SKB: { + struct mptcp_rbs_value_skb *value = + (struct mptcp_rbs_value_skb *) self->value; + + value->execute(value, ctx); + break; + } + case TYPE_KIND_SKBLIST: { + struct mptcp_rbs_value_skb_list *value = + (struct mptcp_rbs_value_skb_list *) self->value; + void *prev = NULL; + bool is_null; + + value->execute(value, ctx, &prev, &is_null); + break; + } + } +} +#pragma GCC diagnostic pop + +struct mptcp_rbs_smt_void *mptcp_rbs_smt_void_clone( + const struct mptcp_rbs_smt_void *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_smt_void *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_smt_void), GFP_KERNEL); + *clone = *smt; + clone->next = NULL; + if (clone->value) + clone->value = + mptcp_rbs_value_clone(clone->value, user_ctx, user_func); + + return clone; +} + +struct mptcp_rbs_smt_ebpf *mptcp_rbs_smt_ebpf_new(struct bpf_prog *prog, + char **strs, int strs_len) +{ + struct mptcp_rbs_smt_ebpf *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_ebpf), GFP_KERNEL); + smt->kind = SMT_KIND_EBPF; + smt->free = mptcp_rbs_smt_ebpf_free; + smt->execute = mptcp_rbs_smt_ebpf_execute; + smt->prog = prog; + smt->strs = strs; + smt->strs_len = strs_len; + + return smt; +} + +void mptcp_rbs_smt_ebpf_free(struct mptcp_rbs_smt_ebpf *self) +{ + int i; + + if (self->prog) + bpf_prog_free(self->prog); + if (self->strs) { + for (i = 0; i < self->strs_len; ++i) { + kfree(self->strs[i]); + } + kfree(self->strs); + } + kfree(self); +} + +void mptcp_rbs_smt_ebpf_execute(struct mptcp_rbs_smt_ebpf *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + BPF_PROG_RUN(self->prog, (struct sk_buff *) ctx); +} + +void mptcp_rbs_smts_free(struct mptcp_rbs_smt *smt) +{ + while (smt) { + struct mptcp_rbs_smt *old_smt = smt; + smt = smt->next; + old_smt->free(old_smt); + } +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +#pragma GCC diagnostic ignored "-Wreturn-type" +struct mptcp_rbs_smt *mptcp_rbs_smt_clone( + const struct mptcp_rbs_smt *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + switch (smt->kind) { + case SMT_KIND_DROP: + return (struct mptcp_rbs_smt *) mptcp_rbs_smt_drop_clone( + (const struct mptcp_rbs_smt_drop *) smt, user_ctx, + user_func); + case SMT_KIND_PRINT: + return (struct mptcp_rbs_smt *) mptcp_rbs_smt_print_clone( + (const struct mptcp_rbs_smt_print *) smt, user_ctx, + user_func); + case SMT_KIND_PUSH: + return (struct mptcp_rbs_smt *) mptcp_rbs_smt_push_clone( + (const struct mptcp_rbs_smt_push *) smt, user_ctx, + user_func); + case SMT_KIND_SET_USER: + return (struct mptcp_rbs_smt *) mptcp_rbs_smt_set_user_clone( + (const struct mptcp_rbs_smt_set_user*) smt, user_ctx, + user_func); + case SMT_KIND_SET: + return (struct mptcp_rbs_smt *) mptcp_rbs_smt_set_clone( + (const struct 
mptcp_rbs_smt_set *) smt, user_ctx, + user_func); + case SMT_KIND_VAR: + return (struct mptcp_rbs_smt *) mptcp_rbs_smt_var_clone( + (const struct mptcp_rbs_smt_var *) smt, user_ctx, + user_func); + case SMT_KIND_VOID: + return (struct mptcp_rbs_smt *) mptcp_rbs_smt_void_clone( + (const struct mptcp_rbs_smt_void *) smt, user_ctx, + user_func); + case SMT_KIND_EBPF: { + /* This should never be called */ + BUG_ON(true); + return NULL; + } + } +} +#pragma GCC diagnostic pop + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +#pragma GCC diagnostic ignored "-Wreturn-type" +int mptcp_rbs_smt_print(const struct mptcp_rbs_smt *smt, char *buffer) +{ + int len; + int tmp_len; + + switch (smt->kind) { + case SMT_KIND_DROP: { + const struct mptcp_rbs_smt_drop *drop = + (const struct mptcp_rbs_smt_drop *) smt; + + len = sprintf_null(&buffer, "DROP("); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) drop->skb, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + len += sprintf_null(&buffer, ");"); + return len; + } + case SMT_KIND_PRINT: { + const struct mptcp_rbs_smt_print *print = + (const struct mptcp_rbs_smt_print *) smt; + + len = sprintf_null(&buffer, "PRINT("); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) print->msg, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + if (print->arg) { + len += sprintf_null(&buffer, ", "); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) print->arg, + buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + } + len += sprintf_null(&buffer, ");"); + return len; + } + case SMT_KIND_PUSH: { + const struct mptcp_rbs_smt_push *push = + (const struct mptcp_rbs_smt_push *) smt; + + len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) push->sbf, buffer); + if (buffer) + buffer += len; + len += sprintf_null(&buffer, ".PUSH("); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) push->skb, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + len += sprintf_null(&buffer, ");"); + return len; + } + case SMT_KIND_SET_USER: { + const struct mptcp_rbs_smt_set_user *set_user = + (const struct mptcp_rbs_smt_set_user *) smt; + + len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) set_user->sbf, buffer); + if (buffer) + buffer += len; + len += sprintf_null(&buffer, ".SET_USER("); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) set_user->value, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + len += sprintf_null(&buffer, ");"); + return len; + } + case SMT_KIND_SET: { + const struct mptcp_rbs_smt_set *set = + (const struct mptcp_rbs_smt_set *) smt; + + len = sprintf_null(&buffer, "SET(R%d, ", set->reg_number + 1); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) set->value, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + len += sprintf_null(&buffer, ");"); + return len; + } + case SMT_KIND_VAR: { + const struct mptcp_rbs_smt_var *var = + (const struct mptcp_rbs_smt_var *) smt; + + len = sprintf_null(&buffer, "VAR v%d = ", var->var_number + 1); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) var->value, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + if (var->is_lazy) + len += sprintf_null(&buffer, " LAZY"); + len += sprintf_null(&buffer, ";"); + return len; + } + case SMT_KIND_VOID: { + const struct mptcp_rbs_smt_void *void_ = + (const struct mptcp_rbs_smt_void *) smt; + + len = sprintf_null(&buffer, 
"VOID"); + if (void_->value) { + len += sprintf_null(&buffer, "("); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) void_->value, + buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + len += sprintf_null(&buffer, ")"); + } + len += sprintf_null(&buffer, ";"); + return len; + } + case SMT_KIND_EBPF: { + const struct mptcp_rbs_smt_ebpf *ebpf = + (const struct mptcp_rbs_smt_ebpf *) smt; + + return mptcp_rbs_ebpf_dump(ebpf->prog, buffer); + } + } +} +#pragma GCC diagnostic pop diff --git a/net/mptcp/mptcp_rbs_smt.h b/net/mptcp/mptcp_rbs_smt.h new file mode 100644 index 0000000000000..d328a9f8b6676 --- /dev/null +++ b/net/mptcp/mptcp_rbs_smt.h @@ -0,0 +1,221 @@ +#ifndef _MPTCP_RBS_SMT_H +#define _MPTCP_RBS_SMT_H + +#include + +struct bpf_prog; +struct mptcp_rbs_eval_ctx; + +#ifndef MPTCP_RBS_CLONE_USER_FUNC_DEFINED +#define MPTCP_RBS_CLONE_USER_FUNC_DEFINED +typedef struct mptcp_rbs_value *(*mptcp_rbs_value_clone_user_func)( + void *user_ctx, const struct mptcp_rbs_value *value); +#endif + +/* Enumeration of statement kinds */ +enum mptcp_rbs_smt_kind { + SMT_KIND_DROP, + SMT_KIND_PRINT, + SMT_KIND_PUSH, + SMT_KIND_SET_USER, + SMT_KIND_SET, + SMT_KIND_VAR, + SMT_KIND_VOID, + SMT_KIND_EBPF +}; + +/* Base struct to store a single statement */ +struct mptcp_rbs_smt { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt *self); + void (*execute)(struct mptcp_rbs_smt *self, + struct mptcp_rbs_eval_ctx *ctx); +}; + +/* Struct to store a drop statement */ +struct mptcp_rbs_smt_drop { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_drop *self); + void (*execute)(struct mptcp_rbs_smt_drop *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb *skb; +}; + +struct mptcp_rbs_smt_drop *mptcp_rbs_smt_drop_new( + struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_smt_drop_free(struct mptcp_rbs_smt_drop *self); +void mptcp_rbs_smt_drop_execute(struct mptcp_rbs_smt_drop *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_smt_drop *mptcp_rbs_smt_drop_clone( + const struct mptcp_rbs_smt_drop *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* Struct to store a print statement */ +struct mptcp_rbs_smt_print { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_print *self); + void (*execute)(struct mptcp_rbs_smt_print *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_string *msg; + struct mptcp_rbs_value *arg; +}; + +struct mptcp_rbs_smt_print *mptcp_rbs_smt_print_new( + struct mptcp_rbs_value_string *msg, struct mptcp_rbs_value *arg); +void mptcp_rbs_smt_print_free(struct mptcp_rbs_smt_print *self); +void mptcp_rbs_smt_print_execute(struct mptcp_rbs_smt_print *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_smt_print *mptcp_rbs_smt_print_clone( + const struct mptcp_rbs_smt_print *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* Struct to store a push statement */ +struct mptcp_rbs_smt_push { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_push *self); + void (*execute)(struct mptcp_rbs_smt_push *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; + struct mptcp_rbs_value_skb *skb; +}; + +struct mptcp_rbs_smt_push *mptcp_rbs_smt_push_new( + struct mptcp_rbs_value_sbf *sbf, struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_smt_push_free(struct mptcp_rbs_smt_push *self); +void 
mptcp_rbs_smt_push_execute(struct mptcp_rbs_smt_push *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_smt_push *mptcp_rbs_smt_push_clone( + const struct mptcp_rbs_smt_push *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* Struct to store a set_user statement */ +struct mptcp_rbs_smt_set_user { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_set_user *self); + void (*execute)(struct mptcp_rbs_smt_set_user *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; + struct mptcp_rbs_value_int *value; +}; + +struct mptcp_rbs_smt_set_user *mptcp_rbs_smt_set_user_new( + struct mptcp_rbs_value_sbf *sbf, struct mptcp_rbs_value_int *value); +void mptcp_rbs_smt_set_user_free(struct mptcp_rbs_smt_set_user *self); +void mptcp_rbs_smt_set_user_execute(struct mptcp_rbs_smt_set_user *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_smt_set_user *mptcp_rbs_smt_set_user_clone( + const struct mptcp_rbs_smt_set_user *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* Struct to store a set statement */ +struct mptcp_rbs_smt_set { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_set *self); + void (*execute)(struct mptcp_rbs_smt_set *self, + struct mptcp_rbs_eval_ctx *ctx); + int reg_number; + struct mptcp_rbs_value_int *value; +}; + +struct mptcp_rbs_smt_set *mptcp_rbs_smt_set_new( + int reg_number, struct mptcp_rbs_value_int *value); +void mptcp_rbs_smt_set_free(struct mptcp_rbs_smt_set *self); +void mptcp_rbs_smt_set_execute(struct mptcp_rbs_smt_set *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_smt_set *mptcp_rbs_smt_set_clone( + const struct mptcp_rbs_smt_set *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* Struct to store a var statement */ +struct mptcp_rbs_smt_var { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_var *self); + void (*execute)(struct mptcp_rbs_smt_var *self, + struct mptcp_rbs_eval_ctx *ctx); + int var_number; + bool is_lazy; + struct mptcp_rbs_value *value; +}; + +struct mptcp_rbs_smt_var *mptcp_rbs_smt_var_new(int var_number, bool is_lazy, + struct mptcp_rbs_value *value); +void mptcp_rbs_smt_var_free(struct mptcp_rbs_smt_var *self); +void mptcp_rbs_smt_var_execute(struct mptcp_rbs_smt_var *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_smt_var *mptcp_rbs_smt_var_clone( + const struct mptcp_rbs_smt_var *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* Struct to store a void statement. This statement is only used for + * measurements! 
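+ * A VOID statement evaluates its value exactly once and discards the
+ * result, so the cost of computing a value can be measured without
+ * affecting scheduling decisions.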
+ */ +struct mptcp_rbs_smt_void { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_void *self); + void (*execute)(struct mptcp_rbs_smt_void *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value *value; +}; + +struct mptcp_rbs_smt_void *mptcp_rbs_smt_void_new( + struct mptcp_rbs_value *value); +void mptcp_rbs_smt_void_free(struct mptcp_rbs_smt_void *self); +void mptcp_rbs_smt_void_execute(struct mptcp_rbs_smt_void *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_smt_void *mptcp_rbs_smt_void_clone( + const struct mptcp_rbs_smt_void *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* Struct to store generated eBPF code */ +struct mptcp_rbs_smt_ebpf { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_ebpf *self); + void (*execute)(struct mptcp_rbs_smt_ebpf *self, + struct mptcp_rbs_eval_ctx *ctx); + struct bpf_prog *prog; + char **strs; + int strs_len; +}; + +struct mptcp_rbs_smt_ebpf *mptcp_rbs_smt_ebpf_new(struct bpf_prog *prog, + char **strs, int strs_len); +void mptcp_rbs_smt_ebpf_free(struct mptcp_rbs_smt_ebpf *self); +void mptcp_rbs_smt_ebpf_execute(struct mptcp_rbs_smt_ebpf *self, + struct mptcp_rbs_eval_ctx *ctx); + +/* + * Releases all statements starting with the given one + */ +void mptcp_rbs_smts_free(struct mptcp_rbs_smt *smt); + +/* + * Creates a copy of a statement and all its subvalues + * @smt: The statement to copy + * @user_ctx: User context for the user function or NULL + * @user_func: Function that is executed for each value or NULL. If this + * function returns a value other than NULL the current value is replaced with + * it instead of cloned + * Return: The new instance + */ +struct mptcp_rbs_smt *mptcp_rbs_smt_clone( + const struct mptcp_rbs_smt *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* + * Writes a string representation of a statement to the given buffer + * @smt: The statement + * @buffer: Pointer to the buffer where the string should be stored or NULL + * @return: Number of written characters + */ +int mptcp_rbs_smt_print(const struct mptcp_rbs_smt *smt, char *buffer); + +#endif diff --git a/net/mptcp/mptcp_rbs_type.c b/net/mptcp/mptcp_rbs_type.c new file mode 100644 index 0000000000000..22150f753a953 --- /dev/null +++ b/net/mptcp/mptcp_rbs_type.c @@ -0,0 +1,18 @@ +#include "mptcp_rbs_type.h" + +/* Array to map mptcp_rbs_type_kind items to their names */ +static const char *type_names[] = { + "null", + "boolean", + "integer", + "string", + "subflow", + "subflow list", + "sockbuffer", + "sockbuffer list" +}; + +const char *mptcp_rbs_type_get_name(enum mptcp_rbs_type_kind type) +{ + return type_names[type]; +} diff --git a/net/mptcp/mptcp_rbs_type.h b/net/mptcp/mptcp_rbs_type.h new file mode 100644 index 0000000000000..591d802f9cf35 --- /dev/null +++ b/net/mptcp/mptcp_rbs_type.h @@ -0,0 +1,21 @@ +#ifndef _MPTCP_RBS_TYPE_H +#define _MPTCP_RBS_TYPE_H + +/* Enumeration of type kinds */ +enum mptcp_rbs_type_kind { + TYPE_KIND_NULL, + TYPE_KIND_BOOL, + TYPE_KIND_INT, + TYPE_KIND_STRING, + TYPE_KIND_SBF, + TYPE_KIND_SBFLIST, + TYPE_KIND_SKB, + TYPE_KIND_SKBLIST +}; + +/* + * Returns the name of a type + */ +const char *mptcp_rbs_type_get_name(enum mptcp_rbs_type_kind type); + +#endif diff --git a/net/mptcp/mptcp_rbs_user.c b/net/mptcp/mptcp_rbs_user.c new file mode 100644 index 0000000000000..eb9e54d9db843 --- /dev/null +++ b/net/mptcp/mptcp_rbs_user.c @@ -0,0 +1,932 @@ +#include "mptcp_rbs_user.h" 
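+/* This file exposes the RBS scheduler through procfs. The pernet init
+ * below builds the following tree (a sketch derived from the proc_create
+ * calls in this file):
+ *
+ *   /proc/net/mptcp/rbs/info           OOO match counter
+ *   /proc/net/mptcp/rbs/schedulers     write scheduler source / list ids
+ *   /proc/net/mptcp/rbs/default        read or set the default scheduler
+ *   /proc/net/mptcp/rbs/opt            optimization level 0/1/2
+ *   /proc/net/mptcp/rbs/measurements   default/RR timing counters
+ *   /proc/net/mptcp/rbs/<name>/{delete,dump,measurements,info}
+ *
+ * Usage sketch ("myRBS" is a hypothetical scheduler name):
+ *
+ *   cat rules.rbs > /proc/net/mptcp/rbs/schedulers
+ *   echo myRBS    > /proc/net/mptcp/rbs/default
+ *   echo 1        > /proc/net/mptcp/rbs/myRBS/delete
+ */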
+#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_parser.h" +#include "mptcp_rbs_sched.h" +#include "mptcp_rbs_scheduler.h" +#include <../fs/proc/internal.h> +#include +#include +#include + +struct rbs_dir; + +struct rbs_sub_dir { + struct rbs_sub_dir *next; + struct mptcp_rbs_scheduler *scheduler; + struct proc_dir_entry *dir; +}; + +struct rbs_dir { + struct rbs_dir *next; + struct proc_dir_entry *dir; + struct rbs_sub_dir *first_sub_dir; +}; + +static struct rbs_dir *first_rbs_dir = NULL; +static DEFINE_MUTEX(delete_mutex); +static struct mptcp_rbs_scheduler *schedulers_to_delete = NULL; + +static void link_rbs_sub_dir(struct rbs_dir *dir, struct rbs_sub_dir *sub_dir) +{ + sub_dir->next = dir->first_sub_dir; + dir->first_sub_dir = sub_dir; +} + +static void unlink_rbs_sub_dir(struct rbs_dir *dir, struct rbs_sub_dir *sub_dir) +{ + struct rbs_sub_dir *tmp = dir->first_sub_dir; + struct rbs_sub_dir *prev = NULL; + + while (tmp != sub_dir) { + if (!tmp) + return; + + prev = tmp; + tmp = tmp->next; + } + + if (!prev) + dir->first_sub_dir = sub_dir->next; + else + prev->next = sub_dir->next; + + sub_dir->next = NULL; +} + +static void link_rbs_dir(struct rbs_dir *dir) +{ + dir->next = first_rbs_dir; + first_rbs_dir = dir; +} + +static void unlink_rbs_dir(struct rbs_dir *dir) +{ + struct rbs_dir *tmp = first_rbs_dir; + struct rbs_dir *prev = NULL; + + while (tmp != dir) { + if (!tmp) + return; + + prev = tmp; + tmp = tmp->next; + } + + if (!prev) + first_rbs_dir = dir->next; + else + prev->next = dir->next; + + dir->next = NULL; +} + +static void link_scheduler_to_delete(struct mptcp_rbs_scheduler *scheduler) +{ + struct mptcp_rbs_scheduler *tmp; + + mutex_lock(&delete_mutex); + + /* Check if the scheduler is already in the list */ + tmp = schedulers_to_delete; + while (tmp) { + if (tmp == scheduler) + break; + tmp = tmp->next; + } + + if (tmp != scheduler) { + scheduler->next = schedulers_to_delete; + schedulers_to_delete = scheduler; + } + + mutex_unlock(&delete_mutex); +} + +static bool create_scheduler_sub_dir(struct rbs_dir *rbs_dir, + struct mptcp_rbs_scheduler *scheduler); +static void remove_scheduler_sub_dir(struct rbs_dir *rbs_dir, + struct mptcp_rbs_scheduler *scheduler); + +static void delete_scheduler(struct work_struct *work) +{ + struct mptcp_rbs_scheduler *scheduler; + struct rbs_dir *rbs_dir; + + mutex_lock(&delete_mutex); + + while (schedulers_to_delete) { + scheduler = schedulers_to_delete->next; + + /* Delete the proc entries for the scheduler in all rbs + * directories + */ + rbs_dir = first_rbs_dir; + while (rbs_dir) { + remove_scheduler_sub_dir(rbs_dir, schedulers_to_delete); + rbs_dir = rbs_dir->next; + } + + /* Remove the CFG */ + mptcp_rbs_scheduler_free(schedulers_to_delete); + + schedulers_to_delete = scheduler; + } + + mutex_unlock(&delete_mutex); +} + +static DECLARE_WORK(delete_task, delete_scheduler); + +/* + * info proc entry + */ + +static int info_show(struct seq_file *seq, void *v) +{ + seq_printf(seq, "Number of OOO advanced hits %u\n", mptcp_ooo_number_matches); + + return 0; +} + +static int info_open(struct inode *inode, struct file *file) +{ + return single_open(file, info_show, PDE_DATA(inode)); +} + +static const struct file_operations info_file_ops = { + .owner = THIS_MODULE, + .open = info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * schedulers proc entry + */ + +static int schedulers_show(struct seq_file *seq, void *v) +{ + const struct mptcp_rbs_scheduler *scheduler = + 
mptcp_rbs_scheduler_get_registered(); + + seq_printf(seq, "id: scheduler name\n"); + while (scheduler) { + seq_printf(seq, "%p: %s\n", scheduler, scheduler->name); + scheduler = scheduler->next; + } + + return 0; +} + +static int schedulers_open(struct inode *inode, struct file *file) +{ + return single_open(file, schedulers_show, PDE_DATA(inode)); +} + +struct strbuffer { + char *start; + int len; +}; + +static ssize_t schedulers_write(struct file *file, const char __user *buf, + size_t size, loff_t *offset) +{ + struct strbuffer *buffer = + ((struct seq_file *) file->private_data)->private; + + if (!size) + return 0; + + if (buffer) { + char *new_start = + krealloc(buffer->start, buffer->len + size + 1, GFP_ATOMIC); + if (!new_start) { + printk("RBS: Could not allocate memory for scheduler " + "code\n"); + return -1; + } + + buffer->start = new_start; + } else { + buffer = kmalloc(sizeof(struct strbuffer), GFP_KERNEL); + if (!buffer) { + printk("RBS: Could not allocate memory for scheduler " + "code\n"); + return -1; + } + + buffer->len = 0; + buffer->start = kmalloc(size + 1, GFP_KERNEL); + if (!buffer->start) { + kfree(buffer); + printk("RBS: Could not allocate memory for scheduler " + "code\n"); + return -1; + } + } + ((struct seq_file *) file->private_data)->private = buffer; + + buffer->start[buffer->len + size] = 0; + copy_from_user(&buffer->start[buffer->len], buf, size); + buffer->len += size; + + return size; +} + +static int schedulers_release(struct inode *inode, struct file *file) +{ + struct strbuffer *buffer = + ((struct seq_file *) file->private_data)->private; + struct mptcp_rbs_scheduler *scheduler; + struct rbs_dir *rbs_dir; + + if (!buffer) + return 0; + + /* Parse the scheduler */ + scheduler = mptcp_rbs_scheduler_parse(buffer->start); + kfree(buffer->start); + kfree(buffer); + if (!scheduler) + return -1; + + /* Optimize the scheduler */ + if (mptcp_rbs_opts_enabled) { + mptcp_rbs_optimize(&scheduler->variations[0], &scheduler->del, + 0, false); + } + + /* Add the scheduler */ + if (!strcmp(scheduler->name, "info") || + !strcmp(scheduler->name, "schedulers") || + !strcmp(scheduler->name, "default") || + !mptcp_rbs_scheduler_register(scheduler)) { + mptcp_rbs_scheduler_free(scheduler); + return -2; + } + + /* Create the proc entries for the scheduler in every rbs directory */ + rbs_dir = first_rbs_dir; + while (rbs_dir) { + create_scheduler_sub_dir(rbs_dir, scheduler); + rbs_dir = rbs_dir->next; + } + + return 0; +} + +static const struct file_operations schedulers_file_ops = { + .owner = THIS_MODULE, + .open = schedulers_open, + .read = seq_read, + .write = schedulers_write, + .llseek = seq_lseek, + .release = schedulers_release, +}; + +/* + * default proc entry + */ + +static int default_show(struct seq_file *seq, void *v) +{ + seq_printf(seq, "%s\n", mptcp_rbs_scheduler_get_default()->name); + + return 0; +} + +static int default_open(struct inode *inode, struct file *file) +{ + return single_open(file, default_show, PDE_DATA(inode)); +} + +static ssize_t default_write(struct file *file, const char __user *buf, + size_t size, loff_t *offset) +{ + char *str; + char *trimmed_str; + struct mptcp_rbs_scheduler *scheduler = NULL; + const struct mptcp_rbs_scheduler *tmp = + mptcp_rbs_scheduler_get_registered(); + + if (size == 0) + return 0; + + str = kzalloc(size + 1, GFP_KERNEL); + copy_from_user(str, buf, size); + trimmed_str = strim(str); + + /* Find scheduler with the given name */ + while (tmp) { + if (!strcmp(trimmed_str, tmp->name)) { + scheduler = (struct 
mptcp_rbs_scheduler *) tmp; + break; + } + + tmp = tmp->next; + } + kfree(str); + + if (!scheduler) + return -1; + + mptcp_rbs_scheduler_set_default(scheduler); + return size; +} + +static const struct file_operations default_file_ops = { + .owner = THIS_MODULE, + .open = default_open, + .read = seq_read, + .write = default_write, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * opt proc entry + */ + +int mptcp_rbs_opts_enabled = +#ifdef CONFIG_MPTCP_RBSOPT +#ifdef CONFIG_MPTCP_RBSEBPF + 2 +#else + 1 +#endif +#else + 0 +#endif + ; + +static int opt_show(struct seq_file *seq, void *v) +{ + seq_printf(seq, "%d\n", mptcp_rbs_opts_enabled); + return 0; +} + +static int opt_open(struct inode *inode, struct file *file) +{ + return single_open(file, opt_show, PDE_DATA(inode)); +} + +static ssize_t opt_write(struct file *file, const char __user *buf, size_t size, + loff_t *offset) +{ + char *str; + char *trimmed_str; + + if (size == 0) + return 0; + + str = kzalloc(size + 1, GFP_KERNEL); + copy_from_user(str, buf, size); + trimmed_str = strim(str); + + if (!strcmp(trimmed_str, "0")) + mptcp_rbs_opts_enabled = 0; +#ifdef CONFIG_MPTCP_RBSOPT + else if (!strcmp(trimmed_str, "1")) + mptcp_rbs_opts_enabled = 1; +#ifdef CONFIG_MPTCP_RBSEBPF + else if (!strcmp(trimmed_str, "2")) + mptcp_rbs_opts_enabled = 2; +#endif +#endif + else + return -1; + + return size; +} + +static const struct file_operations opt_file_ops = { + .owner = THIS_MODULE, + .open = opt_open, + .read = seq_read, + .write = opt_write, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * measurements proc entry for default and round robin scheduler + */ + +extern u64 total_default_time_skb; +extern u64 total_default_time_no_skb; +extern u64 total_default_count_skb; +extern u64 total_default_count_no_skb; + +extern u64 total_rr_time_skb; +extern u64 total_rr_time_no_skb; +extern u64 total_rr_count_skb; +extern u64 total_rr_count_no_skb; + +static int measurements2_show(struct seq_file *seq, void *v) +{ + seq_printf(seq, "Default:\n"); + seq_printf(seq, " state executions time\n"); + +#ifdef CONFIG_MPTCP_RBSMEASURE + seq_printf(seq, " no_skb %13llu %13llu\n", + total_default_count_no_skb, total_default_time_no_skb); + seq_printf(seq, " skb %13llu %13llu\n", total_default_count_skb, + total_default_time_skb); +#endif + + seq_printf(seq, "\n"); + seq_printf(seq, "Round Robin:\n"); + seq_printf(seq, " state executions time\n"); + +#ifdef CONFIG_MPTCP_RBSMEASURE + seq_printf(seq, " no_skb %13llu %13llu\n", total_rr_count_no_skb, + total_rr_time_no_skb); + seq_printf(seq, " skb %13llu %13llu\n", total_rr_count_skb, + total_rr_time_skb); +#endif + + return 0; +} + +static int measurements2_open(struct inode *inode, struct file *file) +{ + return single_open(file, measurements2_show, PDE_DATA(inode)); +} + +static ssize_t measurements2_write(struct file *file, const char __user *buf, + size_t size, loff_t *offset) +{ + char *str; + char *trimmed_str; + bool b; + + if (size == 0) + return 0; + + str = kzalloc(size + 1, GFP_KERNEL); + copy_from_user(str, buf, size); + trimmed_str = strim(str); + + /* Check if value == "0" */ + if (!strtobool(trimmed_str, &b) && !b) { +#ifdef CONFIG_MPTCP_RBSMEASURE + total_default_time_skb = 0; + total_default_time_no_skb = 0; + total_default_count_skb = 0; + total_default_count_no_skb = 0; + + total_rr_time_skb = 0; + total_rr_time_no_skb = 0; + total_rr_count_skb = 0; + total_rr_count_no_skb = 0; +#endif + kfree(str); + return size; + } + kfree(str); + + return -1; +} + +static const 
struct file_operations measurements2_file_ops = { + .owner = THIS_MODULE, + .open = measurements2_open, + .read = seq_read, + .write = measurements2_write, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * delete proc entry (per scheduler) + */ + +static int delete_show(struct seq_file *seq, void *v) +{ + seq_printf(seq, "0\n"); + + return 0; +} + +static int delete_open(struct inode *inode, struct file *file) +{ + return single_open(file, delete_show, PDE_DATA(inode)); +} + +static ssize_t delete_write(struct file *file, const char __user *buf, + size_t size, loff_t *offset) +{ + struct mptcp_rbs_scheduler *scheduler = + ((struct seq_file *) file->private_data)->private; + char *str; + char *trimmed_str; + bool b; + + if (size == 0) + return 0; + + str = kzalloc(size + 1, GFP_KERNEL); + copy_from_user(str, buf, size); + trimmed_str = strim(str); + + /* Check if value == "1" */ + if (!strtobool(trimmed_str, &b) && b) { + scheduler->del = true; + kfree(str); + return size; + } + kfree(str); + + return -1; +} + +static int delete_release(struct inode *inode, struct file *file) +{ + struct mptcp_rbs_scheduler *scheduler = PDE_DATA(inode); + int result = single_release(inode, file); + + if (scheduler->del) { + /* Check if the scheduler is in use */ + if (scheduler->usage > 0 || + scheduler == mptcp_rbs_scheduler_get_default()) { + /* Cannot delete the scheduler */ + scheduler->del = false; + } else { + /* Remove the scheduler from usable ones */ + mptcp_rbs_scheduler_unregister(scheduler); + + /* Put the scheduler in the queue of schedulers to + * delete + */ + link_scheduler_to_delete(scheduler); + + /* Schedule the delete task because we cannot delete the + * delete proc entry here + */ + schedule_work(&delete_task); + } + } + + return result; +} + +static const struct file_operations delete_file_ops = { + .owner = THIS_MODULE, + .open = delete_open, + .read = seq_read, + .write = delete_write, + .llseek = seq_lseek, + .release = delete_release, +}; + +/* + * dump proc entry (per scheduler) + */ + +static int dump_show(struct seq_file *seq, void *v) +{ + const struct mptcp_rbs_scheduler *scheduler = seq->private; + int len = 0; + int variation_count = 0; + int i; + int pos; + char *str; + + for (i = 0; i < MPTCP_RBS_VARIATION_COUNT; ++i) { + if (!scheduler->variations[i].first_block) + break; + + len += strlen("Subflows :\n") + (i > 9 ? 
2 : 1) + 1 + + mptcp_rbs_scheduler_print(scheduler, i, NULL); + ++variation_count; + } + + str = kzalloc(len + 1, GFP_KERNEL); + if (!str) { + seq_printf(seq, "\n"); + return 0; + } + + pos = 0; + for (i = 0; i < variation_count; ++i) { + pos += sprintf(&str[pos], "Subflows %d:\n", + scheduler->variations[i].sbf_num); + pos += mptcp_rbs_scheduler_print(scheduler, i, &str[pos]); + pos += sprintf(&str[pos], "\n"); + } + + seq_printf(seq, "%s", str); + kfree(str); + + return 0; +} + +static int dump_open(struct inode *inode, struct file *file) +{ + return single_open(file, dump_show, PDE_DATA(inode)); +} + +static const struct file_operations dump_file_ops = { + .owner = THIS_MODULE, + .open = dump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * measurements proc entry (per scheduler) + */ + +static int measurements_show(struct seq_file *seq, void *v) +{ + const struct mptcp_rbs_scheduler *scheduler = seq->private; +#ifdef CONFIG_MPTCP_RBSMEASURE + int i; +#endif + + seq_printf(seq, " subflows executions time\n"); + +#ifdef CONFIG_MPTCP_RBSMEASURE + for (i = 0; i < MPTCP_RBS_VARIATION_COUNT; ++i) { + if (!scheduler->variations[i].first_block) + break; + + seq_printf(seq, "%10d %13llu %13llu\n", + scheduler->variations[i].sbf_num, + scheduler->variations[i].exec_count, + scheduler->variations[i].total_time); + } +#endif + + seq_printf(seq, "\n"); + seq_printf(seq, " state executions time\n"); + +#ifdef CONFIG_MPTCP_RBSMEASURE + seq_printf(seq, "noa_no_skb %13llu %13llu\n", + scheduler->total_count_noa_no_skb, + scheduler->total_time_noa_no_skb); + seq_printf(seq, " noa_skb %13llu %13llu\n", + scheduler->total_count_noa_skb, + scheduler->total_time_noa_skb); + seq_printf(seq, " oa_skb %13llu %13llu\n", + scheduler->total_count_oa_skb, scheduler->total_time_oa_skb); + seq_printf(seq, "execno_skb %13llu %13llu\n", + scheduler->total_exec_count_no_skb, + scheduler->total_exec_time_no_skb); + seq_printf(seq, " exec_skb %13llu %13llu\n", + scheduler->total_exec_count_skb, + scheduler->total_exec_time_skb); +#endif + + return 0; +} + +/* + * scheduler info proc entry (per scheduler) + */ + +static int scheduler_info_show(struct seq_file *seq, void *v) +{ + const struct mptcp_rbs_scheduler *scheduler = seq->private; + + seq_printf(seq, "Total bytes sent %llu\n", scheduler->total_bytes_sent); + return 0; +} + +static int measurements_open(struct inode *inode, struct file *file) +{ + return single_open(file, measurements_show, PDE_DATA(inode)); +} + +static ssize_t measurements_write(struct file *file, const char __user *buf, + size_t size, loff_t *offset) +{ + struct mptcp_rbs_scheduler *scheduler = + ((struct seq_file *) file->private_data)->private; + char *str; + char *trimmed_str; + bool b; +#ifdef CONFIG_MPTCP_RBSMEASURE + int i; +#endif + + if (size == 0) + return 0; + + str = kzalloc(size + 1, GFP_KERNEL); + copy_from_user(str, buf, size); + trimmed_str = strim(str); + + /* Check if value == "0" */ + if (!strtobool(trimmed_str, &b) && !b) { +#ifdef CONFIG_MPTCP_RBSMEASURE + for (i = 0; i < MPTCP_RBS_VARIATION_COUNT; ++i) { + if (!scheduler->variations[i].first_block) + break; + + scheduler->variations[i].exec_count = 0; + scheduler->variations[i].total_time = 0; + } + + scheduler->total_count_noa_no_skb = 0; + scheduler->total_time_noa_no_skb = 0; + scheduler->total_count_noa_skb = 0; + scheduler->total_time_noa_skb = 0; + scheduler->total_count_oa_skb = 0; + scheduler->total_time_oa_skb = 0; + scheduler->total_exec_count_no_skb = 0; + 
+		scheduler->total_exec_time_no_skb = 0;
+		scheduler->total_exec_count_skb = 0;
+		scheduler->total_exec_time_skb = 0;
+#endif
+		kfree(str);
+		return size;
+	}
+	kfree(str);
+
+	return -EINVAL;
+}
+
+static const struct file_operations measurements_file_ops = {
+	.owner = THIS_MODULE,
+	.open = measurements_open,
+	.read = seq_read,
+	.write = measurements_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int scheduler_info_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, scheduler_info_show, PDE_DATA(inode));
+}
+
+static const struct file_operations scheduler_info_file_ops = {
+	.owner = THIS_MODULE,
+	.open = scheduler_info_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static bool create_scheduler_sub_dir(struct rbs_dir *rbs_dir,
+				     struct mptcp_rbs_scheduler *scheduler)
+{
+	struct proc_dir_entry *dir;
+	struct rbs_sub_dir *rbs_sub_dir;
+
+	dir = proc_mkdir(scheduler->name, rbs_dir->dir);
+	if (!dir)
+		return false;
+
+	rbs_sub_dir = kmalloc(sizeof(struct rbs_sub_dir), GFP_KERNEL);
+	if (!rbs_sub_dir) {
+		remove_proc_entry(scheduler->name, rbs_dir->dir);
+		return false;
+	}
+	rbs_sub_dir->scheduler = scheduler;
+	rbs_sub_dir->dir = dir;
+	link_rbs_sub_dir(rbs_dir, rbs_sub_dir);
+
+	mutex_lock(&delete_mutex);
+	/* "delete" and "measurements" have write handlers, so they must be
+	 * writable for the owner
+	 */
+	proc_create_data("delete", S_IRUGO | S_IWUSR, dir, &delete_file_ops,
+			 scheduler);
+	proc_create_data("dump", S_IRUGO, dir, &dump_file_ops, scheduler);
+	proc_create_data("measurements", S_IRUGO | S_IWUSR, dir,
+			 &measurements_file_ops, scheduler);
+	proc_create_data("info", S_IRUGO, dir, &scheduler_info_file_ops,
+			 scheduler);
+	mutex_unlock(&delete_mutex);
+
+	return true;
+}
+
+static void remove_scheduler_sub_dir(struct rbs_dir *rbs_dir,
+				     struct mptcp_rbs_scheduler *scheduler)
+{
+	struct rbs_sub_dir *rbs_sub_dir = rbs_dir->first_sub_dir;
+
+	while (rbs_sub_dir) {
+		if (rbs_sub_dir->scheduler == scheduler) {
+			remove_proc_entry("delete", rbs_sub_dir->dir);
+			remove_proc_entry("dump", rbs_sub_dir->dir);
+			remove_proc_entry("measurements", rbs_sub_dir->dir);
+			remove_proc_entry("info", rbs_sub_dir->dir);
+			remove_proc_entry(scheduler->name, rbs_dir->dir);
+
+			unlink_rbs_sub_dir(rbs_dir, rbs_sub_dir);
+			kfree(rbs_sub_dir);
+			break;
+		}
+
+		rbs_sub_dir = rbs_sub_dir->next;
+	}
+}
+
+static int init_subsys(struct net *net)
+{
+#ifndef NS3
+	struct rbs_dir *rbs_dir;
+	struct proc_dir_entry *dir;
+	struct mptcp_rbs_scheduler *scheduler;
+
+	rbs_dir = kmalloc(sizeof(struct rbs_dir), GFP_KERNEL);
+	if (!rbs_dir)
+		return -ENOMEM;
+
+	dir = proc_mkdir_data("rbs", 0, net->mptcp.proc_net_mptcp, rbs_dir);
+	if (!dir) {
+		kfree(rbs_dir);
+		return -ENOMEM;
+	}
+
+	if (!proc_create_data("info", S_IRUGO, dir, &info_file_ops, net)) {
+		remove_proc_entry("rbs", net->mptcp.proc_net_mptcp);
+		kfree(rbs_dir);
+		return -ENOMEM;
+	}
+
+	if (!proc_create_data("schedulers", S_IRUGO, dir, &schedulers_file_ops,
+			      NULL)) {
+		remove_proc_entry("info", dir);
+		remove_proc_entry("rbs", net->mptcp.proc_net_mptcp);
+		kfree(rbs_dir);
+		return -ENOMEM;
+	}
+
+	if (!proc_create("default", S_IRUGO, dir, &default_file_ops)) {
+		remove_proc_entry("schedulers", dir);
+		remove_proc_entry("info", dir);
+		remove_proc_entry("rbs", net->mptcp.proc_net_mptcp);
+		kfree(rbs_dir);
+		return -ENOMEM;
+	}
+
+	if (!proc_create("opt", S_IRUGO, dir, &opt_file_ops)) {
+		remove_proc_entry("schedulers", dir);
+		remove_proc_entry("info", dir);
+		remove_proc_entry("default", dir);
+		remove_proc_entry("rbs", net->mptcp.proc_net_mptcp);
+		kfree(rbs_dir);
+		return -ENOMEM;
+	}
+
+	if (!proc_create("measurements", S_IRUGO | S_IWUSR, dir,
+			 &measurements2_file_ops)) {
+		remove_proc_entry("schedulers", dir);
remove_proc_entry("info", dir); + remove_proc_entry("default", dir); + remove_proc_entry("opt", dir); + remove_proc_entry("rbs", net->mptcp.proc_net_mptcp); + kfree(rbs_dir); + return -ENOMEM; + } + + rbs_dir->dir = dir; + rbs_dir->first_sub_dir = NULL; + link_rbs_dir(rbs_dir); + net->mptcp.proc_net_mptcp_rbs = dir; + + scheduler = mptcp_rbs_scheduler_get_registered(); + while (scheduler) { + create_scheduler_sub_dir(rbs_dir, scheduler); + scheduler = scheduler->next; + } +#endif + + return 0; +} + +static void exit_subsys(struct net *net) +{ +#ifndef NS3 + struct rbs_dir *rbs_dir; + struct mptcp_rbs_scheduler *scheduler; + + if (!net->mptcp.proc_net_mptcp_rbs) + return; + + rbs_dir = net->mptcp.proc_net_mptcp_rbs->data; + + scheduler = mptcp_rbs_scheduler_get_registered(); + while (scheduler) { + remove_scheduler_sub_dir(rbs_dir, scheduler); + scheduler = scheduler->next; + } + + remove_proc_entry("info", rbs_dir->dir); + remove_proc_entry("schedulers", rbs_dir->dir); + remove_proc_entry("default", rbs_dir->dir); + remove_proc_entry("opt", rbs_dir->dir); + remove_proc_entry("measurements", rbs_dir->dir); + remove_proc_entry("rbs", net->mptcp.proc_net_mptcp); + net->mptcp.proc_net_mptcp_rbs = NULL; + + unlink_rbs_dir(rbs_dir); + kfree(rbs_dir); +#endif +} + +static struct pernet_operations proc_ops = { + .init = init_subsys, + .exit = exit_subsys, +}; + +bool mptcp_rbs_user_interface_init(void) +{ + return !register_pernet_subsys(&proc_ops); +} diff --git a/net/mptcp/mptcp_rbs_user.h b/net/mptcp/mptcp_rbs_user.h new file mode 100644 index 0000000000000..ccb23589f4370 --- /dev/null +++ b/net/mptcp/mptcp_rbs_user.h @@ -0,0 +1,19 @@ +#ifndef _MPTCP_RBS_USER_H +#define _MPTCP_RBS_USER_H + +#include + +/* Variable to indicate that optimizations are enabled. + * 0: Optimizations are disabled + * 1: CFG based optimizations enabled + * 2: CFG based optimizations + eBPF code generation enabled + */ +extern int mptcp_rbs_opts_enabled; + +/* + * Initializes the user proc interface + * @return: false if an error occurred + */ +bool mptcp_rbs_user_interface_init(void); + +#endif diff --git a/net/mptcp/mptcp_rbs_value.c b/net/mptcp/mptcp_rbs_value.c new file mode 100644 index 0000000000000..6d34946ad4bb3 --- /dev/null +++ b/net/mptcp/mptcp_rbs_value.c @@ -0,0 +1,5518 @@ +#include "mptcp_rbs_value.h" +#include "mptcp_rbs_ctx.h" +#include "mptcp_rbs_lexer.h" +#include "mptcp_rbs_parser.h" +#include "mptcp_rbs_queue.h" +#include "mptcp_rbs_sched.h" +#include + +/* Macro to clone values */ +#define APPLY_CLONE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) \ + case ENUM: { \ + return (struct mptcp_rbs_value *) STRUCT##_clone( \ + ctx, (const struct STRUCT *) value); \ + } + +/* Macro to print values */ +#define APPLY_PRINT_VALUE(ENUM, STR, STRUCT, RETURNTYPE) \ + case ENUM: { \ + return STRUCT##_print((const struct STRUCT *) value, buffer); \ + } + +/* Macro to get the type of a value */ +#define APPLY_GET_VALUE_TYPE(ENUM, STR, STRUCT, RETURNTYPE) \ + case ENUM: \ + return RETURNTYPE; + +/* Context only for cloning. This is necessary since we cannot simply copy + * sbf/skb progress references to filters. Instead we have to replace the + * pointer. 
Note that we assume that no more than MAX_NESTING filters are nested + */ +struct mptcp_rbs_value_clone_ctx { +#define MAX_NESTING 10 + struct { + const void *repl; + void *repl_with; + } repls[MAX_NESTING]; + void *user_ctx; + mptcp_rbs_value_clone_user_func user_func; +}; + +struct mptcp_rbs_value *mptcp_rbs_value_clone_ex( + struct mptcp_rbs_value_clone_ctx *ctx, const struct mptcp_rbs_value *value); + +#define CLONE(val) \ + val = (typeof(val)) mptcp_rbs_value_clone_ex( \ + ctx, (struct mptcp_rbs_value *) val) + +struct mptcp_rbs_value_constint *mptcp_rbs_value_constint_new(unsigned int num) +{ + struct mptcp_rbs_value_constint *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_constint), GFP_KERNEL); + value->kind = VALUE_KIND_CONSTINT; + value->free = &mptcp_rbs_value_constint_free; + value->execute = &mptcp_rbs_value_constint_execute; + value->value = num; + + return value; +} + +void mptcp_rbs_value_constint_free(struct mptcp_rbs_value_constint *self) +{ + kfree(self); +} + +s64 mptcp_rbs_value_constint_execute(struct mptcp_rbs_value_constint *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + return self->value; +} + +struct mptcp_rbs_value_constint *mptcp_rbs_value_constint_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_constint *value) +{ + struct mptcp_rbs_value_constint *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_constint), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_constint_print(const struct mptcp_rbs_value_constint *value, + char *buffer) +{ + return sprintf_null(&buffer, "%u", value->value); +} + +struct mptcp_rbs_value_conststring *mptcp_rbs_value_conststring_new(char *str) +{ + struct mptcp_rbs_value_conststring *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_conststring), GFP_KERNEL); + value->kind = VALUE_KIND_CONSTSTRING; + value->free = &mptcp_rbs_value_conststring_free; + value->execute = &mptcp_rbs_value_conststring_execute; + value->value = str; + + return value; +} + +void mptcp_rbs_value_conststring_free(struct mptcp_rbs_value_conststring *self) +{ + kfree(self->value); + kfree(self); +} + +char *mptcp_rbs_value_conststring_execute( + struct mptcp_rbs_value_conststring *self, struct mptcp_rbs_eval_ctx *ctx) +{ + return self->value; +} + +struct mptcp_rbs_value_conststring *mptcp_rbs_value_conststring_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_conststring *value) +{ + struct mptcp_rbs_value_conststring *clone; + int len; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_conststring), GFP_KERNEL); + *clone = *value; + len = strlen(value->value); + clone->value = kmalloc(len + 1, GFP_KERNEL); + memcpy(clone->value, value->value, len + 1); + + return clone; +} + +int mptcp_rbs_value_conststring_print( + const struct mptcp_rbs_value_conststring *value, char *buffer) +{ + int len = 0; + char *str; + + len = replace_with_escape_chars(value->value, false); + if (!buffer) + return len + 2; + + str = kmalloc(len + 1, GFP_KERNEL); + memcpy(str, value->value, strlen(value->value) + 1); + replace_with_escape_chars(str, true); + len = sprintf_null(&buffer, "\"%s\"", str); + kfree(str); + return len; +} + +struct mptcp_rbs_value_null *mptcp_rbs_value_null_new(void) +{ + struct mptcp_rbs_value_null *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_null), GFP_KERNEL); + value->kind = VALUE_KIND_NULL; + value->free = &mptcp_rbs_value_null_free; + value->execute = &mptcp_rbs_value_null_execute; + + return value; +} + +void 
mptcp_rbs_value_null_free(struct mptcp_rbs_value_null *self) +{ + kfree(self); +} + +s32 mptcp_rbs_value_null_execute(struct mptcp_rbs_value_null *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + return -1; +} + +struct mptcp_rbs_value_null *mptcp_rbs_value_null_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_null *value) +{ + struct mptcp_rbs_value_null *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_null), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_null_print(const struct mptcp_rbs_value_null *value, + char *buffer) +{ + return sprintf_null(&buffer, "NULL"); +} + +struct mptcp_rbs_value_bool_var *mptcp_rbs_value_bool_var_new(int var_number) +{ + struct mptcp_rbs_value_bool_var *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_bool_var), GFP_KERNEL); + value->kind = VALUE_KIND_BOOL_VAR; + value->free = mptcp_rbs_value_bool_var_free; + value->execute = mptcp_rbs_value_bool_var_execute; + value->var_number = var_number; + + return value; +} + +void mptcp_rbs_value_bool_var_free(struct mptcp_rbs_value_bool_var *self) +{ + kfree(self); +} + +s32 mptcp_rbs_value_bool_var_execute(struct mptcp_rbs_value_bool_var *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct mptcp_rbs_var *var = &ctx->vars[self->var_number]; + + if (var->is_lazy) { + struct mptcp_rbs_value_bool *value = + (struct mptcp_rbs_value_bool *) var->lazy_value; + var->bool_value = value->execute(value, ctx); + var->is_lazy = false; + } + + return var->bool_value; +} + +struct mptcp_rbs_value_bool_var *mptcp_rbs_value_bool_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_bool_var *value) +{ + struct mptcp_rbs_value_bool_var *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_bool_var), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_bool_var_print(const struct mptcp_rbs_value_bool_var *value, + char *buffer) +{ + return sprintf_null(&buffer, "v%d", value->var_number + 1); +} + +struct mptcp_rbs_value_int_var *mptcp_rbs_value_int_var_new(int var_number) +{ + struct mptcp_rbs_value_int_var *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_int_var), GFP_KERNEL); + value->kind = VALUE_KIND_INT_VAR; + value->free = mptcp_rbs_value_int_var_free; + value->execute = mptcp_rbs_value_int_var_execute; + value->var_number = var_number; + + return value; +} + +void mptcp_rbs_value_int_var_free(struct mptcp_rbs_value_int_var *self) +{ + kfree(self); +} + +s64 mptcp_rbs_value_int_var_execute(struct mptcp_rbs_value_int_var *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct mptcp_rbs_var *var = &ctx->vars[self->var_number]; + + if (var->is_lazy) { + struct mptcp_rbs_value_int *value = + (struct mptcp_rbs_value_int *) var->lazy_value; + var->int_value = value->execute(value, ctx); + var->is_lazy = false; + } + + return var->int_value; +} + +struct mptcp_rbs_value_int_var *mptcp_rbs_value_int_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_int_var *value) +{ + struct mptcp_rbs_value_int_var *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_int_var), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_int_var_print(const struct mptcp_rbs_value_int_var *value, + char *buffer) +{ + return sprintf_null(&buffer, "v%d", value->var_number + 1); +} + +struct mptcp_rbs_value_string_var *mptcp_rbs_value_string_var_new( + int var_number) +{ + struct mptcp_rbs_value_string_var *value; + + value = kzalloc(sizeof(struct 
mptcp_rbs_value_string_var), GFP_KERNEL);
+	value->kind = VALUE_KIND_STRING_VAR;
+	value->free = mptcp_rbs_value_string_var_free;
+	value->execute = mptcp_rbs_value_string_var_execute;
+	value->var_number = var_number;
+
+	return value;
+}
+
+void mptcp_rbs_value_string_var_free(struct mptcp_rbs_value_string_var *self)
+{
+	kfree(self);
+}
+
+char *mptcp_rbs_value_string_var_execute(
+    struct mptcp_rbs_value_string_var *self, struct mptcp_rbs_eval_ctx *ctx)
+{
+	struct mptcp_rbs_var *var = &ctx->vars[self->var_number];
+
+	if (var->is_lazy) {
+		struct mptcp_rbs_value_string *value =
+		    (struct mptcp_rbs_value_string *) var->lazy_value;
+		var->string_value = value->execute(value, ctx);
+		var->is_lazy = false;
+	}
+
+	return var->string_value;
+}
+
+struct mptcp_rbs_value_string_var *mptcp_rbs_value_string_var_clone(
+    struct mptcp_rbs_value_clone_ctx *ctx,
+    const struct mptcp_rbs_value_string_var *value)
+{
+	struct mptcp_rbs_value_string_var *clone;
+
+	clone = kmalloc(sizeof(struct mptcp_rbs_value_string_var), GFP_KERNEL);
+	*clone = *value;
+
+	return clone;
+}
+
+int mptcp_rbs_value_string_var_print(
+    const struct mptcp_rbs_value_string_var *value, char *buffer)
+{
+	return sprintf_null(&buffer, "v%d", value->var_number + 1);
+}
+
+struct mptcp_rbs_value_sbf_var *mptcp_rbs_value_sbf_var_new(int var_number)
+{
+	struct mptcp_rbs_value_sbf_var *value;
+
+	value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_var), GFP_KERNEL);
+	value->kind = VALUE_KIND_SBF_VAR;
+	value->free = mptcp_rbs_value_sbf_var_free;
+	value->execute = mptcp_rbs_value_sbf_var_execute;
+	value->var_number = var_number;
+
+	return value;
+}
+
+void mptcp_rbs_value_sbf_var_free(struct mptcp_rbs_value_sbf_var *self)
+{
+	kfree(self);
+}
+
+struct tcp_sock *mptcp_rbs_value_sbf_var_execute(
+    struct mptcp_rbs_value_sbf_var *self, struct mptcp_rbs_eval_ctx *ctx)
+{
+	struct mptcp_rbs_var *var = &ctx->vars[self->var_number];
+
+	if (var->is_lazy) {
+		struct mptcp_rbs_value_sbf *value =
+		    (struct mptcp_rbs_value_sbf *) var->lazy_value;
+		var->sbf_value = value->execute(value, ctx);
+		var->is_lazy = false;
+	}
+
+	return var->sbf_value;
+}
+
+struct mptcp_rbs_value_sbf_var *mptcp_rbs_value_sbf_var_clone(
+    struct mptcp_rbs_value_clone_ctx *ctx,
+    const struct mptcp_rbs_value_sbf_var *value)
+{
+	struct mptcp_rbs_value_sbf_var *clone;
+
+	clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_var), GFP_KERNEL);
+	*clone = *value;
+
+	return clone;
+}
+
+int mptcp_rbs_value_sbf_var_print(const struct mptcp_rbs_value_sbf_var *value,
+				  char *buffer)
+{
+	return sprintf_null(&buffer, "v%d", value->var_number + 1);
+}
+
+struct mptcp_rbs_value_sbf_list_var *mptcp_rbs_value_sbf_list_var_new(
+    int var_number)
+{
+	struct mptcp_rbs_value_sbf_list_var *value;
+
+	value =
+	    kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_var), GFP_KERNEL);
+	value->kind = VALUE_KIND_SBFLIST_VAR;
+	value->free = mptcp_rbs_value_sbf_list_var_free;
+	value->execute = mptcp_rbs_value_sbf_list_var_execute;
+	value->var_number = var_number;
+
+	return value;
+}
+
+void mptcp_rbs_value_sbf_list_var_free(
+    struct mptcp_rbs_value_sbf_list_var *self)
+{
+	kfree(self);
+}
+
+struct tcp_sock *mptcp_rbs_value_sbf_list_var_execute(
+    struct mptcp_rbs_value_sbf_list_var *self, struct mptcp_rbs_eval_ctx *ctx,
+    void **prev, bool *is_null)
+{
+	struct mptcp_rbs_var *var = &ctx->vars[self->var_number];
+	struct tcp_sock **entry;
+
+	mptcp_debug("%s self %p *prev %p var->is_lazy %u for meta_sk %p\n",
+		    __func__, self, *prev, var->is_lazy, ctx->mpcb->meta_sk);
+
+	if (var->is_lazy) {
+		struct mptcp_rbs_value_sbf_list *value =
+		    (struct mptcp_rbs_value_sbf_list *) var->lazy_value;
+		return value->execute(value, ctx, prev, is_null);
+	}
+
+	if (!var->sbf_list_value) {
+		*is_null = true;
+		return NULL;
+	}
+	*is_null = false;
+
+	if (*prev)
+		entry = ((struct tcp_sock **) *prev) + 1;
+	else
+		entry = var->sbf_list_value;
+
+	if (*entry)
+		*prev = entry;
+
+	mptcp_debug("%s returns %p with entry at %p\n", __func__, *entry,
+		    entry);
+
+	return *entry;
+}
+
+struct mptcp_rbs_value_sbf_list_var *mptcp_rbs_value_sbf_list_var_clone(
+    struct mptcp_rbs_value_clone_ctx *ctx,
+    const struct mptcp_rbs_value_sbf_list_var *value)
+{
+	struct mptcp_rbs_value_sbf_list_var *clone;
+
+	clone =
+	    kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_var), GFP_KERNEL);
+	*clone = *value;
+
+	return clone;
+}
+
+int mptcp_rbs_value_sbf_list_var_print(
+    const struct mptcp_rbs_value_sbf_list_var *value, char *buffer)
+{
+	return sprintf_null(&buffer, "v%d", value->var_number + 1);
+}
+
+struct mptcp_rbs_value_skb_var *mptcp_rbs_value_skb_var_new(int var_number,
+							    bool reinject)
+{
+	struct mptcp_rbs_value_skb_var *value;
+
+	value = kzalloc(sizeof(struct mptcp_rbs_value_skb_var), GFP_KERNEL);
+	value->kind = VALUE_KIND_SKB_VAR;
+	value->free = mptcp_rbs_value_skb_var_free;
+	value->execute = mptcp_rbs_value_skb_var_execute;
+	value->reinject = reinject;
+	value->var_number = var_number;
+
+	return value;
+}
+
+void mptcp_rbs_value_skb_var_free(struct mptcp_rbs_value_skb_var *self)
+{
+	kfree(self);
+}
+
+struct sk_buff *mptcp_rbs_value_skb_var_execute(
+    struct mptcp_rbs_value_skb_var *self, struct mptcp_rbs_eval_ctx *ctx)
+{
+	struct mptcp_rbs_var *var = &ctx->vars[self->var_number];
+
+	if (var->is_lazy) {
+		struct mptcp_rbs_value_skb *value =
+		    (struct mptcp_rbs_value_skb *) var->lazy_value;
+		var->skb_value = value->execute(value, ctx);
+		var->is_lazy = false;
+	}
+
+	return var->skb_value;
+}
+
+struct mptcp_rbs_value_skb_var *mptcp_rbs_value_skb_var_clone(
+    struct mptcp_rbs_value_clone_ctx *ctx,
+    const struct mptcp_rbs_value_skb_var *value)
+{
+	struct mptcp_rbs_value_skb_var *clone;
+
+	clone = kmalloc(sizeof(struct mptcp_rbs_value_skb_var), GFP_KERNEL);
+	*clone = *value;
+
+	return clone;
+}
+
+int mptcp_rbs_value_skb_var_print(const struct mptcp_rbs_value_skb_var *value,
+				  char *buffer)
+{
+	return sprintf_null(&buffer, "v%d", value->var_number + 1);
+}
+
+struct mptcp_rbs_value_skb_list_var *mptcp_rbs_value_skb_list_var_new(
+    int var_number, enum mptcp_rbs_value_kind underlying_queue_kind)
+{
+	struct mptcp_rbs_value_skb_list_var *value;
+
+	value =
+	    kzalloc(sizeof(struct mptcp_rbs_value_skb_list_var), GFP_KERNEL);
+	value->kind = VALUE_KIND_SKBLIST_VAR;
+	value->free = mptcp_rbs_value_skb_list_var_free;
+	value->execute = mptcp_rbs_value_skb_list_var_execute;
+	value->var_number = var_number;
+	value->underlying_queue_kind = underlying_queue_kind;
+
+	return value;
+}
+
+void mptcp_rbs_value_skb_list_var_free(
+    struct mptcp_rbs_value_skb_list_var *self)
+{
+	kfree(self);
+}
+
+struct sk_buff *mptcp_rbs_value_skb_list_var_execute(
+    struct mptcp_rbs_value_skb_list_var *self, struct mptcp_rbs_eval_ctx *ctx,
+    void **prev, bool *is_null)
+{
+	struct mptcp_rbs_var *var = &ctx->vars[self->var_number];
+	struct sk_buff **entry;
+
+	if (var->is_lazy) {
+		struct mptcp_rbs_value_skb_list *value =
+		    (struct mptcp_rbs_value_skb_list *) var->lazy_value;
+		return value->execute(value, ctx, prev, is_null);
+	}
+
+	if (!var->skb_list_value) {
+		*is_null = true;
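+		/* the variable holds no list at all: signal NULL, not empty */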
+ return NULL; + } + *is_null = false; + + if (*prev) + entry = ((struct sk_buff **) *prev) + 1; + else + entry = var->skb_list_value; + + if (*entry) + *prev = entry; + + return *entry; +} + +struct mptcp_rbs_value_skb_list_var *mptcp_rbs_value_skb_list_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_var *value) +{ + struct mptcp_rbs_value_skb_list_var *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_list_var), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_skb_list_var_print( + const struct mptcp_rbs_value_skb_list_var *value, char *buffer) +{ + return sprintf_null(&buffer, "v%d", value->var_number + 1); +} + +struct mptcp_rbs_value_not *mptcp_rbs_value_not_new( + struct mptcp_rbs_value_bool *operand) +{ + struct mptcp_rbs_value_not *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_not), GFP_KERNEL); + value->kind = VALUE_KIND_NOT; + value->free = &mptcp_rbs_value_not_free; + value->execute = &mptcp_rbs_value_not_execute; + value->operand = operand; + + return value; +} + +void mptcp_rbs_value_not_free(struct mptcp_rbs_value_not *self) +{ + MPTCP_RBS_VALUE_FREE(self->operand); + kfree(self); +} + +s32 mptcp_rbs_value_not_execute(struct mptcp_rbs_value_not *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s32 b = self->operand->execute(self->operand, ctx); + + if (b == 0) + return 1; + if (b == -1) + return -1; + return 0; +} + +struct mptcp_rbs_value_not *mptcp_rbs_value_not_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_not *value) +{ + struct mptcp_rbs_value_not *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_not), GFP_KERNEL); + *clone = *value; + CLONE(clone->operand); + + return clone; +} + +int mptcp_rbs_value_not_print(const struct mptcp_rbs_value_not *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "!"); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + return len; +} + +struct mptcp_rbs_value_equal *mptcp_rbs_value_equal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_equal *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_equal), GFP_KERNEL); + value->kind = VALUE_KIND_EQUAL; + value->free = &mptcp_rbs_value_equal_free; + value->execute = &mptcp_rbs_value_equal_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_equal_free(struct mptcp_rbs_value_equal *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_equal_execute(struct mptcp_rbs_value_equal *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 left = self->left_operand->execute(self->left_operand, ctx); + s64 right = self->right_operand->execute(self->right_operand, ctx); + + if (left == right) + return 1; + return 0; +} + +struct mptcp_rbs_value_equal *mptcp_rbs_value_equal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_equal *value) +{ + struct mptcp_rbs_value_equal *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_equal), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_equal_print(const struct mptcp_rbs_value_equal *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = 
mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " == "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_unequal *mptcp_rbs_value_unequal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_unequal *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_unequal), GFP_KERNEL); + value->kind = VALUE_KIND_UNEQUAL; + value->free = &mptcp_rbs_value_unequal_free; + value->execute = &mptcp_rbs_value_unequal_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_unequal_free(struct mptcp_rbs_value_unequal *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_unequal_execute(struct mptcp_rbs_value_unequal *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 left = self->left_operand->execute(self->left_operand, ctx); + s64 right = self->right_operand->execute(self->right_operand, ctx); + + if (left == right) + return 0; + return 1; +} + +struct mptcp_rbs_value_unequal *mptcp_rbs_value_unequal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_unequal *value) +{ + struct mptcp_rbs_value_unequal *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_unequal), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_unequal_print(const struct mptcp_rbs_value_unequal *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " != "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_less *mptcp_rbs_value_less_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_less *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_less), GFP_KERNEL); + value->kind = VALUE_KIND_LESS; + value->free = &mptcp_rbs_value_less_free; + value->execute = &mptcp_rbs_value_less_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_less_free(struct mptcp_rbs_value_less *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_less_execute(struct mptcp_rbs_value_less *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 left = self->left_operand->execute(self->left_operand, ctx); + s64 right = self->right_operand->execute(self->right_operand, ctx); + + if (left == -1 || right == -1) + return -1; + if (left < right) + return 1; + return 0; +} + +struct mptcp_rbs_value_less *mptcp_rbs_value_less_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_less *value) +{ + struct mptcp_rbs_value_less *clone; + + clone = kmalloc(sizeof(struct 
mptcp_rbs_value_less), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_less_print(const struct mptcp_rbs_value_less *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " < "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_less_equal *mptcp_rbs_value_less_equal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_less_equal *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_less_equal), GFP_KERNEL); + value->kind = VALUE_KIND_LESS_EQUAL; + value->free = &mptcp_rbs_value_less_equal_free; + value->execute = &mptcp_rbs_value_less_equal_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_less_equal_free(struct mptcp_rbs_value_less_equal *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_less_equal_execute(struct mptcp_rbs_value_less_equal *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 left = self->left_operand->execute(self->left_operand, ctx); + s64 right = self->right_operand->execute(self->right_operand, ctx); + + if (left == -1 || right == -1) + return -1; + if (left <= right) + return 1; + return 0; +} + +struct mptcp_rbs_value_less_equal *mptcp_rbs_value_less_equal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_less_equal *value) +{ + struct mptcp_rbs_value_less_equal *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_less_equal), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_less_equal_print( + const struct mptcp_rbs_value_less_equal *value, char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " <= "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_greater *mptcp_rbs_value_greater_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_greater *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_greater), GFP_KERNEL); + value->kind = VALUE_KIND_GREATER; + value->free = &mptcp_rbs_value_greater_free; + value->execute = &mptcp_rbs_value_greater_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_greater_free(struct mptcp_rbs_value_greater *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_greater_execute(struct mptcp_rbs_value_greater *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 left = 
self->left_operand->execute(self->left_operand, ctx); + s64 right = self->right_operand->execute(self->right_operand, ctx); + + if (left == -1 || right == -1) + return -1; + if (left > right) + return 1; + return 0; +} + +struct mptcp_rbs_value_greater *mptcp_rbs_value_greater_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_greater *value) +{ + struct mptcp_rbs_value_greater *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_greater), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_greater_print(const struct mptcp_rbs_value_greater *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " > "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_greater_equal *mptcp_rbs_value_greater_equal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_greater_equal *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_greater_equal), GFP_KERNEL); + value->kind = VALUE_KIND_GREATER_EQUAL; + value->free = &mptcp_rbs_value_greater_equal_free; + value->execute = &mptcp_rbs_value_greater_equal_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_greater_equal_free( + struct mptcp_rbs_value_greater_equal *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_greater_equal_execute( + struct mptcp_rbs_value_greater_equal *self, struct mptcp_rbs_eval_ctx *ctx) +{ + s64 left = self->left_operand->execute(self->left_operand, ctx); + s64 right = self->right_operand->execute(self->right_operand, ctx); + + if (left == -1 || right == -1) + return -1; + if (left >= right) + return 1; + return 0; +} + +struct mptcp_rbs_value_greater_equal *mptcp_rbs_value_greater_equal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_greater_equal *value) +{ + struct mptcp_rbs_value_greater_equal *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_greater_equal), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_greater_equal_print( + const struct mptcp_rbs_value_greater_equal *value, char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " >= "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_and *mptcp_rbs_value_and_new( + struct mptcp_rbs_value_bool *left_operand, + struct mptcp_rbs_value_bool *right_operand) +{ + struct mptcp_rbs_value_and *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_and), GFP_KERNEL); + value->kind = VALUE_KIND_AND; + value->free = &mptcp_rbs_value_and_free; + 
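+	/* Note: _execute() below treats a NULL (-1) operand as false, so
+	 * AND itself never yields NULL.
+	 */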
value->execute = &mptcp_rbs_value_and_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_and_free(struct mptcp_rbs_value_and *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_and_execute(struct mptcp_rbs_value_and *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s32 val = self->left_operand->execute(self->left_operand, ctx); + if (val <= 0) + return 0; + + val = self->right_operand->execute(self->right_operand, ctx); + if (val <= 0) + return 0; + + return 1; +} + +struct mptcp_rbs_value_and *mptcp_rbs_value_and_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_and *value) +{ + struct mptcp_rbs_value_and *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_and), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_and_print(const struct mptcp_rbs_value_and *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " AND "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_or *mptcp_rbs_value_or_new( + struct mptcp_rbs_value_bool *left_operand, + struct mptcp_rbs_value_bool *right_operand) +{ + struct mptcp_rbs_value_or *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_or), GFP_KERNEL); + value->kind = VALUE_KIND_OR; + value->free = &mptcp_rbs_value_or_free; + value->execute = &mptcp_rbs_value_or_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_or_free(struct mptcp_rbs_value_or *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_or_execute(struct mptcp_rbs_value_or *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s32 val = self->left_operand->execute(self->left_operand, ctx); + if (val == 1) + return 1; + + val = self->right_operand->execute(self->right_operand, ctx); + if (val == 1) + return 1; + + return 0; +} + +struct mptcp_rbs_value_or *mptcp_rbs_value_or_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_or *value) +{ + struct mptcp_rbs_value_or *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_or), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_or_print(const struct mptcp_rbs_value_or *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " OR "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_add *mptcp_rbs_value_add_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_add 
*value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_add), GFP_KERNEL); + value->kind = VALUE_KIND_ADD; + value->free = &mptcp_rbs_value_add_free; + value->execute = &mptcp_rbs_value_add_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_add_free(struct mptcp_rbs_value_add *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s64 mptcp_rbs_value_add_execute(struct mptcp_rbs_value_add *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 val = self->left_operand->execute(self->left_operand, ctx); + unsigned int result; + + if (val == -1) + return -1; + result = val; + + val = self->right_operand->execute(self->right_operand, ctx); + if (val == -1) + return -1; + result += val; + + return result; +} + +struct mptcp_rbs_value_add *mptcp_rbs_value_add_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_add *value) +{ + struct mptcp_rbs_value_add *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_add), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_add_print(const struct mptcp_rbs_value_add *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " + "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_subtract *mptcp_rbs_value_subtract_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_subtract *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_subtract), GFP_KERNEL); + value->kind = VALUE_KIND_SUBTRACT; + value->free = &mptcp_rbs_value_subtract_free; + value->execute = &mptcp_rbs_value_subtract_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_subtract_free(struct mptcp_rbs_value_subtract *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s64 mptcp_rbs_value_subtract_execute(struct mptcp_rbs_value_subtract *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 val = self->left_operand->execute(self->left_operand, ctx); + unsigned int result; + + if (val == -1) + return -1; + result = val; + + val = self->right_operand->execute(self->right_operand, ctx); + if (val == -1) + return -1; + result -= val; + + return result; +} + +struct mptcp_rbs_value_subtract *mptcp_rbs_value_subtract_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_subtract *value) +{ + struct mptcp_rbs_value_subtract *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_subtract), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_subtract_print(const struct mptcp_rbs_value_subtract *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " - "); 
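+	/* sprintf_null() advances *buffer itself and returns the printed
+	 * length (length only if buffer is NULL); only the
+	 * mptcp_rbs_value_print() results need the manual buffer += below.
+	 */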
+ + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_multiply *mptcp_rbs_value_multiply_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_multiply *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_multiply), GFP_KERNEL); + value->kind = VALUE_KIND_MULTIPLY; + value->free = &mptcp_rbs_value_multiply_free; + value->execute = &mptcp_rbs_value_multiply_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_multiply_free(struct mptcp_rbs_value_multiply *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s64 mptcp_rbs_value_multiply_execute(struct mptcp_rbs_value_multiply *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 val = self->left_operand->execute(self->left_operand, ctx); + unsigned int result; + + if (val == -1) + return -1; + result = val; + + val = self->right_operand->execute(self->right_operand, ctx); + if (val == -1) + return -1; + result *= val; + + return result; +} + +struct mptcp_rbs_value_multiply *mptcp_rbs_value_multiply_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_multiply *value) +{ + struct mptcp_rbs_value_multiply *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_multiply), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_multiply_print(const struct mptcp_rbs_value_multiply *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " * "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_divide *mptcp_rbs_value_divide_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_divide *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_divide), GFP_KERNEL); + value->kind = VALUE_KIND_DIVIDE; + value->free = &mptcp_rbs_value_divide_free; + value->execute = &mptcp_rbs_value_divide_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_divide_free(struct mptcp_rbs_value_divide *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s64 mptcp_rbs_value_divide_execute(struct mptcp_rbs_value_divide *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 val = self->left_operand->execute(self->left_operand, ctx); + unsigned int result; + + if (val == -1) + return -1; + result = val; + + val = self->right_operand->execute(self->right_operand, ctx); + if (val == -1 || !val) + return -1; + result /= val; + + return result; +} + +struct mptcp_rbs_value_divide *mptcp_rbs_value_divide_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_divide *value) +{ + struct mptcp_rbs_value_divide *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_divide), 
GFP_KERNEL);
+	*clone = *value;
+	CLONE(clone->left_operand);
+	CLONE(clone->right_operand);
+
+	return clone;
+}
+
+int mptcp_rbs_value_divide_print(const struct mptcp_rbs_value_divide *value,
+				 char *buffer)
+{
+	int len = sprintf_null(&buffer, "(");
+	int tmp_len = mptcp_rbs_value_print(
+	    (const struct mptcp_rbs_value *) value->left_operand, buffer);
+	len += tmp_len;
+	if (buffer)
+		buffer += tmp_len;
+
+	len += sprintf_null(&buffer, " / ");
+
+	tmp_len = mptcp_rbs_value_print(
+	    (const struct mptcp_rbs_value *) value->right_operand, buffer);
+	len += tmp_len;
+	if (buffer)
+		buffer += tmp_len;
+
+	len += sprintf_null(&buffer, ")");
+	return len;
+}
+
+struct mptcp_rbs_value_remainder *mptcp_rbs_value_remainder_new(
+    struct mptcp_rbs_value_int *left_operand,
+    struct mptcp_rbs_value_int *right_operand)
+{
+	struct mptcp_rbs_value_remainder *value;
+
+	value = kzalloc(sizeof(struct mptcp_rbs_value_remainder), GFP_KERNEL);
+	value->kind = VALUE_KIND_REMAINDER;
+	value->free = &mptcp_rbs_value_remainder_free;
+	value->execute = &mptcp_rbs_value_remainder_execute;
+	value->left_operand = left_operand;
+	value->right_operand = right_operand;
+
+	return value;
+}
+
+void mptcp_rbs_value_remainder_free(struct mptcp_rbs_value_remainder *self)
+{
+	MPTCP_RBS_VALUE_FREE(self->left_operand);
+	MPTCP_RBS_VALUE_FREE(self->right_operand);
+	kfree(self);
+}
+
+s64 mptcp_rbs_value_remainder_execute(struct mptcp_rbs_value_remainder *self,
+				      struct mptcp_rbs_eval_ctx *ctx)
+{
+	s64 val = self->left_operand->execute(self->left_operand, ctx);
+	unsigned int result;
+
+	if (val == -1)
+		return -1;
+	result = val;
+
+	val = self->right_operand->execute(self->right_operand, ctx);
+	if (val == -1 || !val)
+		return -1;
+	result %= val;
+
+	return result;
+}
+
+struct mptcp_rbs_value_remainder *mptcp_rbs_value_remainder_clone(
+    struct mptcp_rbs_value_clone_ctx *ctx,
+    const struct mptcp_rbs_value_remainder *value)
+{
+	struct mptcp_rbs_value_remainder *clone;
+
+	clone = kmalloc(sizeof(struct mptcp_rbs_value_remainder), GFP_KERNEL);
+	*clone = *value;
+	CLONE(clone->left_operand);
+	CLONE(clone->right_operand);
+
+	return clone;
+}
+
+int mptcp_rbs_value_remainder_print(
+    const struct mptcp_rbs_value_remainder *value, char *buffer)
+{
+	int len = sprintf_null(&buffer, "(");
+	int tmp_len = mptcp_rbs_value_print(
+	    (const struct mptcp_rbs_value *) value->left_operand, buffer);
+	len += tmp_len;
+	if (buffer)
+		buffer += tmp_len;
+
+	/* "%%" prints a literal '%' */
+	len += sprintf_null(&buffer, " %% ");
+
+	tmp_len = mptcp_rbs_value_print(
+	    (const struct mptcp_rbs_value *) value->right_operand, buffer);
+	len += tmp_len;
+	if (buffer)
+		buffer += tmp_len;
+
+	len += sprintf_null(&buffer, ")");
+	return len;
+}
+
+struct mptcp_rbs_value_is_null *mptcp_rbs_value_is_null_new(
+    struct mptcp_rbs_value *operand)
+{
+	struct mptcp_rbs_value_is_null *value;
+
+	value = kzalloc(sizeof(struct mptcp_rbs_value_is_null), GFP_KERNEL);
+	value->kind = VALUE_KIND_IS_NULL;
+	value->free = &mptcp_rbs_value_is_null_free;
+	value->execute = &mptcp_rbs_value_is_null_execute;
+	value->operand = operand;
+
+	return value;
+}
+
+void mptcp_rbs_value_is_null_free(struct mptcp_rbs_value_is_null *self)
+{
+	MPTCP_RBS_VALUE_FREE(self->operand);
+	kfree(self);
+}
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic error "-Wswitch"
+#pragma GCC diagnostic ignored "-Wreturn-type"
+s32 mptcp_rbs_value_is_null_execute(struct mptcp_rbs_value_is_null *self,
+				    struct mptcp_rbs_eval_ctx *ctx)
+{
+	switch (mptcp_rbs_value_get_type(self->operand->kind)) {
+	case
TYPE_KIND_NULL: + return 1; + case TYPE_KIND_BOOL: { + struct mptcp_rbs_value_bool *bool_value = + (struct mptcp_rbs_value_bool *) self->operand; + + return bool_value->execute(bool_value, ctx) == -1; + } + case TYPE_KIND_INT: { + struct mptcp_rbs_value_int *int_value = + (struct mptcp_rbs_value_int *) self->operand; + + return int_value->execute(int_value, ctx) == -1; + } + case TYPE_KIND_STRING: { + struct mptcp_rbs_value_string *string_value = + (struct mptcp_rbs_value_string *) self->operand; + + return string_value->execute(string_value, ctx) == NULL; + } + case TYPE_KIND_SBF: { + struct mptcp_rbs_value_sbf *sbf_value = + (struct mptcp_rbs_value_sbf *) self->operand; + + return sbf_value->execute(sbf_value, ctx) == NULL; + } + case TYPE_KIND_SBFLIST: { + struct mptcp_rbs_value_sbf_list *sbf_list_value = + (struct mptcp_rbs_value_sbf_list *) self->operand; + void *prev = NULL; + bool is_null; + + sbf_list_value->execute(sbf_list_value, ctx, &prev, &is_null); + return is_null; + } + case TYPE_KIND_SKB: { + struct mptcp_rbs_value_skb *skb_value = + (struct mptcp_rbs_value_skb *) self->operand; + + return skb_value->execute(skb_value, ctx) == NULL; + } + case TYPE_KIND_SKBLIST: { + struct mptcp_rbs_value_skb_list *skb_list_value = + (struct mptcp_rbs_value_skb_list *) self->operand; + void *prev = NULL; + bool is_null; + + skb_list_value->execute(skb_list_value, ctx, &prev, &is_null); + return is_null; + } + } +} +#pragma GCC diagnostic pop + +struct mptcp_rbs_value_is_null *mptcp_rbs_value_is_null_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_is_null *value) +{ + struct mptcp_rbs_value_is_null *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_is_null), GFP_KERNEL); + *clone = *value; + CLONE(clone->operand); + + return clone; +} + +int mptcp_rbs_value_is_null_print(const struct mptcp_rbs_value_is_null *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print(value->operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " == NULL)"); + return len; +} + +struct mptcp_rbs_value_is_not_null *mptcp_rbs_value_is_not_null_new( + struct mptcp_rbs_value *operand) +{ + struct mptcp_rbs_value_is_not_null *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_is_not_null), GFP_KERNEL); + value->kind = VALUE_KIND_IS_NOT_NULL; + value->free = &mptcp_rbs_value_is_not_null_free; + value->execute = &mptcp_rbs_value_is_not_null_execute; + value->operand = operand; + + return value; +} + +void mptcp_rbs_value_is_not_null_free(struct mptcp_rbs_value_is_not_null *self) +{ + MPTCP_RBS_VALUE_FREE(self->operand); + kfree(self); +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +#pragma GCC diagnostic ignored "-Wreturn-type" +s32 mptcp_rbs_value_is_not_null_execute( + struct mptcp_rbs_value_is_not_null *self, struct mptcp_rbs_eval_ctx *ctx) +{ + switch (mptcp_rbs_value_get_type(self->operand->kind)) { + case TYPE_KIND_NULL: + return 0; + case TYPE_KIND_BOOL: { + struct mptcp_rbs_value_bool *bool_value = + (struct mptcp_rbs_value_bool *) self->operand; + + return bool_value->execute(bool_value, ctx) != -1; + } + case TYPE_KIND_INT: { + struct mptcp_rbs_value_int *int_value = + (struct mptcp_rbs_value_int *) self->operand; + + return int_value->execute(int_value, ctx) != -1; + } + case TYPE_KIND_STRING: { + struct mptcp_rbs_value_string *string_value = + (struct mptcp_rbs_value_string *) self->operand; + + return string_value->execute(string_value, 
ctx) != NULL; + } + case TYPE_KIND_SBF: { + struct mptcp_rbs_value_sbf *sbf_value = + (struct mptcp_rbs_value_sbf *) self->operand; + + return sbf_value->execute(sbf_value, ctx) != NULL; + } + case TYPE_KIND_SBFLIST: { + struct mptcp_rbs_value_sbf_list *sbf_list_value = + (struct mptcp_rbs_value_sbf_list *) self->operand; + void *prev = NULL; + bool is_null; + + sbf_list_value->execute(sbf_list_value, ctx, &prev, &is_null); + return !is_null; + } + case TYPE_KIND_SKB: { + struct mptcp_rbs_value_skb *skb_value = + (struct mptcp_rbs_value_skb *) self->operand; + + return skb_value->execute(skb_value, ctx) != NULL; + } + case TYPE_KIND_SKBLIST: { + struct mptcp_rbs_value_skb_list *skb_list_value = + (struct mptcp_rbs_value_skb_list *) self->operand; + void *prev = NULL; + bool is_null; + + skb_list_value->execute(skb_list_value, ctx, &prev, &is_null); + return !is_null; + } + } +} +#pragma GCC diagnostic pop + +struct mptcp_rbs_value_is_not_null *mptcp_rbs_value_is_not_null_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_is_not_null *value) +{ + struct mptcp_rbs_value_is_not_null *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_is_not_null), GFP_KERNEL); + *clone = *value; + CLONE(clone->operand); + + return clone; +} + +int mptcp_rbs_value_is_not_null_print( + const struct mptcp_rbs_value_is_not_null *value, char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print(value->operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " != NULL)"); + return len; +} + +struct mptcp_rbs_value_reg *mptcp_rbs_value_reg_new(int reg_number) +{ + struct mptcp_rbs_value_reg *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_reg), GFP_KERNEL); + value->kind = VALUE_KIND_REG; + value->free = mptcp_rbs_value_reg_free; + value->execute = mptcp_rbs_value_reg_execute; + value->reg_number = reg_number; + + return value; +} + +void mptcp_rbs_value_reg_free(struct mptcp_rbs_value_reg *self) +{ + kfree(self); +} + +s64 mptcp_rbs_value_reg_execute(struct mptcp_rbs_value_reg *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + return ctx->rbs_cb->regs[self->reg_number]; +} + +struct mptcp_rbs_value_reg *mptcp_rbs_value_reg_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_reg *value) +{ + struct mptcp_rbs_value_reg *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_reg), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_reg_print(const struct mptcp_rbs_value_reg *value, + char *buffer) +{ + return sprintf_null(&buffer, "R%d", value->reg_number + 1); +} + +struct mptcp_rbs_value_q *mptcp_rbs_value_q_new(void) +{ + struct mptcp_rbs_value_q *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_q), GFP_KERNEL); + value->kind = VALUE_KIND_Q; + value->free = &mptcp_rbs_value_q_free; + value->execute = &mptcp_rbs_value_q_execute; + value->underlying_queue_kind = VALUE_KIND_Q; + + return value; +} + +/* skip packets which should not be in the queue */ +struct sk_buff *mptcp_rbs_next_in_queue(struct sk_buff_head *queue, + struct sk_buff *skb) +{ + while (skb && TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue) { + mptcp_debug("%s skipping skb %p with seq %u and end_seq %u as " + "it not in queue\n", + __func__, skb, TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq); + + if (skb_queue_is_last(queue, skb)) + return NULL; + else + skb = skb_queue_next(queue, skb); + } + + return skb; +} + +void mptcp_rbs_value_q_free(struct mptcp_rbs_value_q 
*self) +{ + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_q_execute(struct mptcp_rbs_value_q *self, + struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null) +{ + struct sk_buff *skb_candidate; + struct sk_buff *skb_result; + + if (*prev) { + skb_candidate = (struct sk_buff *) *prev; + if (skb_queue_is_last(&ctx->meta_sk->sk_write_queue, + skb_candidate)) + skb_candidate = NULL; + else + skb_candidate = skb_queue_next( + &ctx->meta_sk->sk_write_queue, skb_candidate); + } else { + skb_candidate = ctx->rbs_cb->queue_position; + } + + skb_result = mptcp_rbs_next_in_queue(&ctx->meta_sk->sk_write_queue, + skb_candidate); + + *is_null = false; + if (skb_result) + *prev = skb_result; + + return skb_result; +} + +struct mptcp_rbs_value_q *mptcp_rbs_value_q_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_q *value) +{ + struct mptcp_rbs_value_q *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_q), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_q_print(const struct mptcp_rbs_value_q *value, char *buffer) +{ + return sprintf_null(&buffer, "Q"); +} + +struct mptcp_rbs_value_qu *mptcp_rbs_value_qu_new(void) +{ + struct mptcp_rbs_value_qu *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_qu), GFP_KERNEL); + value->kind = VALUE_KIND_QU; + value->free = &mptcp_rbs_value_qu_free; + value->execute = &mptcp_rbs_value_qu_execute; + value->underlying_queue_kind = VALUE_KIND_QU; + + return value; +} + +void mptcp_rbs_value_qu_free(struct mptcp_rbs_value_qu *self) +{ + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_qu_execute(struct mptcp_rbs_value_qu *self, + struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null) +{ + struct sk_buff *skb; + + if (*prev) { + skb = (struct sk_buff *) *prev; + if (skb_queue_is_last(&ctx->meta_sk->sk_write_queue, skb)) + skb = NULL; + else { + skb = + skb_queue_next(&ctx->meta_sk->sk_write_queue, skb); + } + } else { + if (ctx->meta_sk->sk_write_queue.qlen == 0) + /* queue is empty, rq is empty */ + skb = NULL; + else + /* start with write_queue.next */ + skb = skb_peek(&ctx->meta_sk->sk_write_queue); + } + + mptcp_debug("%s with prev %p has a candidate of %p\n", __func__, *prev, + skb); + + // TODO in the old version, we also checked for skb->next == + // queue_position + if (skb == ctx->rbs_cb->queue_position) { + mptcp_debug( + "%s skb %p matches the queue_position, we are at the end\n", + __func__, skb); + skb = NULL; + } + + // we can not use the approach of Q and RQ, as we have to check for + // queue_position + // skb_result = next_in_queue(&ctx->meta_sk->sk_write_queue, + // skb_candidate); + + while (skb && TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue) { + mptcp_debug("%s skips skb %p\n", __func__, skb); + if (skb_queue_is_last(&ctx->meta_sk->sk_write_queue, skb) || + /* Empty because it points to the element in Q */ + skb == ctx->rbs_cb->queue_position) { + skb = NULL; + break; + } else + skb = + skb_queue_next(&ctx->meta_sk->sk_write_queue, skb); + } + + *is_null = false; + if (skb) + *prev = skb; + + return skb; +} + +struct mptcp_rbs_value_qu *mptcp_rbs_value_qu_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_qu *value) +{ + struct mptcp_rbs_value_qu *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_qu), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_qu_print(const struct mptcp_rbs_value_qu *value, + char *buffer) +{ + return sprintf_null(&buffer, "QU"); +} + +struct mptcp_rbs_value_rq 
*mptcp_rbs_value_rq_new(void) +{ + struct mptcp_rbs_value_rq *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_rq), GFP_KERNEL); + value->kind = VALUE_KIND_RQ; + value->free = &mptcp_rbs_value_rq_free; + value->execute = &mptcp_rbs_value_rq_execute; + value->underlying_queue_kind = VALUE_KIND_RQ; + + return value; +} + +void mptcp_rbs_value_rq_free(struct mptcp_rbs_value_rq *self) +{ + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_rq_execute(struct mptcp_rbs_value_rq *self, + struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null) +{ + struct sk_buff *skb_candidate; + struct sk_buff *skb_result; + + if (*prev) { + skb_candidate = (struct sk_buff *) *prev; + if (skb_queue_is_last(&ctx->mpcb->reinject_queue, + skb_candidate)) { + skb_candidate = NULL; + } else { + skb_candidate = skb_queue_next( + &ctx->mpcb->reinject_queue, skb_candidate); + } + } else { + skb_candidate = skb_peek(&ctx->mpcb->reinject_queue); + } + + skb_result = + mptcp_rbs_next_in_queue(&ctx->mpcb->reinject_queue, skb_candidate); + + mptcp_debug("%s with candidate %p and prev %p for rq %p with rq.len %u " + "has result %p\n", + __func__, skb_candidate, *prev, &ctx->mpcb->reinject_queue, + ctx->mpcb->reinject_queue.qlen, skb_result); + + *is_null = false; + if (skb_result) + *prev = skb_result; + + return skb_result; +} + +struct mptcp_rbs_value_rq *mptcp_rbs_value_rq_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_rq *value) +{ + struct mptcp_rbs_value_rq *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_rq), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_rq_print(const struct mptcp_rbs_value_rq *value, + char *buffer) +{ + return sprintf_null(&buffer, "RQ"); +} + +struct mptcp_rbs_value_subflows *mptcp_rbs_value_subflows_new(void) +{ + struct mptcp_rbs_value_subflows *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_subflows), GFP_KERNEL); + value->kind = VALUE_KIND_SUBFLOWS; + value->free = mptcp_rbs_value_subflows_free; + value->execute = mptcp_rbs_value_subflows_execute; + + return value; +} + +void mptcp_rbs_value_subflows_free(struct mptcp_rbs_value_subflows *self) +{ + kfree(self); +} + +bool mptcp_rbs_sbf_is_available(struct tcp_sock *sbf) +{ + /* Set of states for which we are allowed to send data */ + if (!mptcp_sk_can_send((struct sock *) sbf)) { + mptcp_debug("sbf_is_available %p can not send -> false\n", sbf); + return false; + } + + /* We do not send data on this subflow unless it is + * fully established, i.e. the 4th ack has been received. + */ + if (sbf->mptcp->pre_established) { + mptcp_debug("sbf_is_available %p preestablished -> false\n", + sbf); + return false; + } + + if (sbf->pf) { + mptcp_debug("sbf_is_available %p pf -> false\n", sbf); + return false; + } + + /*if (inet_csk((struct sock *) sbf)->icsk_ca_state == TCP_CA_Loss) { + mptcp_debug("sbf_is_available %p loss state -> false\n", sbf);*/ + /* If SACK is disabled, and we got a loss, TCP does not exit + * the loss-state until something above high_seq has been + * acked. (see tcp_try_undo_recovery) + * + * high_seq is the snd_nxt at the moment of the RTO. As soon + * as we have an RTO, we won't push data on the subflow. + * Thus, snd_una can never go beyond high_seq. + */ +/* if (!tcp_is_reno(sbf)) + return false; + else if (sbf->snd_una != sbf->high_seq) + return false; + }*/ + + /* If TSQ is already throttling us, do not send on this subflow. When + * TSQ gets cleared the subflow becomes eligible again. 
+ */ +/* + moved this test to a separate property + + if (test_bit(TSQ_THROTTLED, &sbf->tsq_flags)) { + mptcp_debug("sbf_is_available %p TSQ throttle -> false\n", sbf); + return false; + }*/ + + return true; +} + +struct tcp_sock *mptcp_rbs_value_subflows_execute( + struct mptcp_rbs_value_subflows *self, struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null) +{ + struct tcp_sock *sbf; + +//printk("%s self %p prev %p called\n", __func__, self, prev); + + if (*prev) + sbf = ((struct tcp_sock *) *prev)->mptcp->next; + else + sbf = ctx->mpcb->connection_list; + + /* Skip unavailable subflows */ + while (sbf && !mptcp_rbs_sbf_is_available(sbf)) { + printk("%s skips sbf %p for meta_sk %p coming from %pS\n", __func__, sbf, ctx->mpcb->meta_sk, __builtin_return_address(0)); + sbf = sbf->mptcp->next; + } + + *is_null = false; + if (sbf) + *prev = sbf; + +//printk("%s returns %p\n", __func__, sbf); + + return sbf; +} + +struct mptcp_rbs_value_subflows *mptcp_rbs_value_subflows_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_subflows *value) +{ + struct mptcp_rbs_value_subflows *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_subflows), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_subflows_print(const struct mptcp_rbs_value_subflows *value, + char *buffer) +{ + return sprintf_null(&buffer, "SUBFLOWS"); +} + +struct mptcp_rbs_value_current_time_ms *mptcp_rbs_value_current_time_ms_new( + void) +{ + struct mptcp_rbs_value_current_time_ms *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_current_time_ms), GFP_KERNEL); + value->kind = VALUE_KIND_CURRENT_TIME_MS; + value->free = mptcp_rbs_value_current_time_ms_free; + value->execute = mptcp_rbs_value_current_time_ms_execute; + + return value; +} + +void mptcp_rbs_value_current_time_ms_free( + struct mptcp_rbs_value_current_time_ms *self) +{ + kfree(self); +} + +s64 mptcp_rbs_value_current_time_ms_execute( + struct mptcp_rbs_value_current_time_ms *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + u64 ct = ktime_get_raw_ns(); + u64 tp6 = 1000000; + return ct / tp6; +} + +struct mptcp_rbs_value_current_time_ms *mptcp_rbs_value_current_time_ms_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_current_time_ms *value) +{ + struct mptcp_rbs_value_current_time_ms *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_current_time_ms), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_current_time_ms_print( + const struct mptcp_rbs_value_current_time_ms *value, char *buffer) +{ + return sprintf_null(&buffer, "CURRENT_TIME_MS"); +} + +struct mptcp_rbs_value_random *mptcp_rbs_value_random_new(void) +{ + struct mptcp_rbs_value_random *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_random), GFP_KERNEL); + value->kind = VALUE_KIND_RANDOM; + value->free = mptcp_rbs_value_random_free; + value->execute = mptcp_rbs_value_random_execute; + + return value; +} + +void mptcp_rbs_value_random_free(struct mptcp_rbs_value_random *self) +{ + kfree(self); +} + +s64 mptcp_rbs_value_random_execute(struct mptcp_rbs_value_random *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + unsigned int n; + + get_random_bytes(&n, sizeof(unsigned int)); + return n; +} + +struct mptcp_rbs_value_random *mptcp_rbs_value_random_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_random *value) +{ + struct mptcp_rbs_value_random *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_random), GFP_KERNEL); + *clone = *value; + + return 
clone; +} + +int mptcp_rbs_value_random_print(const struct mptcp_rbs_value_random *value, + char *buffer) +{ + return sprintf_null(&buffer, "RANDOM"); +} + +struct mptcp_rbs_value_sbf_rtt *mptcp_rbs_value_sbf_rtt_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_rtt *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_rtt), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_RTT; + value->free = mptcp_rbs_value_sbf_rtt_free; + value->execute = mptcp_rbs_value_sbf_rtt_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_rtt_free(struct mptcp_rbs_value_sbf_rtt *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_rtt_execute(struct mptcp_rbs_value_sbf_rtt *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->srtt_us; +} + +struct mptcp_rbs_value_sbf_rtt *mptcp_rbs_value_sbf_rtt_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_rtt *value) +{ + struct mptcp_rbs_value_sbf_rtt *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_rtt), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_rtt_print(const struct mptcp_rbs_value_sbf_rtt *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".RTT"); + return len; +} + +struct mptcp_rbs_value_sbf_rtt_ms *mptcp_rbs_value_sbf_rtt_ms_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_rtt_ms *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_rtt_ms), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_RTT_MS; + value->free = mptcp_rbs_value_sbf_rtt_ms_free; + value->execute = mptcp_rbs_value_sbf_rtt_ms_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_rtt_ms_free(struct mptcp_rbs_value_sbf_rtt_ms *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_rtt_ms_execute(struct mptcp_rbs_value_sbf_rtt_ms *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + // we now are _MS + return (sbf->srtt_us >> 3) / 1000; +} + +struct mptcp_rbs_value_sbf_rtt_ms *mptcp_rbs_value_sbf_rtt_ms_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_rtt_ms *value) +{ + struct mptcp_rbs_value_sbf_rtt_ms *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_rtt_ms), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_rtt_ms_print(const struct mptcp_rbs_value_sbf_rtt_ms *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".RTT_MS"); + return len; +} + +struct mptcp_rbs_value_sbf_rtt_var *mptcp_rbs_value_sbf_rtt_var_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_rtt_var *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_rtt_var), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_RTT_VAR; + value->free = mptcp_rbs_value_sbf_rtt_var_free; + value->execute = mptcp_rbs_value_sbf_rtt_var_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_rtt_var_free(struct mptcp_rbs_value_sbf_rtt_var *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + 
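For reference, the fixed-point convention behind the RTT getters above: the kernel stores tp->srtt_us left-shifted by 3, i.e. as 8 * SRTT in microseconds, so .RTT reports that raw value while .RTT_MS undoes the shift before scaling down to milliseconds. A minimal sketch of the conversion (illustration only, not part of the patch; srtt_us_to_ms is a made-up helper name):

static inline s64 srtt_us_to_ms(u32 srtt_us)
{
	/* srtt_us holds 8 * SRTT in microseconds: >> 3 yields us, / 1000 ms */
	return (srtt_us >> 3) / 1000;
}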
+s64 mptcp_rbs_value_sbf_rtt_var_execute(struct mptcp_rbs_value_sbf_rtt_var *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->rttvar_us; +} + +struct mptcp_rbs_value_sbf_rtt_var *mptcp_rbs_value_sbf_rtt_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_rtt_var *value) +{ + struct mptcp_rbs_value_sbf_rtt_var *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_rtt_var), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_rtt_var_print(const struct mptcp_rbs_value_sbf_rtt_var *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".RTT_VAR"); + return len; +} + +struct mptcp_rbs_value_sbf_user *mptcp_rbs_value_sbf_user_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_user *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_user), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_USER; + value->free = mptcp_rbs_value_sbf_user_free; + value->execute = mptcp_rbs_value_sbf_user_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_user_free(struct mptcp_rbs_value_sbf_user *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_user_execute(struct mptcp_rbs_value_sbf_user *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + +// return *((s64*) &sbf->mptcp->mptcp_sched[0]); + return mptcp_rbs_get_sbf_cb(sbf)->user; +} + +struct mptcp_rbs_value_sbf_user *mptcp_rbs_value_sbf_user_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_user *value) +{ + struct mptcp_rbs_value_sbf_user *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_user), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_user_print(const struct mptcp_rbs_value_sbf_user *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".USER"); + return len; +} + +struct mptcp_rbs_value_sbf_is_backup *mptcp_rbs_value_sbf_is_backup_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_is_backup *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_is_backup), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_IS_BACKUP; + value->free = mptcp_rbs_value_sbf_is_backup_free; + value->execute = mptcp_rbs_value_sbf_is_backup_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_is_backup_free( + struct mptcp_rbs_value_sbf_is_backup *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s32 mptcp_rbs_value_sbf_is_backup_execute( + struct mptcp_rbs_value_sbf_is_backup *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->mptcp->low_prio || sbf->mptcp->rcv_low_prio; +} + +struct mptcp_rbs_value_sbf_is_backup *mptcp_rbs_value_sbf_is_backup_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_is_backup *value) +{ + struct mptcp_rbs_value_sbf_is_backup *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_is_backup), GFP_KERNEL); + *clone = *value; + 
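	/* CLONE() (next line) re-creates the nested sbf value through the
	 * clone context so the copy owns its own operand tree; the struct
	 * assignment above only duplicated the pointer.
	 */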
CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_is_backup_print( + const struct mptcp_rbs_value_sbf_is_backup *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".IS_BACKUP"); + return len; +} + +struct mptcp_rbs_value_sbf_cwnd *mptcp_rbs_value_sbf_cwnd_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_cwnd *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_cwnd), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_CWND; + value->free = mptcp_rbs_value_sbf_cwnd_free; + value->execute = mptcp_rbs_value_sbf_cwnd_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_cwnd_free(struct mptcp_rbs_value_sbf_cwnd *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_cwnd_execute(struct mptcp_rbs_value_sbf_cwnd *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->snd_cwnd; +} + +struct mptcp_rbs_value_sbf_cwnd *mptcp_rbs_value_sbf_cwnd_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_cwnd *value) +{ + struct mptcp_rbs_value_sbf_cwnd *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_cwnd), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_cwnd_print(const struct mptcp_rbs_value_sbf_cwnd *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".CWND"); + return len; +} + +struct mptcp_rbs_value_sbf_queued *mptcp_rbs_value_sbf_queued_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_queued *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_queued), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_QUEUED; + value->free = mptcp_rbs_value_sbf_queued_free; + value->execute = mptcp_rbs_value_sbf_queued_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_queued_free(struct mptcp_rbs_value_sbf_queued *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_queued_execute(struct mptcp_rbs_value_sbf_queued *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return (sbf->write_seq - sbf->snd_nxt) / sbf->mss_cache; +} + +struct mptcp_rbs_value_sbf_queued *mptcp_rbs_value_sbf_queued_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_queued *value) +{ + struct mptcp_rbs_value_sbf_queued *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_queued), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_queued_print(const struct mptcp_rbs_value_sbf_queued *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".QUEUED"); + return len; +} + +struct mptcp_rbs_value_sbf_skbs_in_flight * +mptcp_rbs_value_sbf_skbs_in_flight_new(struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_skbs_in_flight *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_skbs_in_flight), + GFP_KERNEL); + value->kind = VALUE_KIND_SBF_SKBS_IN_FLIGHT; + value->free = 
mptcp_rbs_value_sbf_skbs_in_flight_free; + value->execute = mptcp_rbs_value_sbf_skbs_in_flight_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_skbs_in_flight_free( + struct mptcp_rbs_value_sbf_skbs_in_flight *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_skbs_in_flight_execute( + struct mptcp_rbs_value_sbf_skbs_in_flight *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->packets_out; +} + +struct mptcp_rbs_value_sbf_skbs_in_flight * +mptcp_rbs_value_sbf_skbs_in_flight_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_skbs_in_flight *value) +{ + struct mptcp_rbs_value_sbf_skbs_in_flight *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_skbs_in_flight), + GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_skbs_in_flight_print( + const struct mptcp_rbs_value_sbf_skbs_in_flight *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".SKBS_IN_FLIGHT"); + return len; +} + +struct mptcp_rbs_value_sbf_lost_skbs *mptcp_rbs_value_sbf_lost_skbs_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_lost_skbs *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_lost_skbs), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_LOST_SKBS; + value->free = mptcp_rbs_value_sbf_lost_skbs_free; + value->execute = mptcp_rbs_value_sbf_lost_skbs_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_lost_skbs_free( + struct mptcp_rbs_value_sbf_lost_skbs *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_lost_skbs_execute( + struct mptcp_rbs_value_sbf_lost_skbs *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->lost_out; +} + +struct mptcp_rbs_value_sbf_lost_skbs *mptcp_rbs_value_sbf_lost_skbs_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_lost_skbs *value) +{ + struct mptcp_rbs_value_sbf_lost_skbs *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_lost_skbs), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_lost_skbs_print( + const struct mptcp_rbs_value_sbf_lost_skbs *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".LOST_SKBS"); + return len; +} + +struct mptcp_rbs_value_sbf_has_window_for * +mptcp_rbs_value_sbf_has_window_for_new(struct mptcp_rbs_value_sbf *sbf, + struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_value_sbf_has_window_for *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_has_window_for), + GFP_KERNEL); + value->kind = VALUE_KIND_SBF_HAS_WINDOW_FOR; + value->free = mptcp_rbs_value_sbf_has_window_for_free; + value->execute = mptcp_rbs_value_sbf_has_window_for_execute; + value->sbf = sbf; + value->skb = skb; + + return value; +} + +void mptcp_rbs_value_sbf_has_window_for_free( + struct mptcp_rbs_value_sbf_has_window_for *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +s32 mptcp_rbs_value_sbf_has_window_for_execute( + struct 
mptcp_rbs_value_sbf_has_window_for *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + unsigned int mss_now = tcp_current_mss(ctx->meta_sk); + struct tcp_sock *sbf; + struct sk_buff *skb; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return -1; + + /* RBS copied from mptcp_sched.c */ + /* Don't send on this subflow if we bypass the allowed send-window at + * the per-subflow level. Similar to tcp_snd_wnd_test, but manually + * calculated end_seq (because here at this point end_seq is still at + * the meta-level). + */ + if (after(sbf->write_seq + min(skb->len, mss_now), tcp_wnd_end(sbf))) + return 0; + return 1; +} + +struct mptcp_rbs_value_sbf_has_window_for * +mptcp_rbs_value_sbf_has_window_for_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_has_window_for *value) +{ + struct mptcp_rbs_value_sbf_has_window_for *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_has_window_for), + GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + CLONE(clone->skb); + + return clone; +} + +int mptcp_rbs_value_sbf_has_window_for_print( + const struct mptcp_rbs_value_sbf_has_window_for *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".HAS_WINDOW_FOR("); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->skb, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_sbf_id *mptcp_rbs_value_sbf_id_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_id *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_id), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_ID; + value->free = mptcp_rbs_value_sbf_id_free; + value->execute = mptcp_rbs_value_sbf_id_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_id_free(struct mptcp_rbs_value_sbf_id *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_id_execute(struct mptcp_rbs_value_sbf_id *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->mptcp->sbf_id; +} + +struct mptcp_rbs_value_sbf_id *mptcp_rbs_value_sbf_id_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_id *value) +{ + struct mptcp_rbs_value_sbf_id *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_id), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_id_print(const struct mptcp_rbs_value_sbf_id *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".ID"); + return len; +} + +/* some helper for delay calculation */ + +void mptcp_rbs_sbf_delay_update(struct tcp_sock *tp, const struct sk_buff *skb) +{ + /* Recalculate delays */ + /* Size considerations: we subtract two u32 values, the result might + * have a sign (requires 33 bits) + * However, we can safely ignore the highest bit of the u32 values + * (0x80000000), + * add 1 << 31 (0x80000000) for the subtraction and store it as u32. 
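	 * For example, with tcp_time_stamp == 0x00000005 and a wrapped
	 * rcv_tsval == 0xFFFFFFFE, delay_in = (0x80000000 + 0x00000005) -
	 * 0x7FFFFFFE = 7, matching the 7-tick difference that exact
	 * mod-2^32 arithmetic would give.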
+ */ + struct mptcp_rbs_sbf_cb *sbf_cb = mptcp_rbs_get_sbf_cb(tp); + const unsigned int first_bit_set = 0x80000000; + const unsigned int first_bit_not_set = 0x7FFFFFFF; + sbf_cb->delay_in = + (first_bit_set + (tcp_time_stamp & first_bit_not_set)) - + (tp->rx_opt.rcv_tsval & first_bit_not_set); + sbf_cb->delay_out = + (first_bit_set + (tp->rx_opt.rcv_tsval & first_bit_not_set)) - + (tp->rx_opt.rcv_tsecr & first_bit_not_set); +// printk("rcv_tsval %u and rcv_tsecr %u lead to delay out %u and delay in %u\n", tp->rx_opt.rcv_tsval, tp->rx_opt.rcv_tsecr, sbf_cb->delay_out, sbf_cb->delay_in); +} + +/* delay out */ + +struct mptcp_rbs_value_sbf_delay_out *mptcp_rbs_value_sbf_delay_out_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_delay_out *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_delay_out), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_DELAY_OUT; + value->free = mptcp_rbs_value_sbf_delay_out_free; + value->execute = mptcp_rbs_value_sbf_delay_out_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_delay_out_free( + struct mptcp_rbs_value_sbf_delay_out *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_delay_out_execute( + struct mptcp_rbs_value_sbf_delay_out *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return mptcp_rbs_get_sbf_cb(sbf)->delay_out; +} + +struct mptcp_rbs_value_sbf_delay_out *mptcp_rbs_value_sbf_delay_out_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_delay_out *value) +{ + struct mptcp_rbs_value_sbf_delay_out *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_delay_out), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_delay_out_print( + const struct mptcp_rbs_value_sbf_delay_out *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".DELAY_OUT"); + return len; +} + +/* delay in */ + +struct mptcp_rbs_value_sbf_delay_in *mptcp_rbs_value_sbf_delay_in_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_delay_in *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_delay_in), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_DELAY_IN; + value->free = mptcp_rbs_value_sbf_delay_in_free; + value->execute = mptcp_rbs_value_sbf_delay_in_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_delay_in_free( + struct mptcp_rbs_value_sbf_delay_in *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_delay_in_execute( + struct mptcp_rbs_value_sbf_delay_in *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return mptcp_rbs_get_sbf_cb(sbf)->delay_in; +} + +struct mptcp_rbs_value_sbf_delay_in *mptcp_rbs_value_sbf_delay_in_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_delay_in *value) +{ + struct mptcp_rbs_value_sbf_delay_in *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_delay_in), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_delay_in_print( + const struct mptcp_rbs_value_sbf_delay_in *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, 
buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".DELAY_IN"); + return len; +} + +/* some helper for bw calculation */ + +u64 mptcp_rbs_sbf_get_bw_send(struct mptcp_rbs_sbf_cb *sbf_cb) +{ + u64 RBS_BW_INTERVAL_NS = 1000000000; // one second + u64 diff = ktime_get_raw_ns() - sbf_cb->bw_out_last_update_ns; + + /* how much time is gone since the last update */ + if (diff > RBS_BW_INTERVAL_NS) + return 0; + + /* only use the portion of one second */ + return (RBS_BW_INTERVAL_NS - diff) * sbf_cb->bw_out_bytes / + RBS_BW_INTERVAL_NS; +} + +u64 mptcp_rbs_sbf_get_bw_ack(struct mptcp_rbs_sbf_cb *sbf_cb) +{ + u64 RBS_BW_INTERVAL_NS = 1000000000; // one second + u64 diff = ktime_get_raw_ns() - sbf_cb->bw_ack_last_update_ns; + + /* how much time is gone since the last update */ + if (diff > RBS_BW_INTERVAL_NS) + return 0; + + /* only use the portion of one second */ + return (RBS_BW_INTERVAL_NS - diff) * sbf_cb->bw_ack_bytes / + RBS_BW_INTERVAL_NS; +} + +void mptcp_rbs_sbf_bw_add(u64 *last_update_ns, u64 *bytes_in_cb, + unsigned int bytes) +{ + u64 ct = ktime_get_raw_ns(); + u64 RBS_BW_INTERVAL_NS = 1000000000; // one second + + mptcp_debug("rbs_bw compares %llu and %llu, or more precise, the diff " + "is %llu\n", + ct, (*last_update_ns), (ct - *last_update_ns)); + + if (bytes == 0) // nothing to do + return; + + if (ct - *last_update_ns < + RBS_BW_INTERVAL_NS) { // delta t less than a second + u64 delta_t = ct - (*last_update_ns); + *bytes_in_cb = (*bytes_in_cb) * (RBS_BW_INTERVAL_NS - delta_t) / + RBS_BW_INTERVAL_NS + + bytes; + mptcp_debug("rbs_bw sets bw to %llu with delta_t %llu\n", + *bytes_in_cb, delta_t); + } else { + *bytes_in_cb = bytes; + mptcp_debug( + "rbs_bw sets new value after more than 1 second to %u\n", + bytes); + } + + *last_update_ns = ct; +} + +void mptcp_rbs_sbf_bw_send_add(struct tcp_sock *tp, unsigned int bytes) +{ + struct mptcp_rbs_sbf_cb *sbf_cb = mptcp_rbs_get_sbf_cb(tp); + mptcp_rbs_sbf_bw_add(&sbf_cb->bw_out_last_update_ns, + &sbf_cb->bw_out_bytes, bytes); +} + +void mptcp_rbs_sbf_bw_ack_add(struct tcp_sock *tp, unsigned int bytes) +{ + struct mptcp_rbs_sbf_cb *sbf_cb = mptcp_rbs_get_sbf_cb(tp); + mptcp_rbs_sbf_bw_add(&sbf_cb->bw_ack_last_update_ns, + &sbf_cb->bw_ack_bytes, bytes); +} + +/* bw out ack */ + +struct mptcp_rbs_value_sbf_bw_out_ack *mptcp_rbs_value_sbf_bw_out_ack_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_bw_out_ack *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_bw_out_ack), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_BW_OUT_ACK; + value->free = mptcp_rbs_value_sbf_bw_out_ack_free; + value->execute = mptcp_rbs_value_sbf_bw_out_ack_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_bw_out_ack_free( + struct mptcp_rbs_value_sbf_bw_out_ack *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_bw_out_ack_execute( + struct mptcp_rbs_value_sbf_bw_out_ack *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return mptcp_rbs_sbf_get_bw_ack(mptcp_rbs_get_sbf_cb(sbf)); +} + +struct mptcp_rbs_value_sbf_bw_out_ack *mptcp_rbs_value_sbf_bw_out_ack_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_bw_out_ack *value) +{ + struct mptcp_rbs_value_sbf_bw_out_ack *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_bw_out_ack), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int 
mptcp_rbs_value_sbf_bw_out_ack_print( + const struct mptcp_rbs_value_sbf_bw_out_ack *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".BW_OUT_ACK"); + return len; +} + +/* bw out send */ + +struct mptcp_rbs_value_sbf_bw_out_send *mptcp_rbs_value_sbf_bw_out_send_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_bw_out_send *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_bw_out_send), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_BW_OUT_SEND; + value->free = mptcp_rbs_value_sbf_bw_out_send_free; + value->execute = mptcp_rbs_value_sbf_bw_out_send_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_bw_out_send_free( + struct mptcp_rbs_value_sbf_bw_out_send *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_bw_out_send_execute( + struct mptcp_rbs_value_sbf_bw_out_send *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return mptcp_rbs_sbf_get_bw_send(mptcp_rbs_get_sbf_cb(sbf)); +} + +struct mptcp_rbs_value_sbf_bw_out_send *mptcp_rbs_value_sbf_bw_out_send_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_bw_out_send *value) +{ + struct mptcp_rbs_value_sbf_bw_out_send *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_bw_out_send), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_bw_out_send_print( + const struct mptcp_rbs_value_sbf_bw_out_send *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".BW_OUT_SEND"); + return len; +} + +/* slow start threshold ssthresh */ + +struct mptcp_rbs_value_sbf_ssthresh *mptcp_rbs_value_sbf_ssthresh_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_ssthresh *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_ssthresh), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_SSTHRESH; + value->free = mptcp_rbs_value_sbf_ssthresh_free; + value->execute = mptcp_rbs_value_sbf_ssthresh_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_ssthresh_free( + struct mptcp_rbs_value_sbf_ssthresh *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_ssthresh_execute( + struct mptcp_rbs_value_sbf_ssthresh *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->snd_ssthresh; +} + +struct mptcp_rbs_value_sbf_ssthresh *mptcp_rbs_value_sbf_ssthresh_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_ssthresh *value) +{ + struct mptcp_rbs_value_sbf_ssthresh *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_ssthresh), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_ssthresh_print( + const struct mptcp_rbs_value_sbf_ssthresh *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".SSTHRESH"); + return len; +} + +struct mptcp_rbs_value_sbf_throttled *mptcp_rbs_value_sbf_throttled_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct 
mptcp_rbs_value_sbf_throttled *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_throttled), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_THROTTLED; + value->free = mptcp_rbs_value_sbf_throttled_free; + value->execute = mptcp_rbs_value_sbf_throttled_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_throttled_free( + struct mptcp_rbs_value_sbf_throttled *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s32 mptcp_rbs_value_sbf_throttled_execute( + struct mptcp_rbs_value_sbf_throttled *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return test_bit(TSQ_THROTTLED, &sbf->tsq_flags); +} + +struct mptcp_rbs_value_sbf_throttled *mptcp_rbs_value_sbf_throttled_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_throttled *value) +{ + struct mptcp_rbs_value_sbf_throttled *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_throttled), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_throttled_print( + const struct mptcp_rbs_value_sbf_throttled *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".THROTTLED"); + return len; +} + +struct mptcp_rbs_value_sbf_lossy *mptcp_rbs_value_sbf_lossy_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_lossy *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_lossy), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_LOSSY; + value->free = mptcp_rbs_value_sbf_lossy_free; + value->execute = mptcp_rbs_value_sbf_lossy_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_lossy_free(struct mptcp_rbs_value_sbf_lossy *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s32 mptcp_rbs_value_sbf_lossy_execute(struct mptcp_rbs_value_sbf_lossy *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + if (inet_csk((struct sock *) sbf)->icsk_ca_state == TCP_CA_Loss) { + mptcp_debug("sbf_is_available %p loss state -> false\n", sbf); + /* If SACK is disabled, and we got a loss, TCP does not exit + * the loss-state until something above high_seq has been + * acked. (see tcp_try_undo_recovery) + * + * high_seq is the snd_nxt at the moment of the RTO. As soon + * as we have an RTO, we won't push data on the subflow. + * Thus, snd_una can never go beyond high_seq. 
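	 * Net effect: with SACK the subflow counts as LOSSY for the entire
	 * TCP_CA_Loss episode; with Reno it stops counting as soon as
	 * snd_una has caught up with high_seq.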
+ */ + if (!tcp_is_reno(sbf)) + return true; + else if (sbf->snd_una != sbf->high_seq) + return true; + } + + return false; +} + +struct mptcp_rbs_value_sbf_lossy *mptcp_rbs_value_sbf_lossy_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_lossy *value) +{ + struct mptcp_rbs_value_sbf_lossy *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_lossy), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_lossy_print( + const struct mptcp_rbs_value_sbf_lossy *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".LOSSY"); + return len; +} + +struct mptcp_rbs_value_sbf_list_next *mptcp_rbs_value_sbf_list_next_new( + struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_value_sbf_list_next *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_next), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_NEXT; + value->free = mptcp_rbs_value_sbf_list_next_free; + value->execute = mptcp_rbs_value_sbf_list_next_execute; + value->list = list; + + return value; +} + +void mptcp_rbs_value_sbf_list_next_free( + struct mptcp_rbs_value_sbf_list_next *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + kfree(self); +} + +struct tcp_sock *mptcp_rbs_value_sbf_list_next_execute( + struct mptcp_rbs_value_sbf_list_next *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + +printk("%s for meta_sk %p with self %p coming from %pS with self->exec_count %u and rbs_cb->exec_count %u and prev %p and isnull %d\n", __func__, ctx->mpcb->meta_sk, self, __builtin_return_address(0), self->exec_count, ctx->rbs_cb->exec_count, self->prev, self->is_null); + + if (self->exec_count != ctx->rbs_cb->exec_count) { + self->prev = NULL; + self->is_null = false; + self->exec_count = ctx->rbs_cb->exec_count; + } + if (self->is_null) + return NULL; + + sbf = self->list->execute(self->list, ctx, &self->prev, &self->is_null); + if (!sbf) { + self->prev = NULL; + self->is_null = true; + + /* If we have nested loops we have to make sure that next time + * we visit this value the first item of the list is returned + */ + --self->exec_count; + } + + return sbf; +} + +struct mptcp_rbs_value_sbf_list_next *mptcp_rbs_value_sbf_list_next_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_next *value) +{ + struct mptcp_rbs_value_sbf_list_next *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_next), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + return clone; +} + +int mptcp_rbs_value_sbf_list_next_print( + const struct mptcp_rbs_value_sbf_list_next *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".NEXT()"); + return len; +} + +struct mptcp_rbs_value_sbf_list_empty *mptcp_rbs_value_sbf_list_empty_new( + struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_value_sbf_list_empty *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_empty), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_EMPTY; + value->free = mptcp_rbs_value_sbf_list_empty_free; + value->execute = mptcp_rbs_value_sbf_list_empty_execute; + value->list = list; + + return value; +} + +void mptcp_rbs_value_sbf_list_empty_free( + struct mptcp_rbs_value_sbf_list_empty *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + kfree(self); +} + 
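The exec_count handling above is a lazy-reset generation counter: instead of the scheduler walking all NEXT() values to rewind them at the start of a run, each cursor compares its own counter against rbs_cb->exec_count on first use and rewinds itself. A condensed sketch of the pattern (illustration only; struct cursor and cursor_sync are hypothetical names, not part of the patch):

struct cursor {
	u32 exec_count;	/* generation the cursor was last used in */
	void *prev;	/* iteration state, as in the list values above */
	bool is_null;	/* sticky end-of-list marker */
};

static void cursor_sync(struct cursor *c, u32 run_count)
{
	if (c->exec_count != run_count) {
		/* first use in this run (or rewound after exhaustion) */
		c->prev = NULL;
		c->is_null = false;
		c->exec_count = run_count;
	}
}

Decrementing exec_count after the list is exhausted forces exactly this rewind on the following call, which is what lets nested loops restart the inner NEXT() from the head.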
+s32 mptcp_rbs_value_sbf_list_empty_execute( + struct mptcp_rbs_value_sbf_list_empty *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct tcp_sock *sbf; + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + + if (is_null) + return -1; + return sbf ? 0 : 1; +} + +struct mptcp_rbs_value_sbf_list_empty *mptcp_rbs_value_sbf_list_empty_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_empty *value) +{ + struct mptcp_rbs_value_sbf_list_empty *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_empty), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + return clone; +} + +int mptcp_rbs_value_sbf_list_empty_print( + const struct mptcp_rbs_value_sbf_list_empty *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".EMPTY"); + return len; +} + +struct mptcp_rbs_value_sbf_list_filter *mptcp_rbs_value_sbf_list_filter_new( + void) +{ + struct mptcp_rbs_value_sbf_list_filter *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_filter), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_FILTER; + value->free = mptcp_rbs_value_sbf_list_filter_free; + value->execute = mptcp_rbs_value_sbf_list_filter_execute; + /* value->list and value->cond are set later */ + + return value; +} + +void mptcp_rbs_value_sbf_list_filter_free( + struct mptcp_rbs_value_sbf_list_filter *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + MPTCP_RBS_VALUE_FREE(self->cond); + kfree(self); +} + +struct tcp_sock *mptcp_rbs_value_sbf_list_filter_execute( + struct mptcp_rbs_value_sbf_list_filter *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, bool *is_null) +{ + struct tcp_sock *sbf; + s32 b; + + sbf = self->list->execute(self->list, ctx, prev, is_null); + if (*is_null) + return NULL; + + while (sbf) { + self->cur = sbf; + b = self->cond->execute(self->cond, ctx); + if (b > 0) + break; + + sbf = self->list->execute(self->list, ctx, prev, is_null); + } + + return sbf; +} + +struct mptcp_rbs_value_sbf_list_filter *mptcp_rbs_value_sbf_list_filter_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_filter *value) +{ + struct mptcp_rbs_value_sbf_list_filter *clone; + int i; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_filter), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + for (i = 0; i < MAX_NESTING; ++i) { + if (!ctx->repls[i].repl) + break; + } + BUG_ON(i == MAX_NESTING); + + ctx->repls[i].repl = &value->cur; + ctx->repls[i].repl_with = &clone->cur; + CLONE(clone->cond); + ctx->repls[i].repl = NULL; + + return clone; +} + +int mptcp_rbs_value_sbf_list_filter_print( + const struct mptcp_rbs_value_sbf_list_filter *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".FILTER(v%p => ", &value->cur); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->cond, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_sbf_list_filter_sbf * +mptcp_rbs_value_sbf_list_filter_sbf_new(struct tcp_sock **cur) +{ + struct mptcp_rbs_value_sbf_list_filter_sbf *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_filter_sbf), + GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_FILTER_SBF; 
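	/* ->cur (assigned below) points into the owning FILTER/MIN/MAX/SUM
	 * value, so executing this value simply dereferences the loop
	 * variable the owner stored there before evaluating the condition.
	 */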
+ value->free = mptcp_rbs_value_sbf_list_filter_sbf_free; + value->execute = mptcp_rbs_value_sbf_list_filter_sbf_execute; + value->cur = cur; + + return value; +} + +void mptcp_rbs_value_sbf_list_filter_sbf_free( + struct mptcp_rbs_value_sbf_list_filter_sbf *self) +{ + kfree(self); +} + +struct tcp_sock *mptcp_rbs_value_sbf_list_filter_sbf_execute( + struct mptcp_rbs_value_sbf_list_filter_sbf *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + return *self->cur; +} + +struct mptcp_rbs_value_sbf_list_filter_sbf * +mptcp_rbs_value_sbf_list_filter_sbf_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_filter_sbf *value) +{ + struct mptcp_rbs_value_sbf_list_filter_sbf *clone; + int i; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_filter_sbf), + GFP_KERNEL); + *clone = *value; + + for (i = 0; i < MAX_NESTING; ++i) { + if (clone->cur == ctx->repls[i].repl) { + clone->cur = ctx->repls[i].repl_with; + break; + } + } + + return clone; +} + +int mptcp_rbs_value_sbf_list_filter_sbf_print( + const struct mptcp_rbs_value_sbf_list_filter_sbf *value, char *buffer) +{ + return sprintf_null(&buffer, "v%p", value->cur); +} + +struct mptcp_rbs_value_sbf_list_max *mptcp_rbs_value_sbf_list_max_new(void) +{ + struct mptcp_rbs_value_sbf_list_max *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_max), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_MAX; + value->free = mptcp_rbs_value_sbf_list_max_free; + value->execute = mptcp_rbs_value_sbf_list_max_execute; + /* value->list and value->cond are set later */ + + return value; +} + +void mptcp_rbs_value_sbf_list_max_free( + struct mptcp_rbs_value_sbf_list_max *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + MPTCP_RBS_VALUE_FREE(self->cond); + kfree(self); +} + +struct tcp_sock *mptcp_rbs_value_sbf_list_max_execute( + struct mptcp_rbs_value_sbf_list_max *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct tcp_sock *sbf; + s64 value; + struct tcp_sock *max_sbf = NULL; + s64 max_value = -1; + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + if (is_null) + return NULL; + + while (sbf) { + self->cur = sbf; + value = self->cond->execute(self->cond, ctx); + if (value != -1 && value > max_value) { + max_value = value; + max_sbf = sbf; + } + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + } + + return max_sbf; +} + +struct mptcp_rbs_value_sbf_list_max *mptcp_rbs_value_sbf_list_max_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_max *value) +{ + struct mptcp_rbs_value_sbf_list_max *clone; + int i; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_max), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + for (i = 0; i < MAX_NESTING; ++i) { + if (!ctx->repls[i].repl) + break; + } + BUG_ON(i == MAX_NESTING); + + ctx->repls[i].repl = &value->cur; + ctx->repls[i].repl_with = &clone->cur; + CLONE(clone->cond); + ctx->repls[i].repl = NULL; + + return clone; +} + +int mptcp_rbs_value_sbf_list_max_print( + const struct mptcp_rbs_value_sbf_list_max *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".MAX(v%p => ", &value->cur); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->cond, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct 
mptcp_rbs_value_sbf_list_min *mptcp_rbs_value_sbf_list_min_new(void) +{ + struct mptcp_rbs_value_sbf_list_min *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_min), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_MIN; + value->free = mptcp_rbs_value_sbf_list_min_free; + value->execute = mptcp_rbs_value_sbf_list_min_execute; + /* value->list and value->cond are set later */ + + return value; +} + +void mptcp_rbs_value_sbf_list_min_free( + struct mptcp_rbs_value_sbf_list_min *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + MPTCP_RBS_VALUE_FREE(self->cond); + kfree(self); +} + +struct tcp_sock *mptcp_rbs_value_sbf_list_min_execute( + struct mptcp_rbs_value_sbf_list_min *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct tcp_sock *sbf; + s64 value; + struct tcp_sock *min_sbf = NULL; + s64 min_value = 0xFFFFFFFFll; + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + if (is_null) + return NULL; + + while (sbf) { + self->cur = sbf; + value = self->cond->execute(self->cond, ctx); + if (value != -1 && value < min_value) { + min_value = value; + min_sbf = sbf; + } + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + } + + return min_sbf; +} + +struct mptcp_rbs_value_sbf_list_min *mptcp_rbs_value_sbf_list_min_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_min *value) +{ + struct mptcp_rbs_value_sbf_list_min *clone; + int i; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_min), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + for (i = 0; i < MAX_NESTING; ++i) { + if (!ctx->repls[i].repl) + break; + } + BUG_ON(i == MAX_NESTING); + + ctx->repls[i].repl = &value->cur; + ctx->repls[i].repl_with = &clone->cur; + CLONE(clone->cond); + ctx->repls[i].repl = NULL; + + return clone; +} + +int mptcp_rbs_value_sbf_list_min_print( + const struct mptcp_rbs_value_sbf_list_min *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".MIN(v%p => ", &value->cur); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->cond, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_sbf_list_get *mptcp_rbs_value_sbf_list_get_new( + struct mptcp_rbs_value_sbf_list *list, struct mptcp_rbs_value_int *index) +{ + struct mptcp_rbs_value_sbf_list_get *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_get), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_GET; + value->free = mptcp_rbs_value_sbf_list_get_free; + value->execute = mptcp_rbs_value_sbf_list_get_execute; + value->list = list; + value->index = index; + + return value; +} + +void mptcp_rbs_value_sbf_list_get_free( + struct mptcp_rbs_value_sbf_list_get *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + MPTCP_RBS_VALUE_FREE(self->index); + kfree(self); +} + +struct tcp_sock *mptcp_rbs_value_sbf_list_get_execute( + struct mptcp_rbs_value_sbf_list_get *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct tcp_sock *sbf; + s64 idx; + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + idx = self->index->execute(self->index, ctx); + + if (is_null || idx < 0) + return NULL; + + while (sbf && idx) { + --idx; + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + } + + return sbf; +} + +struct 
mptcp_rbs_value_sbf_list_get *mptcp_rbs_value_sbf_list_get_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_get *value) +{ + struct mptcp_rbs_value_sbf_list_get *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_get), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + CLONE(clone->index); + + return clone; +} + +int mptcp_rbs_value_sbf_list_get_print( + const struct mptcp_rbs_value_sbf_list_get *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".GET("); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->index, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + + return len; +} + +struct mptcp_rbs_value_sbf_list_count *mptcp_rbs_value_sbf_list_count_new( + struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_value_sbf_list_count *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_count), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_COUNT; + value->free = mptcp_rbs_value_sbf_list_count_free; + value->execute = mptcp_rbs_value_sbf_list_count_execute; + value->list = list; + + return value; +} + +void mptcp_rbs_value_sbf_list_count_free( + struct mptcp_rbs_value_sbf_list_count *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_list_count_execute( + struct mptcp_rbs_value_sbf_list_count *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + void *prev = NULL; + bool is_null; + int n = 0; + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + if (is_null) + return -1; + + while (sbf) { + ++n; + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + } + + return n; +} + +struct mptcp_rbs_value_sbf_list_count *mptcp_rbs_value_sbf_list_count_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_count *value) +{ + struct mptcp_rbs_value_sbf_list_count *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_count), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + return clone; +} + +int mptcp_rbs_value_sbf_list_count_print( + const struct mptcp_rbs_value_sbf_list_count *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".COUNT"); + return len; +} + +struct mptcp_rbs_value_sbf_list_sum *mptcp_rbs_value_sbf_list_sum_new(void) +{ + struct mptcp_rbs_value_sbf_list_sum *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_sum), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_SUM; + value->free = mptcp_rbs_value_sbf_list_sum_free; + value->execute = mptcp_rbs_value_sbf_list_sum_execute; + /* value->list and value->cond are set later */ + + return value; +} + +void mptcp_rbs_value_sbf_list_sum_free( + struct mptcp_rbs_value_sbf_list_sum *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + MPTCP_RBS_VALUE_FREE(self->cond); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_list_sum_execute( + struct mptcp_rbs_value_sbf_list_sum *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct tcp_sock *sbf; + s64 sum = 0; + s64 value; + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + if (is_null) + return -1; + + while (sbf) { + self->cur = sbf; + value = self->cond->execute(self->cond, ctx); + if 
(value != -1) + sum += value; + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + } + + return sum; +} + +struct mptcp_rbs_value_sbf_list_sum *mptcp_rbs_value_sbf_list_sum_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_sum *value) +{ + struct mptcp_rbs_value_sbf_list_sum *clone; + int i; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_sum), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + for (i = 0; i < MAX_NESTING; ++i) { + if (!ctx->repls[i].repl) + break; + } + BUG_ON(i == MAX_NESTING); + + ctx->repls[i].repl = &value->cur; + ctx->repls[i].repl_with = &clone->cur; + CLONE(clone->cond); + ctx->repls[i].repl = NULL; + + return clone; +} + +int mptcp_rbs_value_sbf_list_sum_print( + const struct mptcp_rbs_value_sbf_list_sum *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".SUM(v%p => ", &value->cur); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->cond, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_skb_list_next *mptcp_rbs_value_skb_list_next_new( + struct mptcp_rbs_value_skb_list *list) +{ + struct mptcp_rbs_value_skb_list_next *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_list_next), GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_NEXT; + value->free = mptcp_rbs_value_skb_list_next_free; + value->execute = mptcp_rbs_value_skb_list_next_execute; + value->list = list; + value->reinject = list->underlying_queue_kind == VALUE_KIND_RQ; + + return value; +} + +void mptcp_rbs_value_skb_list_next_free( + struct mptcp_rbs_value_skb_list_next *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_skb_list_next_execute( + struct mptcp_rbs_value_skb_list_next *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + + if (self->exec_count != ctx->rbs_cb->exec_count) { + self->prev = NULL; + self->is_null = false; + self->exec_count = ctx->rbs_cb->exec_count; + } + if (self->is_null) + return NULL; + + skb = self->list->execute(self->list, ctx, &self->prev, &self->is_null); + if (!skb) { + self->prev = NULL; + self->is_null = true; + + /* If we have nested loops we have to make sure that next time + * we visit this value the first item of the list is returned + */ + --self->exec_count; + } + + return skb; +} + +struct mptcp_rbs_value_skb_list_next *mptcp_rbs_value_skb_list_next_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_next *value) +{ + struct mptcp_rbs_value_skb_list_next *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_list_next), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + return clone; +} + +int mptcp_rbs_value_skb_list_next_print( + const struct mptcp_rbs_value_skb_list_next *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".NEXT()"); + return len; +} + +struct mptcp_rbs_value_skb_sent_on *mptcp_rbs_value_skb_sent_on_new( + struct mptcp_rbs_value_skb *skb, struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_skb_sent_on *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_skb_sent_on), GFP_KERNEL); + value->kind = VALUE_KIND_SKB_SENT_ON; + value->free = 
mptcp_rbs_value_skb_sent_on_free; + value->execute = mptcp_rbs_value_skb_sent_on_execute; + value->skb = skb; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_skb_sent_on_free(struct mptcp_rbs_value_skb_sent_on *self) +{ + MPTCP_RBS_VALUE_FREE(self->skb); + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s32 mptcp_rbs_value_skb_sent_on_execute( + struct mptcp_rbs_value_skb_sent_on *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + struct tcp_sock *sbf; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return -1; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return mptcp_pi_to_flag(sbf->mptcp->path_index) & + TCP_SKB_CB(skb)->path_mask; +} + +struct mptcp_rbs_value_skb_sent_on *mptcp_rbs_value_skb_sent_on_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_sent_on *value) +{ + struct mptcp_rbs_value_skb_sent_on *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_skb_sent_on), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + CLONE(clone->skb); + + return clone; +} + +int mptcp_rbs_value_skb_sent_on_print( + const struct mptcp_rbs_value_skb_sent_on *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->skb, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".SENT_ON("); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_skb_sent_on_all *mptcp_rbs_value_skb_sent_on_all_new( + struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_value_skb_sent_on_all *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_sent_on_all), GFP_KERNEL); + value->kind = VALUE_KIND_SKB_SENT_ON_ALL; + value->free = mptcp_rbs_value_skb_sent_on_all_free; + value->execute = mptcp_rbs_value_skb_sent_on_all_execute; + value->skb = skb; + + return value; +} + +void mptcp_rbs_value_skb_sent_on_all_free( + struct mptcp_rbs_value_skb_sent_on_all *self) +{ + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +s32 mptcp_rbs_value_skb_sent_on_all_execute( + struct mptcp_rbs_value_skb_sent_on_all *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + u32 mask; + struct tcp_sock *sbf; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return -1; + + mask = TCP_SKB_CB(skb)->path_mask; + sbf = ctx->mpcb->connection_list; + + while (sbf) { + if (!(mask & mptcp_pi_to_flag(sbf->mptcp->path_index))) + return 0; + + sbf = sbf->mptcp->next; + } + + return 1; +} + +struct mptcp_rbs_value_skb_sent_on_all *mptcp_rbs_value_skb_sent_on_all_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_sent_on_all *value) +{ + struct mptcp_rbs_value_skb_sent_on_all *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_sent_on_all), GFP_KERNEL); + *clone = *value; + CLONE(clone->skb); + + return clone; +} + +int mptcp_rbs_value_skb_sent_on_all_print( + const struct mptcp_rbs_value_skb_sent_on_all *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->skb, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".SENT_ON_ALL"); + return len; +} + +struct mptcp_rbs_value_skb_user *mptcp_rbs_value_skb_user_new( + struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_value_skb_user *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_skb_user), 
GFP_KERNEL); + value->kind = VALUE_KIND_SKB_USER; + value->free = mptcp_rbs_value_skb_user_free; + value->execute = mptcp_rbs_value_skb_user_execute; + value->skb = skb; + + return value; +} + +void mptcp_rbs_value_skb_user_free(struct mptcp_rbs_value_skb_user *self) +{ + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +s64 mptcp_rbs_value_skb_user_execute(struct mptcp_rbs_value_skb_user *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return -1; + + return TCP_SKB_CB(skb)->mptcp_rbs.user; +} + +struct mptcp_rbs_value_skb_user *mptcp_rbs_value_skb_user_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_user *value) +{ + struct mptcp_rbs_value_skb_user *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_skb_user), GFP_KERNEL); + *clone = *value; + CLONE(clone->skb); + + return clone; +} + +int mptcp_rbs_value_skb_user_print(const struct mptcp_rbs_value_skb_user *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->skb, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".USER"); + return len; +} + +struct mptcp_rbs_value_skb_seq *mptcp_rbs_value_skb_seq_new( + struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_value_skb_seq *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_skb_seq), GFP_KERNEL); + value->kind = VALUE_KIND_SKB_SEQ; + value->free = mptcp_rbs_value_skb_seq_free; + value->execute = mptcp_rbs_value_skb_seq_execute; + value->skb = skb; + + return value; +} + +void mptcp_rbs_value_skb_seq_free(struct mptcp_rbs_value_skb_seq *self) +{ + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +s64 mptcp_rbs_value_skb_seq_execute(struct mptcp_rbs_value_skb_seq *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return -1; + + return TCP_SKB_CB(skb)->seq; +} + +struct mptcp_rbs_value_skb_seq *mptcp_rbs_value_skb_seq_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_seq *value) +{ + struct mptcp_rbs_value_skb_seq *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_skb_seq), GFP_KERNEL); + *clone = *value; + CLONE(clone->skb); + + return clone; +} + +int mptcp_rbs_value_skb_seq_print(const struct mptcp_rbs_value_skb_seq *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->skb, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".SEQ"); + return len; +} + +struct mptcp_rbs_value_skb_psh *mptcp_rbs_value_skb_psh_new( + struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_value_skb_psh *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_skb_psh), GFP_KERNEL); + value->kind = VALUE_KIND_SKB_PSH; + value->free = mptcp_rbs_value_skb_psh_free; + value->execute = mptcp_rbs_value_skb_psh_execute; + value->skb = skb; + + return value; +} + +void mptcp_rbs_value_skb_psh_free(struct mptcp_rbs_value_skb_psh *self) +{ + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +s32 mptcp_rbs_value_skb_psh_execute(struct mptcp_rbs_value_skb_psh *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return -1; + + return TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH; +} + +struct mptcp_rbs_value_skb_psh *mptcp_rbs_value_skb_psh_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_psh *value) +{ + struct 
mptcp_rbs_value_skb_psh *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_skb_psh), GFP_KERNEL); + *clone = *value; + CLONE(clone->skb); + + return clone; +} + +int mptcp_rbs_value_skb_psh_print(const struct mptcp_rbs_value_skb_psh *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->skb, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".PSH"); + return len; +} + +struct mptcp_rbs_value_skb_length *mptcp_rbs_value_skb_length_new( + struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_value_skb_length *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_skb_length), GFP_KERNEL); + value->kind = VALUE_KIND_SKB_LENGTH; + value->free = mptcp_rbs_value_skb_length_free; + value->execute = mptcp_rbs_value_skb_length_execute; + value->skb = skb; + + return value; +} + +void mptcp_rbs_value_skb_length_free(struct mptcp_rbs_value_skb_length *self) +{ + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +s64 mptcp_rbs_value_skb_length_execute(struct mptcp_rbs_value_skb_length *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return -1; + + return TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; +} + +struct mptcp_rbs_value_skb_length *mptcp_rbs_value_skb_length_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_length *value) +{ + struct mptcp_rbs_value_skb_length *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_skb_length), GFP_KERNEL); + *clone = *value; + CLONE(clone->skb); + + return clone; +} + +int mptcp_rbs_value_skb_length_print(const struct mptcp_rbs_value_skb_length *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->skb, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".LENGTH"); + return len; +} + +struct mptcp_rbs_value_skb_list_empty *mptcp_rbs_value_skb_list_empty_new( + struct mptcp_rbs_value_skb_list *list) +{ + struct mptcp_rbs_value_skb_list_empty *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_list_empty), GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_EMPTY; + value->free = mptcp_rbs_value_skb_list_empty_free; + value->execute = mptcp_rbs_value_skb_list_empty_execute; + value->list = list; + + return value; +} + +void mptcp_rbs_value_skb_list_empty_free( + struct mptcp_rbs_value_skb_list_empty *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + kfree(self); +} + +s32 mptcp_rbs_value_skb_list_empty_execute( + struct mptcp_rbs_value_skb_list_empty *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct sk_buff *skb; + + skb = self->list->execute(self->list, ctx, &prev, &is_null); + + if (is_null) + return -1; + return skb ? 
0 : 1;
+}
+
+struct mptcp_rbs_value_skb_list_empty *mptcp_rbs_value_skb_list_empty_clone(
+	struct mptcp_rbs_value_clone_ctx *ctx,
+	const struct mptcp_rbs_value_skb_list_empty *value)
+{
+	struct mptcp_rbs_value_skb_list_empty *clone;
+
+	clone =
+	    kmalloc(sizeof(struct mptcp_rbs_value_skb_list_empty), GFP_KERNEL);
+	*clone = *value;
+	CLONE(clone->list);
+
+	return clone;
+}
+
+int mptcp_rbs_value_skb_list_empty_print(
+	const struct mptcp_rbs_value_skb_list_empty *value, char *buffer)
+{
+	int len = mptcp_rbs_value_print(
+	    (const struct mptcp_rbs_value *) value->list, buffer);
+	if (buffer)
+		buffer += len;
+
+	len += sprintf_null(&buffer, ".EMPTY");
+	return len;
+}
+
+struct mptcp_rbs_value_skb_list_pop *mptcp_rbs_value_skb_list_pop_new(
+	struct mptcp_rbs_value_skb_list *list)
+{
+	struct mptcp_rbs_value_skb_list_pop *value;
+
+	value =
+	    kzalloc(sizeof(struct mptcp_rbs_value_skb_list_pop), GFP_KERNEL);
+	value->kind = VALUE_KIND_SKBLIST_POP;
+	value->free = mptcp_rbs_value_skb_list_pop_free;
+	value->execute = mptcp_rbs_value_skb_list_pop_execute;
+	value->list = list;
+	value->reinject = list->underlying_queue_kind == VALUE_KIND_RQ;
+
+	return value;
+}
+
+void mptcp_rbs_value_skb_list_pop_free(
+	struct mptcp_rbs_value_skb_list_pop *self)
+{
+	MPTCP_RBS_VALUE_FREE(self->list);
+	kfree(self);
+}
+
+struct sk_buff *mptcp_rbs_value_skb_list_pop_execute(
+	struct mptcp_rbs_value_skb_list_pop *self, struct mptcp_rbs_eval_ctx *ctx)
+{
+	void *prev = NULL;
+	bool is_null;
+	struct sk_buff *skb =
+	    self->list->execute(self->list, ctx, &prev, &is_null);
+
+	if (is_null || !skb)
+		return NULL;
+
+	/* After this point we are sure that we execute a pop */
+	ctx->side_effects = 1;
+
+	if (self->list->underlying_queue_kind == VALUE_KIND_Q) {
+		/*
+		 * The element popped from Q might be the queue_position
+		 * itself or a later skb
+		 */
+		if (skb == ctx->rbs_cb->queue_position) {
+			mptcp_rbs_advance_send_head(
+			    ctx->meta_sk, &ctx->rbs_cb->queue_position);
+			mptcp_rbs_debug(
+			    "rbs_q_pop returns %p, new queue head %p\n", skb,
+			    ctx->rbs_cb->queue_position);
+		} else {
+			/* We cannot unlink the packet, as all skbs have to
+			 * stay in the circular buffer */
+			mptcp_debug(
+			    "%s sets not_in_queue for packet %p in Q, was %u\n",
+			    __func__, skb,
+			    TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue);
+			TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue = 1;
+		}
+
+		return skb;
+	}
+
+	if (self->list->underlying_queue_kind == VALUE_KIND_RQ) {
+		mptcp_debug("%s sets not_in_queue, to_free and to_unlink for "
+			    "packet %p in RQ, was %u\n",
+			    __func__, skb,
+			    TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue);
+		TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue = 1;
+		TCP_SKB_CB(skb)->mptcp_rbs.flags_to_free = 1;
+		TCP_SKB_CB(skb)->mptcp_rbs.flags_to_unlink = 1;
+
+		return skb;
+	}
+
+	if (self->list->underlying_queue_kind == VALUE_KIND_QU) {
+		mptcp_debug(
+		    "%s sets not_in_queue for packet %p in QU, was %u\n",
+		    __func__, skb,
+		    TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue);
+		TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue = 1;
+		return skb;
+	}
+
+	mptcp_rbs_debug("mptcp_rbs_value_skb_list_pop_execute on "
+			"unexpected list kind %u\n",
+			self->list->kind);
+	return NULL;
+}
+
+struct mptcp_rbs_value_skb_list_pop *mptcp_rbs_value_skb_list_pop_clone(
+	struct mptcp_rbs_value_clone_ctx *ctx,
+	const struct mptcp_rbs_value_skb_list_pop *value)
+{
+	struct mptcp_rbs_value_skb_list_pop *clone;
+
+	clone =
+	    kmalloc(sizeof(struct mptcp_rbs_value_skb_list_pop), GFP_KERNEL);
+	*clone = *value;
+	CLONE(clone->list);
+
+	return clone;
+}
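+
+/*
+ * Side-effect summary for mptcp_rbs_value_skb_list_pop_execute above
+ * (comment added for clarity): a pop never unlinks the skb from the
+ * circular buffer. Popping the head of Q advances queue_position; popping
+ * a later skb of Q only marks it not_in_queue; popping from RQ marks it
+ * not_in_queue, to_free and to_unlink; popping from QU marks it
+ * not_in_queue. In every case ctx->side_effects is set to 1 first.
+ */
+int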
mptcp_rbs_value_skb_list_pop_print( + const struct mptcp_rbs_value_skb_list_pop *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".POP()"); + return len; +} + +struct mptcp_rbs_value_skb_list_filter *mptcp_rbs_value_skb_list_filter_new( + void) +{ + struct mptcp_rbs_value_skb_list_filter *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_list_filter), GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_FILTER; + value->free = mptcp_rbs_value_skb_list_filter_free; + value->execute = mptcp_rbs_value_skb_list_filter_execute; + /* value->list and value->cond are set later */ + + return value; +} + +void mptcp_rbs_value_skb_list_filter_free( + struct mptcp_rbs_value_skb_list_filter *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + MPTCP_RBS_VALUE_FREE(self->cond); + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_skb_list_filter_execute( + struct mptcp_rbs_value_skb_list_filter *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, bool *is_null) +{ + struct sk_buff *skb; + s32 b; + + skb = self->list->execute(self->list, ctx, prev, is_null); + if (*is_null) + return NULL; + + while (skb) { + self->progress.cur = skb; + b = self->cond->execute(self->cond, ctx); + if (b > 0) + break; + + skb = self->list->execute(self->list, ctx, prev, is_null); + } + + return skb; +} + +struct mptcp_rbs_value_skb_list_filter *mptcp_rbs_value_skb_list_filter_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_filter *value) +{ + struct mptcp_rbs_value_skb_list_filter *clone; + int i; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_list_filter), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + for (i = 0; i < MAX_NESTING; ++i) { + if (!ctx->repls[i].repl) + break; + } + BUG_ON(i == MAX_NESTING); + + ctx->repls[i].repl = &value->progress; + ctx->repls[i].repl_with = &clone->progress; + CLONE(clone->cond); + ctx->repls[i].repl = NULL; + + return clone; +} + +int mptcp_rbs_value_skb_list_filter_print( + const struct mptcp_rbs_value_skb_list_filter *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".FILTER(v%p => ", &value->progress); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->cond, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_skb_list_filter_skb * +mptcp_rbs_value_skb_list_filter_skb_new( + struct mptcp_rbs_value_skb_list_filter_progress *progress) +{ + struct mptcp_rbs_value_skb_list_filter_skb *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_skb_list_filter_skb), + GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_FILTER_SKB; + value->free = mptcp_rbs_value_skb_list_filter_skb_free; + value->execute = mptcp_rbs_value_skb_list_filter_skb_execute; + value->progress = progress; + value->reinject = progress->reinject; + + return value; +} + +void mptcp_rbs_value_skb_list_filter_skb_free( + struct mptcp_rbs_value_skb_list_filter_skb *self) +{ + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_skb_list_filter_skb_execute( + struct mptcp_rbs_value_skb_list_filter_skb *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + return self->progress->cur; +} + +struct mptcp_rbs_value_skb_list_filter_skb * +mptcp_rbs_value_skb_list_filter_skb_clone( + struct 
mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_filter_skb *value) +{ + struct mptcp_rbs_value_skb_list_filter_skb *clone; + int i; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_skb_list_filter_skb), + GFP_KERNEL); + *clone = *value; + + for (i = 0; i < MAX_NESTING; ++i) { + if (clone->progress == ctx->repls[i].repl) { + clone->progress = ctx->repls[i].repl_with; + break; + } + } + BUG_ON(i == MAX_NESTING); + + return clone; +} + +int mptcp_rbs_value_skb_list_filter_skb_print( + const struct mptcp_rbs_value_skb_list_filter_skb *value, char *buffer) +{ + return sprintf_null(&buffer, "v%p", value->progress); +} + +struct mptcp_rbs_value_skb_list_count *mptcp_rbs_value_skb_list_count_new( + struct mptcp_rbs_value_skb_list *list) +{ + struct mptcp_rbs_value_skb_list_count *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_list_count), GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_COUNT; + value->free = mptcp_rbs_value_skb_list_count_free; + value->execute = mptcp_rbs_value_skb_list_count_execute; + value->list = list; + + return value; +} + +void mptcp_rbs_value_skb_list_count_free( + struct mptcp_rbs_value_skb_list_count *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + kfree(self); +} + +s64 mptcp_rbs_value_skb_list_count_execute( + struct mptcp_rbs_value_skb_list_count *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + void *prev = NULL; + bool is_null; + int n = 0; + + skb = self->list->execute(self->list, ctx, &prev, &is_null); + if (is_null) + return -1; + + while (skb) { + ++n; + skb = self->list->execute(self->list, ctx, &prev, &is_null); + } + + return n; +} + +struct mptcp_rbs_value_skb_list_count *mptcp_rbs_value_skb_list_count_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_count *value) +{ + struct mptcp_rbs_value_skb_list_count *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_list_count), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + return clone; +} + +int mptcp_rbs_value_skb_list_count_print( + const struct mptcp_rbs_value_skb_list_count *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".COUNT"); + return len; +} + +struct mptcp_rbs_value_skb_list_top *mptcp_rbs_value_skb_list_top_new( + struct mptcp_rbs_value_skb_list *list) +{ + struct mptcp_rbs_value_skb_list_top *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_list_top), GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_TOP; + value->free = mptcp_rbs_value_skb_list_top_free; + value->execute = mptcp_rbs_value_skb_list_top_execute; + value->list = list; + value->reinject = list->underlying_queue_kind == VALUE_KIND_RQ; + + return value; +} + +void mptcp_rbs_value_skb_list_top_free( + struct mptcp_rbs_value_skb_list_top *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_skb_list_top_execute( + struct mptcp_rbs_value_skb_list_top *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct sk_buff *skb = + self->list->execute(self->list, ctx, &prev, &is_null); + + /* + * IMPORTANT: do not unset TCP_SKB_CB(skb)->mptcp_rbs_... here! + * only POP might set it, once it is set, it remains forever! 
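+	 * (Concretely: the mptcp_rbs.flags_not_in_queue, flags_to_free and
+	 * flags_to_unlink bits that POP sets must survive a TOP access.)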
+ */ + + if (is_null) + return NULL; + + return skb; +} + +struct mptcp_rbs_value_skb_list_top *mptcp_rbs_value_skb_list_top_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_top *value) +{ + struct mptcp_rbs_value_skb_list_top *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_list_top), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + return clone; +} + +int mptcp_rbs_value_skb_list_top_print( + const struct mptcp_rbs_value_skb_list_top *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".TOP"); + return len; +} + +struct mptcp_rbs_value_skb_list_get *mptcp_rbs_value_skb_list_get_new( + struct mptcp_rbs_value_skb_list *list, struct mptcp_rbs_value_int *index) +{ + struct mptcp_rbs_value_skb_list_get *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_list_get), GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_GET; + value->free = mptcp_rbs_value_skb_list_get_free; + value->execute = mptcp_rbs_value_skb_list_get_execute; + value->list = list; + value->index = index; + + return value; +} + +void mptcp_rbs_value_skb_list_get_free( + struct mptcp_rbs_value_skb_list_get *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + MPTCP_RBS_VALUE_FREE(self->index); + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_skb_list_get_execute( + struct mptcp_rbs_value_skb_list_get *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct sk_buff *skb; + s64 idx; + + skb = self->list->execute(self->list, ctx, &prev, &is_null); + idx = self->index->execute(self->index, ctx); + + /* + * IMPORTANT: do not unset TCP_SKB_CB(skb)->mptcp_rbs_... here! + * only POP might set it, once it is set, it remains forever! 
+ */ + + if (is_null || idx < 0) + return NULL; + + while (skb && idx) { + idx--; + skb = self->list->execute(self->list, ctx, &prev, &is_null); + } + + return skb; +} + +struct mptcp_rbs_value_skb_list_get *mptcp_rbs_value_skb_list_get_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_get *value) +{ + struct mptcp_rbs_value_skb_list_get *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_list_get), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + CLONE(clone->index); + + return clone; +} + +int mptcp_rbs_value_skb_list_get_print( + const struct mptcp_rbs_value_skb_list_get *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".GET("); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->index, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + + return len; +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +#pragma GCC diagnostic ignored "-Wreturn-type" +enum mptcp_rbs_type_kind mptcp_rbs_value_get_type( + enum mptcp_rbs_value_kind kind) +{ + switch (kind) { + /* Built-in values */ + case VALUE_KIND_CONSTINT: + return TYPE_KIND_INT; + case VALUE_KIND_CONSTSTRING: + return TYPE_KIND_STRING; + case VALUE_KIND_NULL: + return TYPE_KIND_NULL; + case VALUE_KIND_BOOL_VAR: + return TYPE_KIND_BOOL; + case VALUE_KIND_INT_VAR: + return TYPE_KIND_INT; + case VALUE_KIND_STRING_VAR: + return TYPE_KIND_STRING; + case VALUE_KIND_SBF_VAR: + return TYPE_KIND_SBF; + case VALUE_KIND_SBFLIST_VAR: + return TYPE_KIND_SBFLIST; + case VALUE_KIND_SKB_VAR: + return TYPE_KIND_SKB; + case VALUE_KIND_SKBLIST_VAR: + return TYPE_KIND_SKBLIST; + case VALUE_KIND_NOT: + case VALUE_KIND_EQUAL: + case VALUE_KIND_UNEQUAL: + case VALUE_KIND_LESS: + case VALUE_KIND_LESS_EQUAL: + case VALUE_KIND_GREATER: + case VALUE_KIND_GREATER_EQUAL: + case VALUE_KIND_AND: + case VALUE_KIND_OR: + return TYPE_KIND_BOOL; + case VALUE_KIND_ADD: + case VALUE_KIND_SUBTRACT: + case VALUE_KIND_MULTIPLY: + case VALUE_KIND_DIVIDE: + case VALUE_KIND_REMAINDER: + return TYPE_KIND_INT; + case VALUE_KIND_IS_NULL: + case VALUE_KIND_IS_NOT_NULL: + return TYPE_KIND_BOOL; + case VALUE_KIND_REG: + return TYPE_KIND_INT; + case VALUE_KIND_SBFLIST_NEXT: + return TYPE_KIND_SBF; + case VALUE_KIND_SKBLIST_NEXT: + return TYPE_KIND_SKB; + +/* Custom values */ +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GET_VALUE_TYPE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GET_VALUE_TYPE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GET_VALUE_TYPE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GET_VALUE_TYPE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GET_VALUE_TYPE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + } +} +#pragma GCC diagnostic pop + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +#pragma GCC diagnostic ignored "-Wreturn-type" +struct mptcp_rbs_value *mptcp_rbs_value_clone_ex( + struct mptcp_rbs_value_clone_ctx *ctx, const struct mptcp_rbs_value *value) +{ + if (ctx->user_func) { + 
struct mptcp_rbs_value *clone; + + clone = ctx->user_func(ctx->user_ctx, value); + if (clone) + return clone; + } + + switch (value->kind) { + /* Built-in values */ + APPLY_CLONE_VALUE(VALUE_KIND_CONSTINT, , + mptcp_rbs_value_constint, ) + APPLY_CLONE_VALUE(VALUE_KIND_CONSTSTRING, , + mptcp_rbs_value_conststring, ) + APPLY_CLONE_VALUE(VALUE_KIND_NULL, , mptcp_rbs_value_null, ) + APPLY_CLONE_VALUE(VALUE_KIND_BOOL_VAR, , + mptcp_rbs_value_bool_var, ) + APPLY_CLONE_VALUE(VALUE_KIND_INT_VAR, , + mptcp_rbs_value_int_var, ) + APPLY_CLONE_VALUE(VALUE_KIND_STRING_VAR, , + mptcp_rbs_value_string_var, ) + APPLY_CLONE_VALUE(VALUE_KIND_SBF_VAR, , + mptcp_rbs_value_sbf_var, ) + APPLY_CLONE_VALUE(VALUE_KIND_SBFLIST_VAR, , + mptcp_rbs_value_sbf_list_var, ) + APPLY_CLONE_VALUE(VALUE_KIND_SKB_VAR, , + mptcp_rbs_value_skb_var, ) + APPLY_CLONE_VALUE(VALUE_KIND_SKBLIST_VAR, , + mptcp_rbs_value_skb_list_var, ) + APPLY_CLONE_VALUE(VALUE_KIND_NOT, , mptcp_rbs_value_not, ) + APPLY_CLONE_VALUE(VALUE_KIND_EQUAL, , mptcp_rbs_value_equal, ) + APPLY_CLONE_VALUE(VALUE_KIND_UNEQUAL, , + mptcp_rbs_value_unequal, ) + APPLY_CLONE_VALUE(VALUE_KIND_LESS, , mptcp_rbs_value_less, ) + APPLY_CLONE_VALUE(VALUE_KIND_LESS_EQUAL, , + mptcp_rbs_value_less_equal, ) + APPLY_CLONE_VALUE(VALUE_KIND_GREATER, , + mptcp_rbs_value_greater, ) + APPLY_CLONE_VALUE(VALUE_KIND_GREATER_EQUAL, , + mptcp_rbs_value_greater_equal, ) + APPLY_CLONE_VALUE(VALUE_KIND_AND, , mptcp_rbs_value_and, ) + APPLY_CLONE_VALUE(VALUE_KIND_OR, , mptcp_rbs_value_or, ) + APPLY_CLONE_VALUE(VALUE_KIND_ADD, , mptcp_rbs_value_add, ) + APPLY_CLONE_VALUE(VALUE_KIND_SUBTRACT, , + mptcp_rbs_value_subtract, ) + APPLY_CLONE_VALUE(VALUE_KIND_MULTIPLY, , + mptcp_rbs_value_multiply, ) + APPLY_CLONE_VALUE(VALUE_KIND_DIVIDE, , mptcp_rbs_value_divide, ) + APPLY_CLONE_VALUE(VALUE_KIND_REMAINDER, , + mptcp_rbs_value_remainder, ) + APPLY_CLONE_VALUE(VALUE_KIND_IS_NULL, , + mptcp_rbs_value_is_null, ) + APPLY_CLONE_VALUE(VALUE_KIND_IS_NOT_NULL, , + mptcp_rbs_value_is_not_null, ) + APPLY_CLONE_VALUE(VALUE_KIND_REG, , mptcp_rbs_value_reg, ) + APPLY_CLONE_VALUE(VALUE_KIND_SBFLIST_NEXT, , + mptcp_rbs_value_sbf_list_next, ) + APPLY_CLONE_VALUE(VALUE_KIND_SKBLIST_NEXT, , + mptcp_rbs_value_skb_list_next, ) + +/* Custom values */ +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_CLONE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_CLONE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_CLONE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_CLONE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_CLONE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + } +} +#pragma GCC diagnostic pop + +struct mptcp_rbs_value *mptcp_rbs_value_clone( + const struct mptcp_rbs_value *value, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_value_clone_ctx ctx; + + memset(&ctx, 0, sizeof(struct mptcp_rbs_value_clone_ctx)); + ctx.user_ctx = user_ctx; + ctx.user_func = user_func; + return mptcp_rbs_value_clone_ex(&ctx, value); +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +#pragma GCC diagnostic ignored "-Wreturn-type" +int mptcp_rbs_value_print(const struct mptcp_rbs_value *value, char 
*buffer)
+{
+	switch (value->kind) {
+	/* Built-in values */
+	APPLY_PRINT_VALUE(VALUE_KIND_CONSTINT, , mptcp_rbs_value_constint, )
+	APPLY_PRINT_VALUE(VALUE_KIND_CONSTSTRING, ,
+			  mptcp_rbs_value_conststring, )
+	APPLY_PRINT_VALUE(VALUE_KIND_NULL, , mptcp_rbs_value_null, )
+	APPLY_PRINT_VALUE(VALUE_KIND_BOOL_VAR, , mptcp_rbs_value_bool_var, )
+	APPLY_PRINT_VALUE(VALUE_KIND_INT_VAR, , mptcp_rbs_value_int_var, )
+	APPLY_PRINT_VALUE(VALUE_KIND_STRING_VAR, ,
+			  mptcp_rbs_value_string_var, )
+	APPLY_PRINT_VALUE(VALUE_KIND_SBF_VAR, , mptcp_rbs_value_sbf_var, )
+	APPLY_PRINT_VALUE(VALUE_KIND_SBFLIST_VAR, ,
+			  mptcp_rbs_value_sbf_list_var, )
+	APPLY_PRINT_VALUE(VALUE_KIND_SKB_VAR, , mptcp_rbs_value_skb_var, )
+	APPLY_PRINT_VALUE(VALUE_KIND_SKBLIST_VAR, ,
+			  mptcp_rbs_value_skb_list_var, )
+	APPLY_PRINT_VALUE(VALUE_KIND_NOT, , mptcp_rbs_value_not, )
+	APPLY_PRINT_VALUE(VALUE_KIND_EQUAL, , mptcp_rbs_value_equal, )
+	APPLY_PRINT_VALUE(VALUE_KIND_UNEQUAL, , mptcp_rbs_value_unequal, )
+	APPLY_PRINT_VALUE(VALUE_KIND_LESS, , mptcp_rbs_value_less, )
+	APPLY_PRINT_VALUE(VALUE_KIND_LESS_EQUAL, ,
+			  mptcp_rbs_value_less_equal, )
+	APPLY_PRINT_VALUE(VALUE_KIND_GREATER, , mptcp_rbs_value_greater, )
+	APPLY_PRINT_VALUE(VALUE_KIND_GREATER_EQUAL, ,
+			  mptcp_rbs_value_greater_equal, )
+	APPLY_PRINT_VALUE(VALUE_KIND_AND, , mptcp_rbs_value_and, )
+	APPLY_PRINT_VALUE(VALUE_KIND_OR, , mptcp_rbs_value_or, )
+	APPLY_PRINT_VALUE(VALUE_KIND_ADD, , mptcp_rbs_value_add, )
+	APPLY_PRINT_VALUE(VALUE_KIND_SUBTRACT, , mptcp_rbs_value_subtract, )
+	APPLY_PRINT_VALUE(VALUE_KIND_MULTIPLY, , mptcp_rbs_value_multiply, )
+	APPLY_PRINT_VALUE(VALUE_KIND_DIVIDE, , mptcp_rbs_value_divide, )
+	APPLY_PRINT_VALUE(VALUE_KIND_REMAINDER, ,
+			  mptcp_rbs_value_remainder, )
+	APPLY_PRINT_VALUE(VALUE_KIND_IS_NULL, , mptcp_rbs_value_is_null, )
+	APPLY_PRINT_VALUE(VALUE_KIND_IS_NOT_NULL, ,
+			  mptcp_rbs_value_is_not_null, )
+	APPLY_PRINT_VALUE(VALUE_KIND_REG, , mptcp_rbs_value_reg, )
+	APPLY_PRINT_VALUE(VALUE_KIND_SBFLIST_NEXT, ,
+			  mptcp_rbs_value_sbf_list_next, )
+	APPLY_PRINT_VALUE(VALUE_KIND_SKBLIST_NEXT, ,
+			  mptcp_rbs_value_skb_list_next, )
+
+/* Custom values */
+#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \
+	APPLY_PRINT_VALUE(ENUM, STR, STRUCT, RETURNTYPE)
+#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \
+	APPLY_PRINT_VALUE(ENUM, STR, STRUCT, RETURNTYPE)
+#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \
+	APPLY_PRINT_VALUE(ENUM, STR, STRUCT, RETURNTYPE)
+#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \
+	APPLY_PRINT_VALUE(ENUM, STR, STRUCT, RETURNTYPE)
+#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \
+	APPLY_PRINT_VALUE(ENUM, STR, STRUCT, RETURNTYPE)
+	MPTCP_RBS_VALUE_INFO
+#undef RBS_APPLY
+#undef RBS_APPLY_ON_SBF
+#undef RBS_APPLY_ON_SBF_LIST
+#undef RBS_APPLY_ON_SKB
+#undef RBS_APPLY_ON_SKB_LIST
+	}
+}
+#pragma GCC diagnostic pop
diff --git a/net/mptcp/mptcp_rbs_value.h b/net/mptcp/mptcp_rbs_value.h
new file mode 100644
index 0000000000000..981762cab698f
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_value.h
@@ -0,0 +1,2098 @@
+#ifndef _MPTCP_RBS_AST_H
+#define _MPTCP_RBS_AST_H
+
+#include "mptcp_rbs_type.h"
+#include
+
+struct mptcp_rbs_eval_ctx;
+struct mptcp_rbs_sbf_cb;
+struct mptcp_rbs_value_clone_ctx;
+struct sk_buff_head;
+
+/*
+ * The following macro describes all values that can be parsed.
+ * To add a value add a new line to the macro with the following content:
+ * <APPLY macro>(<value kind enum entry>,
+ *               <name of the value as string>,
+ *               <name of the value struct>,
+ *               <type kind enum entry of the value's return type>
+ * )
+ *
+ * The <APPLY macro> determines the value on which the new value can be
+ * applied.
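+ * E.g. a new integer property on subflows (purely illustrative; FOO is
+ * not defined anywhere in this patch) would be registered as:
+ *
+ *   RBS_APPLY_ON_SBF(VALUE_KIND_SBF_FOO, "FOO",
+ *                    mptcp_rbs_value_sbf_foo, TYPE_KIND_INT)
+ *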
For example RQ.getvalue has the macro RBS_APPLY_ON_SKB_LIST because + * getvalue is applied on RQ of type sockbuffer list. Possible macros are: + * RBS_APPLY if the value needs no other value to be applied + * RBS_APPLY_ON_SBF + * RBS_APPLY_ON_SBF_LIST + * RBS_APPLY_ON_SKB + * RBS_APPLY_ON_SKB_LIST + */ +#define MPTCP_RBS_VALUE_INFO \ + RBS_APPLY(VALUE_KIND_Q, "Q", mptcp_rbs_value_q, TYPE_KIND_SKBLIST) \ + RBS_APPLY(VALUE_KIND_QU, "QU", mptcp_rbs_value_qu, TYPE_KIND_SKBLIST) \ + RBS_APPLY(VALUE_KIND_RQ, "RQ", mptcp_rbs_value_rq, TYPE_KIND_SKBLIST) \ + RBS_APPLY(VALUE_KIND_SUBFLOWS, "SUBFLOWS", mptcp_rbs_value_subflows, \ + TYPE_KIND_SBFLIST) \ + RBS_APPLY(VALUE_KIND_CURRENT_TIME_MS, "CURRENT_TIME_MS", \ + mptcp_rbs_value_current_time_ms, TYPE_KIND_INT) \ + RBS_APPLY(VALUE_KIND_RANDOM, "RANDOM", mptcp_rbs_value_random, \ + TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_RTT, "RTT", mptcp_rbs_value_sbf_rtt, \ + TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_RTT_MS, "RTT_MS", mptcp_rbs_value_sbf_rtt_ms, \ + TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_RTT_VAR, "RTT_VAR", mptcp_rbs_value_sbf_rtt_var, \ + TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_USER, "USER", mptcp_rbs_value_sbf_user, \ + TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_IS_BACKUP, "IS_BACKUP", \ + mptcp_rbs_value_sbf_is_backup, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_CWND, "CWND", \ + mptcp_rbs_value_sbf_cwnd, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_QUEUED, "QUEUED", \ + mptcp_rbs_value_sbf_queued, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_SKBS_IN_FLIGHT, "SKBS_IN_FLIGHT", \ + mptcp_rbs_value_sbf_skbs_in_flight, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_LOST_SKBS, "LOST_SKBS", \ + mptcp_rbs_value_sbf_lost_skbs, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_HAS_WINDOW_FOR, "HAS_WINDOW_FOR", \ + mptcp_rbs_value_sbf_has_window_for, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_ID, "ID", mptcp_rbs_value_sbf_id, \ + TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_DELAY_IN, "DELAY_IN", \ + mptcp_rbs_value_sbf_delay_in, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_DELAY_OUT, "DELAY_OUT", \ + mptcp_rbs_value_sbf_delay_out, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_BW_OUT_ACK, "BW_OUT_ACK", \ + mptcp_rbs_value_sbf_bw_out_ack, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_BW_OUT_SEND, "BW_OUT_SEND", \ + mptcp_rbs_value_sbf_bw_out_send, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_SSTHRESH, "SSTHRESH", \ + mptcp_rbs_value_sbf_ssthresh, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_THROTTLED, "THROTTLED", \ + mptcp_rbs_value_sbf_throttled, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_LOSSY, "LOSSY", \ + mptcp_rbs_value_sbf_lossy, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SBF_LIST(VALUE_KIND_SBFLIST_EMPTY, "EMPTY", \ + mptcp_rbs_value_sbf_list_empty, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SBF_LIST(VALUE_KIND_SBFLIST_FILTER, "FILTER", \ + mptcp_rbs_value_sbf_list_filter, \ + TYPE_KIND_SBFLIST) \ + RBS_APPLY(VALUE_KIND_SBFLIST_FILTER_SBF, "", \ + mptcp_rbs_value_sbf_list_filter_sbf, TYPE_KIND_SBF) \ + RBS_APPLY_ON_SBF_LIST(VALUE_KIND_SBFLIST_MAX, "MAX", \ + mptcp_rbs_value_sbf_list_max, TYPE_KIND_SBF) \ + RBS_APPLY_ON_SBF_LIST(VALUE_KIND_SBFLIST_MIN, "MIN", \ + mptcp_rbs_value_sbf_list_min, TYPE_KIND_SBF) \ + RBS_APPLY_ON_SBF_LIST(VALUE_KIND_SBFLIST_GET, "GET", \ + mptcp_rbs_value_sbf_list_get, TYPE_KIND_SBF) \ + RBS_APPLY_ON_SBF_LIST(VALUE_KIND_SBFLIST_COUNT, "COUNT", \ + mptcp_rbs_value_sbf_list_count, TYPE_KIND_INT) \ + 
RBS_APPLY_ON_SBF_LIST(VALUE_KIND_SBFLIST_SUM, "SUM", \ + mptcp_rbs_value_sbf_list_sum, TYPE_KIND_INT) \ + RBS_APPLY_ON_SKB(VALUE_KIND_SKB_SENT_ON, "SENT_ON", \ + mptcp_rbs_value_skb_sent_on, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SKB(VALUE_KIND_SKB_SENT_ON_ALL, "SENT_ON_ALL", \ + mptcp_rbs_value_skb_sent_on_all, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SKB(VALUE_KIND_SKB_USER, "USER", \ + mptcp_rbs_value_skb_user, TYPE_KIND_INT) \ + RBS_APPLY_ON_SKB(VALUE_KIND_SKB_SEQ, "SEQ", \ + mptcp_rbs_value_skb_seq, TYPE_KIND_INT) \ + RBS_APPLY_ON_SKB(VALUE_KIND_SKB_PSH, "PSH", \ + mptcp_rbs_value_skb_psh, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SKB(VALUE_KIND_SKB_LENGTH, "LENGTH", \ + mptcp_rbs_value_skb_length, TYPE_KIND_INT) \ + RBS_APPLY_ON_SKB_LIST(VALUE_KIND_SKBLIST_EMPTY, "EMPTY", \ + mptcp_rbs_value_skb_list_empty, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SKB_LIST(VALUE_KIND_SKBLIST_POP, "POP", \ + mptcp_rbs_value_skb_list_pop, TYPE_KIND_SKB) \ + RBS_APPLY_ON_SKB_LIST(VALUE_KIND_SKBLIST_FILTER, "FILTER", \ + mptcp_rbs_value_skb_list_filter, \ + TYPE_KIND_SKBLIST) \ + RBS_APPLY(VALUE_KIND_SKBLIST_FILTER_SKB, "", \ + mptcp_rbs_value_skb_list_filter_skb, TYPE_KIND_SKB) \ + RBS_APPLY_ON_SKB_LIST(VALUE_KIND_SKBLIST_COUNT, "COUNT", \ + mptcp_rbs_value_skb_list_count, TYPE_KIND_INT) \ + RBS_APPLY_ON_SKB_LIST(VALUE_KIND_SKBLIST_TOP, "TOP", \ + mptcp_rbs_value_skb_list_top, TYPE_KIND_SKB) \ + RBS_APPLY_ON_SKB_LIST(VALUE_KIND_SKBLIST_GET, "GET", \ + mptcp_rbs_value_skb_list_get, TYPE_KIND_SKB) + +enum mptcp_rbs_value_kind { + /* Literals */ + VALUE_KIND_CONSTINT, + VALUE_KIND_CONSTSTRING, + VALUE_KIND_NULL, + + /* Used variables */ + VALUE_KIND_BOOL_VAR, + VALUE_KIND_INT_VAR, + VALUE_KIND_STRING_VAR, + VALUE_KIND_SBF_VAR, + VALUE_KIND_SBFLIST_VAR, + VALUE_KIND_SKB_VAR, + VALUE_KIND_SKBLIST_VAR, + + /* Operators */ + VALUE_KIND_NOT, + VALUE_KIND_EQUAL, + VALUE_KIND_UNEQUAL, + VALUE_KIND_LESS, + VALUE_KIND_LESS_EQUAL, + VALUE_KIND_GREATER, + VALUE_KIND_GREATER_EQUAL, + VALUE_KIND_AND, + VALUE_KIND_OR, + VALUE_KIND_ADD, + VALUE_KIND_SUBTRACT, + VALUE_KIND_MULTIPLY, + VALUE_KIND_DIVIDE, + VALUE_KIND_REMAINDER, + VALUE_KIND_IS_NULL, + VALUE_KIND_IS_NOT_NULL, + + /* Registers */ + VALUE_KIND_REG, + + /* Functions & properties */ + VALUE_KIND_Q, + VALUE_KIND_QU, + VALUE_KIND_RQ, + VALUE_KIND_SUBFLOWS, + VALUE_KIND_CURRENT_TIME_MS, + VALUE_KIND_RANDOM, + + /* Functions & properties on subflows */ + VALUE_KIND_SBF_RTT, + VALUE_KIND_SBF_RTT_MS, + VALUE_KIND_SBF_RTT_VAR, + VALUE_KIND_SBF_USER, + VALUE_KIND_SBF_IS_BACKUP, + VALUE_KIND_SBF_CWND, + VALUE_KIND_SBF_QUEUED, + VALUE_KIND_SBF_SKBS_IN_FLIGHT, + VALUE_KIND_SBF_LOST_SKBS, + VALUE_KIND_SBF_HAS_WINDOW_FOR, + VALUE_KIND_SBF_ID, + VALUE_KIND_SBF_DELAY_IN, + VALUE_KIND_SBF_DELAY_OUT, + VALUE_KIND_SBF_BW_OUT_SEND, + VALUE_KIND_SBF_BW_OUT_ACK, + VALUE_KIND_SBF_SSTHRESH, + VALUE_KIND_SBF_THROTTLED, + VALUE_KIND_SBF_LOSSY, + + /* Functions & properties on subflow lists */ + VALUE_KIND_SBFLIST_NEXT, + VALUE_KIND_SBFLIST_EMPTY, + VALUE_KIND_SBFLIST_FILTER, + VALUE_KIND_SBFLIST_FILTER_SBF, + VALUE_KIND_SBFLIST_MAX, + VALUE_KIND_SBFLIST_MIN, + VALUE_KIND_SBFLIST_GET, + VALUE_KIND_SBFLIST_COUNT, + VALUE_KIND_SBFLIST_SUM, + + /* Functions & properties on sockbuffers */ + VALUE_KIND_SKB_SENT_ON, + VALUE_KIND_SKB_SENT_ON_ALL, + VALUE_KIND_SKB_USER, + VALUE_KIND_SKB_SEQ, + VALUE_KIND_SKB_PSH, + VALUE_KIND_SKB_LENGTH, + + /* Functions & properties on sockbuffer lists */ + VALUE_KIND_SKBLIST_NEXT, + VALUE_KIND_SKBLIST_EMPTY, + VALUE_KIND_SKBLIST_POP, + VALUE_KIND_SKBLIST_FILTER, + 
VALUE_KIND_SKBLIST_FILTER_SKB,
+	VALUE_KIND_SKBLIST_COUNT,
+	VALUE_KIND_SKBLIST_TOP,
+	VALUE_KIND_SKBLIST_GET
+};
+
+/* Macro to release values */
+#define MPTCP_RBS_VALUE_FREE(val) \
+	do { \
+		if (val) \
+			val->free(val); \
+	} while (0)
+
+/*
+ * Base struct for all values
+ */
+struct mptcp_rbs_value {
+	enum mptcp_rbs_value_kind kind;
+	void (*free)(struct mptcp_rbs_value *self);
+};
+
+/*
+ * Base struct for boolean values. The execute function returns -1 on null,
+ * 0 on false and 1 on true
+ */
+struct mptcp_rbs_value_bool {
+	enum mptcp_rbs_value_kind kind;
+	void (*free)(struct mptcp_rbs_value_bool *self);
+	s32 (*execute)(struct mptcp_rbs_value_bool *self,
+		       struct mptcp_rbs_eval_ctx *ctx);
+};
+
+/*
+ * Base struct for integer values. The execute function returns -1 on null
+ * and non-negative values < 2^32 on success
+ */
+struct mptcp_rbs_value_int {
+	enum mptcp_rbs_value_kind kind;
+	void (*free)(struct mptcp_rbs_value_int *self);
+	s64 (*execute)(struct mptcp_rbs_value_int *self,
+		       struct mptcp_rbs_eval_ctx *ctx);
+};
+
+/*
+ * Base struct for string values. The execute function returns NULL on null
+ */
+struct mptcp_rbs_value_string {
+	enum mptcp_rbs_value_kind kind;
+	void (*free)(struct mptcp_rbs_value_string *self);
+	char *(*execute)(struct mptcp_rbs_value_string *self,
+			 struct mptcp_rbs_eval_ctx *ctx);
+};
+
+/*
+ * Base struct for subflow values. The execute function returns NULL on null
+ */
+struct mptcp_rbs_value_sbf {
+	enum mptcp_rbs_value_kind kind;
+	void (*free)(struct mptcp_rbs_value_sbf *self);
+	struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf *self,
+				    struct mptcp_rbs_eval_ctx *ctx);
+};
+
+/*
+ * Base struct for subflow list values
+ */
+struct mptcp_rbs_value_sbf_list {
+	enum mptcp_rbs_value_kind kind;
+	void (*free)(struct mptcp_rbs_value_sbf_list *self);
+	struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf_list *self,
+				    struct mptcp_rbs_eval_ctx *ctx,
+				    void **prev, bool *is_null);
+};
+
+/*
+ * Base struct for sockbuffer values.
The execute function returns NULL on null + */ +struct mptcp_rbs_value_skb { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb *self, + struct mptcp_rbs_eval_ctx *ctx); + bool reinject; +}; + +/* + * Base struct for sockbuffer list values + */ +struct mptcp_rbs_value_skb_list { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_list *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); + enum mptcp_rbs_value_kind underlying_queue_kind; +}; + +/* + * Integer literal value + */ +struct mptcp_rbs_value_constint { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_constint *self); + s64 (*execute)(struct mptcp_rbs_value_constint *self, + struct mptcp_rbs_eval_ctx *ctx); + unsigned int value; +}; + +struct mptcp_rbs_value_constint *mptcp_rbs_value_constint_new(unsigned int num); +void mptcp_rbs_value_constint_free(struct mptcp_rbs_value_constint *self); +s64 mptcp_rbs_value_constint_execute(struct mptcp_rbs_value_constint *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_constint *mptcp_rbs_value_constint_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_constint *value); +int mptcp_rbs_value_constint_print(const struct mptcp_rbs_value_constint *value, + char *buffer); + +/* + * String literal value + */ +struct mptcp_rbs_value_conststring { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_conststring *self); + char *(*execute)(struct mptcp_rbs_value_conststring *self, + struct mptcp_rbs_eval_ctx *ctx); + char *value; +}; + +struct mptcp_rbs_value_conststring *mptcp_rbs_value_conststring_new(char *str); +void mptcp_rbs_value_conststring_free(struct mptcp_rbs_value_conststring *self); +char *mptcp_rbs_value_conststring_execute( + struct mptcp_rbs_value_conststring *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_conststring *mptcp_rbs_value_conststring_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_conststring *value); +int mptcp_rbs_value_conststring_print( + const struct mptcp_rbs_value_conststring *value, char *buffer); + +/* + * NULL literal value + */ +struct mptcp_rbs_value_null { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_null *self); + s32 (*execute)(struct mptcp_rbs_value_null *self, + struct mptcp_rbs_eval_ctx *ctx); +}; + +struct mptcp_rbs_value_null *mptcp_rbs_value_null_new(void); +void mptcp_rbs_value_null_free(struct mptcp_rbs_value_null *self); +s32 mptcp_rbs_value_null_execute(struct mptcp_rbs_value_null *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_null *mptcp_rbs_value_null_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_null *value); +int mptcp_rbs_value_null_print(const struct mptcp_rbs_value_null *value, + char *buffer); + +/* + * Boolean variable value + */ +struct mptcp_rbs_value_bool_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_bool_var *self); + s32 (*execute)(struct mptcp_rbs_value_bool_var *self, + struct mptcp_rbs_eval_ctx *ctx); + int var_number; +}; + +struct mptcp_rbs_value_bool_var *mptcp_rbs_value_bool_var_new(int var_number); +void mptcp_rbs_value_bool_var_free(struct mptcp_rbs_value_bool_var *self); +s32 mptcp_rbs_value_bool_var_execute(struct mptcp_rbs_value_bool_var *self, + struct mptcp_rbs_eval_ctx *ctx); +struct 
mptcp_rbs_value_bool_var *mptcp_rbs_value_bool_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_bool_var *value); +int mptcp_rbs_value_bool_var_print(const struct mptcp_rbs_value_bool_var *value, + char *buffer); + +/* + * Integer variable value + */ +struct mptcp_rbs_value_int_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_int_var *self); + s64 (*execute)(struct mptcp_rbs_value_int_var *self, + struct mptcp_rbs_eval_ctx *ctx); + int var_number; +}; + +struct mptcp_rbs_value_int_var *mptcp_rbs_value_int_var_new(int var_number); +void mptcp_rbs_value_int_var_free(struct mptcp_rbs_value_int_var *self); +s64 mptcp_rbs_value_int_var_execute(struct mptcp_rbs_value_int_var *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_int_var *mptcp_rbs_value_int_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_int_var *value); +int mptcp_rbs_value_int_var_print(const struct mptcp_rbs_value_int_var *value, + char *buffer); + +/* + * String variable value + */ +struct mptcp_rbs_value_string_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_string_var *self); + char *(*execute)(struct mptcp_rbs_value_string_var *self, + struct mptcp_rbs_eval_ctx *ctx); + int var_number; +}; + +struct mptcp_rbs_value_string_var *mptcp_rbs_value_string_var_new( + int var_number); +void mptcp_rbs_value_string_var_free(struct mptcp_rbs_value_string_var *self); +char *mptcp_rbs_value_string_var_execute( + struct mptcp_rbs_value_string_var *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_string_var *mptcp_rbs_value_string_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_string_var *value); +int mptcp_rbs_value_string_var_print( + const struct mptcp_rbs_value_string_var *value, char *buffer); + +/* + * Subflow variable value + */ +struct mptcp_rbs_value_sbf_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_var *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf_var *self, + struct mptcp_rbs_eval_ctx *ctx); + int var_number; +}; + +struct mptcp_rbs_value_sbf_var *mptcp_rbs_value_sbf_var_new(int var_number); +void mptcp_rbs_value_sbf_var_free(struct mptcp_rbs_value_sbf_var *self); +struct tcp_sock *mptcp_rbs_value_sbf_var_execute( + struct mptcp_rbs_value_sbf_var *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_var *mptcp_rbs_value_sbf_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_var *value); +int mptcp_rbs_value_sbf_var_print(const struct mptcp_rbs_value_sbf_var *value, + char *buffer); + +/* + * Subflow list variable value + */ +struct mptcp_rbs_value_sbf_list_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_var *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf_list_var *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); + int var_number; +}; + +struct mptcp_rbs_value_sbf_list_var *mptcp_rbs_value_sbf_list_var_new( + int var_number); +void mptcp_rbs_value_sbf_list_var_free( + struct mptcp_rbs_value_sbf_list_var *self); +struct tcp_sock *mptcp_rbs_value_sbf_list_var_execute( + struct mptcp_rbs_value_sbf_list_var *self, struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null); +struct mptcp_rbs_value_sbf_list_var *mptcp_rbs_value_sbf_list_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_var *value); +int 
mptcp_rbs_value_sbf_list_var_print( + const struct mptcp_rbs_value_sbf_list_var *value, char *buffer); + +/* + * Sockbuffer variable value + */ +struct mptcp_rbs_value_skb_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_var *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_var *self, + struct mptcp_rbs_eval_ctx *ctx); + bool reinject; + int var_number; +}; + +struct mptcp_rbs_value_skb_var *mptcp_rbs_value_skb_var_new(int var_number, + bool reinject); +void mptcp_rbs_value_skb_var_free(struct mptcp_rbs_value_skb_var *self); +struct sk_buff *mptcp_rbs_value_skb_var_execute( + struct mptcp_rbs_value_skb_var *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_var *mptcp_rbs_value_skb_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_var *value); +int mptcp_rbs_value_skb_var_print(const struct mptcp_rbs_value_skb_var *value, + char *buffer); + +/* + * Sockbuffer list variable value + */ +struct mptcp_rbs_value_skb_list_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_var *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_list_var *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); + enum mptcp_rbs_value_kind underlying_queue_kind; + int var_number; +}; + +struct mptcp_rbs_value_skb_list_var *mptcp_rbs_value_skb_list_var_new( + int var_number, enum mptcp_rbs_value_kind underlying_queue_kind); +void mptcp_rbs_value_skb_list_var_free( + struct mptcp_rbs_value_skb_list_var *self); +struct sk_buff *mptcp_rbs_value_skb_list_var_execute( + struct mptcp_rbs_value_skb_list_var *self, struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null); +struct mptcp_rbs_value_skb_list_var *mptcp_rbs_value_skb_list_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_var *value); +int mptcp_rbs_value_skb_list_var_print( + const struct mptcp_rbs_value_skb_list_var *value, char *buffer); + +/* + * NOT operator value + */ +struct mptcp_rbs_value_not { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_not *self); + s32 (*execute)(struct mptcp_rbs_value_not *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_bool *operand; +}; + +struct mptcp_rbs_value_not *mptcp_rbs_value_not_new( + struct mptcp_rbs_value_bool *operand); +void mptcp_rbs_value_not_free(struct mptcp_rbs_value_not *self); +s32 mptcp_rbs_value_not_execute(struct mptcp_rbs_value_not *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_not *mptcp_rbs_value_not_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_not *value); +int mptcp_rbs_value_not_print(const struct mptcp_rbs_value_not *value, + char *buffer); + +/* + * == operator value + */ +struct mptcp_rbs_value_equal { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_equal *self); + s32 (*execute)(struct mptcp_rbs_value_equal *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_equal *mptcp_rbs_value_equal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_equal_free(struct mptcp_rbs_value_equal *self); +s32 mptcp_rbs_value_equal_execute(struct mptcp_rbs_value_equal *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_equal *mptcp_rbs_value_equal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct 
mptcp_rbs_value_equal *value); +int mptcp_rbs_value_equal_print(const struct mptcp_rbs_value_equal *value, + char *buffer); + +/* + * != operator value + */ +struct mptcp_rbs_value_unequal { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_unequal *self); + s32 (*execute)(struct mptcp_rbs_value_unequal *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_unequal *mptcp_rbs_value_unequal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_unequal_free(struct mptcp_rbs_value_unequal *self); +s32 mptcp_rbs_value_unequal_execute(struct mptcp_rbs_value_unequal *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_unequal *mptcp_rbs_value_unequal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_unequal *value); +int mptcp_rbs_value_unequal_print(const struct mptcp_rbs_value_unequal *value, + char *buffer); + +/* + * < operator value + */ +struct mptcp_rbs_value_less { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_less *self); + s32 (*execute)(struct mptcp_rbs_value_less *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_less *mptcp_rbs_value_less_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_less_free(struct mptcp_rbs_value_less *self); +s32 mptcp_rbs_value_less_execute(struct mptcp_rbs_value_less *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_less *mptcp_rbs_value_less_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_less *value); +int mptcp_rbs_value_less_print(const struct mptcp_rbs_value_less *value, + char *buffer); + +/* + * <= operator value + */ +struct mptcp_rbs_value_less_equal { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_less_equal *self); + s32 (*execute)(struct mptcp_rbs_value_less_equal *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_less_equal *mptcp_rbs_value_less_equal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_less_equal_free(struct mptcp_rbs_value_less_equal *self); +s32 mptcp_rbs_value_less_equal_execute(struct mptcp_rbs_value_less_equal *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_less_equal *mptcp_rbs_value_less_equal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_less_equal *value); +int mptcp_rbs_value_less_equal_print( + const struct mptcp_rbs_value_less_equal *value, char *buffer); + +/* + * > operator value + */ +struct mptcp_rbs_value_greater { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_greater *self); + s32 (*execute)(struct mptcp_rbs_value_greater *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_greater *mptcp_rbs_value_greater_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_greater_free(struct mptcp_rbs_value_greater *self); +s32 mptcp_rbs_value_greater_execute(struct mptcp_rbs_value_greater *self, + struct 
mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_greater *mptcp_rbs_value_greater_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_greater *value); +int mptcp_rbs_value_greater_print(const struct mptcp_rbs_value_greater *value, + char *buffer); + +/* + * >= operator value + */ +struct mptcp_rbs_value_greater_equal { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_greater_equal *self); + s32 (*execute)(struct mptcp_rbs_value_greater_equal *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_greater_equal *mptcp_rbs_value_greater_equal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_greater_equal_free( + struct mptcp_rbs_value_greater_equal *self); +s32 mptcp_rbs_value_greater_equal_execute( + struct mptcp_rbs_value_greater_equal *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_greater_equal *mptcp_rbs_value_greater_equal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_greater_equal *value); +int mptcp_rbs_value_greater_equal_print( + const struct mptcp_rbs_value_greater_equal *value, char *buffer); + +/* + * AND operator value + */ +struct mptcp_rbs_value_and { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_and *self); + s32 (*execute)(struct mptcp_rbs_value_and *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_bool *left_operand; + struct mptcp_rbs_value_bool *right_operand; +}; + +struct mptcp_rbs_value_and *mptcp_rbs_value_and_new( + struct mptcp_rbs_value_bool *left_operand, + struct mptcp_rbs_value_bool *right_operand); +void mptcp_rbs_value_and_free(struct mptcp_rbs_value_and *self); +s32 mptcp_rbs_value_and_execute(struct mptcp_rbs_value_and *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_and *mptcp_rbs_value_and_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_and *value); +int mptcp_rbs_value_and_print(const struct mptcp_rbs_value_and *value, + char *buffer); + +/* + * OR operator value + */ +struct mptcp_rbs_value_or { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_or *self); + s32 (*execute)(struct mptcp_rbs_value_or *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_bool *left_operand; + struct mptcp_rbs_value_bool *right_operand; +}; + +struct mptcp_rbs_value_or *mptcp_rbs_value_or_new( + struct mptcp_rbs_value_bool *left_operand, + struct mptcp_rbs_value_bool *right_operand); +void mptcp_rbs_value_or_free(struct mptcp_rbs_value_or *self); +s32 mptcp_rbs_value_or_execute(struct mptcp_rbs_value_or *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_or *mptcp_rbs_value_or_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_or *value); +int mptcp_rbs_value_or_print(const struct mptcp_rbs_value_or *value, + char *buffer); + +/* + * + operator value + */ +struct mptcp_rbs_value_add { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_add *self); + s64 (*execute)(struct mptcp_rbs_value_add *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_add *mptcp_rbs_value_add_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_add_free(struct mptcp_rbs_value_add 
*self); +s64 mptcp_rbs_value_add_execute(struct mptcp_rbs_value_add *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_add *mptcp_rbs_value_add_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_add *value); +int mptcp_rbs_value_add_print(const struct mptcp_rbs_value_add *value, + char *buffer); + +/* + * - operator value + */ +struct mptcp_rbs_value_subtract { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_subtract *self); + s64 (*execute)(struct mptcp_rbs_value_subtract *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_subtract *mptcp_rbs_value_subtract_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_subtract_free(struct mptcp_rbs_value_subtract *self); +s64 mptcp_rbs_value_subtract_execute(struct mptcp_rbs_value_subtract *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_subtract *mptcp_rbs_value_subtract_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_subtract *value); +int mptcp_rbs_value_subtract_print(const struct mptcp_rbs_value_subtract *value, + char *buffer); + +/* + * * operator value + */ +struct mptcp_rbs_value_multiply { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_multiply *self); + s64 (*execute)(struct mptcp_rbs_value_multiply *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_multiply *mptcp_rbs_value_multiply_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_multiply_free(struct mptcp_rbs_value_multiply *self); +s64 mptcp_rbs_value_multiply_execute(struct mptcp_rbs_value_multiply *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_multiply *mptcp_rbs_value_multiply_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_multiply *value); +int mptcp_rbs_value_multiply_print(const struct mptcp_rbs_value_multiply *value, + char *buffer); + +/* + * / operator value + */ +struct mptcp_rbs_value_divide { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_divide *self); + s64 (*execute)(struct mptcp_rbs_value_divide *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_divide *mptcp_rbs_value_divide_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_divide_free(struct mptcp_rbs_value_divide *self); +s64 mptcp_rbs_value_divide_execute(struct mptcp_rbs_value_divide *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_divide *mptcp_rbs_value_divide_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_divide *value); +int mptcp_rbs_value_divide_print(const struct mptcp_rbs_value_divide *value, + char *buffer); + +/* + * % operator value + */ +struct mptcp_rbs_value_remainder { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_remainder *self); + s64 (*execute)(struct mptcp_rbs_value_remainder *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_remainder *mptcp_rbs_value_remainder_new( + struct 
mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_remainder_free(struct mptcp_rbs_value_remainder *self); +s64 mptcp_rbs_value_remainder_execute(struct mptcp_rbs_value_remainder *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_remainder *mptcp_rbs_value_remainder_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_remainder *value); +int mptcp_rbs_value_remainder_print( + const struct mptcp_rbs_value_remainder *value, char *buffer); + +/* + * == null operator value + */ +struct mptcp_rbs_value_is_null { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_is_null *self); + s32 (*execute)(struct mptcp_rbs_value_is_null *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value *operand; +}; + +struct mptcp_rbs_value_is_null *mptcp_rbs_value_is_null_new( + struct mptcp_rbs_value *operand); +void mptcp_rbs_value_is_null_free(struct mptcp_rbs_value_is_null *self); +s32 mptcp_rbs_value_is_null_execute(struct mptcp_rbs_value_is_null *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_is_null *mptcp_rbs_value_is_null_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_is_null *value); +int mptcp_rbs_value_is_null_print(const struct mptcp_rbs_value_is_null *value, + char *buffer); + +/* + * != null operator value + */ +struct mptcp_rbs_value_is_not_null { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_is_not_null *self); + s32 (*execute)(struct mptcp_rbs_value_is_not_null *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value *operand; +}; + +struct mptcp_rbs_value_is_not_null *mptcp_rbs_value_is_not_null_new( + struct mptcp_rbs_value *operand); +void mptcp_rbs_value_is_not_null_free(struct mptcp_rbs_value_is_not_null *self); +s32 mptcp_rbs_value_is_not_null_execute( + struct mptcp_rbs_value_is_not_null *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_is_not_null *mptcp_rbs_value_is_not_null_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_is_not_null *value); +int mptcp_rbs_value_is_not_null_print( + const struct mptcp_rbs_value_is_not_null *value, char *buffer); + +/* + * R1-6 integer values + */ +struct mptcp_rbs_value_reg { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_reg *self); + s64 (*execute)(struct mptcp_rbs_value_reg *self, + struct mptcp_rbs_eval_ctx *ctx); + int reg_number; +}; + +struct mptcp_rbs_value_reg *mptcp_rbs_value_reg_new(int reg_number); +void mptcp_rbs_value_reg_free(struct mptcp_rbs_value_reg *self); +s64 mptcp_rbs_value_reg_execute(struct mptcp_rbs_value_reg *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_reg *mptcp_rbs_value_reg_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_reg *value); +int mptcp_rbs_value_reg_print(const struct mptcp_rbs_value_reg *value, + char *buffer); + +/* + * Q sockbuffer list value + */ +struct mptcp_rbs_value_q { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_q *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_q *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); + enum mptcp_rbs_value_kind underlying_queue_kind; +}; + +struct mptcp_rbs_value_q *mptcp_rbs_value_q_new(void); +void mptcp_rbs_value_q_free(struct mptcp_rbs_value_q *self); +struct sk_buff *mptcp_rbs_value_q_execute(struct mptcp_rbs_value_q *self, + struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null); 
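The list-typed values introduced here (Q, and below QU, RQ and SUBFLOWS) share a pull-style execute signature: the caller threads an opaque cursor through *prev and the value reports a null list through *is_null. A consumer loop, assuming the cursor starts as NULL and a NULL return ends the iteration:

	void *prev = NULL;	/* opaque cursor, advanced by the value */
	bool is_null;
	struct sk_buff *skb;

	while ((skb = q->execute(q, ctx, &prev, &is_null)) != NULL) {
		/* visit one sockbuffer of the Q queue */
	}
	if (is_null) {
		/* the list itself was null, not merely exhausted */
	}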
+struct mptcp_rbs_value_q *mptcp_rbs_value_q_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_q *value); +int mptcp_rbs_value_q_print(const struct mptcp_rbs_value_q *value, + char *buffer); + +/* + * QU sockbuffer list value + */ +struct mptcp_rbs_value_qu { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_qu *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_qu *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); + enum mptcp_rbs_value_kind underlying_queue_kind; +}; + +struct mptcp_rbs_value_qu *mptcp_rbs_value_qu_new(void); +void mptcp_rbs_value_qu_free(struct mptcp_rbs_value_qu *self); +struct sk_buff *mptcp_rbs_value_qu_execute(struct mptcp_rbs_value_qu *self, + struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null); +struct mptcp_rbs_value_qu *mptcp_rbs_value_qu_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_qu *value); +int mptcp_rbs_value_qu_print(const struct mptcp_rbs_value_qu *value, + char *buffer); + +/* + * RQ sockbuffer list value + */ +struct mptcp_rbs_value_rq { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_rq *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_rq *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); + enum mptcp_rbs_value_kind underlying_queue_kind; +}; + +struct mptcp_rbs_value_rq *mptcp_rbs_value_rq_new(void); +void mptcp_rbs_value_rq_free(struct mptcp_rbs_value_rq *self); +struct sk_buff *mptcp_rbs_value_rq_execute(struct mptcp_rbs_value_rq *self, + struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null); +struct mptcp_rbs_value_rq *mptcp_rbs_value_rq_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_rq *value); +int mptcp_rbs_value_rq_print(const struct mptcp_rbs_value_rq *value, + char *buffer); + +/* + * SUBFLOWS subflow list value + */ +struct mptcp_rbs_value_subflows { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_subflows *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_subflows *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); +}; + +struct mptcp_rbs_value_subflows *mptcp_rbs_value_subflows_new(void); +void mptcp_rbs_value_subflows_free(struct mptcp_rbs_value_subflows *self); +struct tcp_sock *mptcp_rbs_value_subflows_execute( + struct mptcp_rbs_value_subflows *self, struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null); +struct mptcp_rbs_value_subflows *mptcp_rbs_value_subflows_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_subflows *value); +int mptcp_rbs_value_subflows_print(const struct mptcp_rbs_value_subflows *value, + char *buffer); + +/* + * CURRENT_TIME_MS integer value + */ +struct mptcp_rbs_value_current_time_ms { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_current_time_ms *self); + s64 (*execute)(struct mptcp_rbs_value_current_time_ms *self, + struct mptcp_rbs_eval_ctx *ctx); +}; + +struct mptcp_rbs_value_current_time_ms *mptcp_rbs_value_current_time_ms_new( + void); +void mptcp_rbs_value_current_time_ms_free( + struct mptcp_rbs_value_current_time_ms *self); +s64 mptcp_rbs_value_current_time_ms_execute( + struct mptcp_rbs_value_current_time_ms *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_current_time_ms *mptcp_rbs_value_current_time_ms_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_current_time_ms *value); +int mptcp_rbs_value_current_time_ms_print( + 
const struct mptcp_rbs_value_current_time_ms *value, char *buffer); + +/* + * RANDOM integer value + */ +struct mptcp_rbs_value_random { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_random *self); + s64 (*execute)(struct mptcp_rbs_value_random *self, + struct mptcp_rbs_eval_ctx *ctx); +}; + +struct mptcp_rbs_value_random *mptcp_rbs_value_random_new(void); +void mptcp_rbs_value_random_free(struct mptcp_rbs_value_random *self); +s64 mptcp_rbs_value_random_execute(struct mptcp_rbs_value_random *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_random *mptcp_rbs_value_random_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_random *value); +int mptcp_rbs_value_random_print(const struct mptcp_rbs_value_random *value, + char *buffer); + +/* + * .RTT integer value + */ +struct mptcp_rbs_value_sbf_rtt { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_rtt *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_rtt *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_rtt *mptcp_rbs_value_sbf_rtt_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_rtt_free(struct mptcp_rbs_value_sbf_rtt *self); +s64 mptcp_rbs_value_sbf_rtt_execute(struct mptcp_rbs_value_sbf_rtt *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_rtt *mptcp_rbs_value_sbf_rtt_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_rtt *value); +int mptcp_rbs_value_sbf_rtt_print(const struct mptcp_rbs_value_sbf_rtt *value, + char *buffer); + +/* + * .RTT_MS integer value + */ +struct mptcp_rbs_value_sbf_rtt_ms { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_rtt_ms *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_rtt_ms *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_rtt_ms *mptcp_rbs_value_sbf_rtt_ms_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_rtt_ms_free(struct mptcp_rbs_value_sbf_rtt_ms *self); +s64 mptcp_rbs_value_sbf_rtt_ms_execute(struct mptcp_rbs_value_sbf_rtt_ms *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_rtt_ms *mptcp_rbs_value_sbf_rtt_ms_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_rtt_ms *value); +int mptcp_rbs_value_sbf_rtt_ms_print(const struct mptcp_rbs_value_sbf_rtt_ms *value, + char *buffer); + +/* + * .RTT_VAR integer value + */ +struct mptcp_rbs_value_sbf_rtt_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_rtt_var *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_rtt_var *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_rtt_var *mptcp_rbs_value_sbf_rtt_var_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_rtt_var_free(struct mptcp_rbs_value_sbf_rtt_var *self); +s64 mptcp_rbs_value_sbf_rtt_var_execute(struct mptcp_rbs_value_sbf_rtt_var *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_rtt_var *mptcp_rbs_value_sbf_rtt_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_rtt_var *value); +int mptcp_rbs_value_sbf_rtt_var_print(const struct mptcp_rbs_value_sbf_rtt_var *value, + char *buffer); + + /* + * .USER integer value + */ +struct mptcp_rbs_value_sbf_user { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_user *self); + s64 
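.RTT, .RTT_MS and .RTT_VAR are representative of all per-subflow getters: each wraps an mptcp_rbs_value_sbf operand and reduces it to an s64. A short sketch of reading a subflow's smoothed RTT, assuming the constructor takes ownership of sbf and that a negative execute result signals a null subflow:

	struct mptcp_rbs_value_sbf_rtt *rtt = mptcp_rbs_value_sbf_rtt_new(sbf);
	s64 srtt = rtt->execute(rtt, ctx);

	if (srtt < 0) {
		/* the subflow value evaluated to null; propagate that */
	} else {
		/* srtt holds the smoothed RTT of the subflow */
	}
	rtt->free(rtt);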
(*execute)(struct mptcp_rbs_value_sbf_user *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_user *mptcp_rbs_value_sbf_user_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_user_free(struct mptcp_rbs_value_sbf_user *self); +s64 mptcp_rbs_value_sbf_user_execute(struct mptcp_rbs_value_sbf_user *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_user *mptcp_rbs_value_sbf_user_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_user *value); +int mptcp_rbs_value_sbf_user_print(const struct mptcp_rbs_value_sbf_user *value, + char *buffer); + +/* + * .IS_BACKUP boolean value + */ +struct mptcp_rbs_value_sbf_is_backup { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_is_backup *self); + s32 (*execute)(struct mptcp_rbs_value_sbf_is_backup *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_is_backup *mptcp_rbs_value_sbf_is_backup_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_is_backup_free( + struct mptcp_rbs_value_sbf_is_backup *self); +s32 mptcp_rbs_value_sbf_is_backup_execute( + struct mptcp_rbs_value_sbf_is_backup *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_is_backup *mptcp_rbs_value_sbf_is_backup_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_is_backup *value); +int mptcp_rbs_value_sbf_is_backup_print( + const struct mptcp_rbs_value_sbf_is_backup *value, char *buffer); +/* + * .THROTTLED boolean value + */ +struct mptcp_rbs_value_sbf_throttled { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_throttled *self); + s32 (*execute)(struct mptcp_rbs_value_sbf_throttled *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_throttled *mptcp_rbs_value_sbf_throttled_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_throttled_free( + struct mptcp_rbs_value_sbf_throttled *self); +s32 mptcp_rbs_value_sbf_throttled_execute( + struct mptcp_rbs_value_sbf_throttled *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_throttled *mptcp_rbs_value_sbf_throttled_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_throttled *value); +int mptcp_rbs_value_sbf_throttled_print( + const struct mptcp_rbs_value_sbf_throttled *value, char *buffer); + +/* + * .CWND integer value + */ +struct mptcp_rbs_value_sbf_cwnd { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_cwnd *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_cwnd *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_cwnd *mptcp_rbs_value_sbf_cwnd_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_cwnd_free(struct mptcp_rbs_value_sbf_cwnd *self); +s64 mptcp_rbs_value_sbf_cwnd_execute(struct mptcp_rbs_value_sbf_cwnd *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_cwnd *mptcp_rbs_value_sbf_cwnd_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_cwnd *value); +int mptcp_rbs_value_sbf_cwnd_print(const struct mptcp_rbs_value_sbf_cwnd *value, + char *buffer); + +/* + * .QUEUED integer value + */ +struct mptcp_rbs_value_sbf_queued { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_queued *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_queued *self, + 
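Note that boolean-typed nodes such as .IS_BACKUP and .THROTTLED return s32 rather than bool, which leaves room for a third state; presumably a negative result encodes a null operand while 0 and 1 are false and true. A defensive caller would therefore branch three ways:

	s32 v = is_backup->execute(is_backup, ctx);

	if (v < 0)
		;	/* null: the subflow could not be evaluated */
	else if (v)
		;	/* true: the subflow is flagged as backup */
	else
		;	/* false */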
struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_queued *mptcp_rbs_value_sbf_queued_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_queued_free(struct mptcp_rbs_value_sbf_queued *self); +s64 mptcp_rbs_value_sbf_queued_execute(struct mptcp_rbs_value_sbf_queued *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_queued *mptcp_rbs_value_sbf_queued_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_queued *value); +int mptcp_rbs_value_sbf_queued_print(const struct mptcp_rbs_value_sbf_queued *value, + char *buffer); + +/* + * .SKBS_IN_FLIGHT integer value + */ +struct mptcp_rbs_value_sbf_skbs_in_flight { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_skbs_in_flight *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_skbs_in_flight *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_skbs_in_flight * +mptcp_rbs_value_sbf_skbs_in_flight_new(struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_skbs_in_flight_free( + struct mptcp_rbs_value_sbf_skbs_in_flight *self); +s64 mptcp_rbs_value_sbf_skbs_in_flight_execute( + struct mptcp_rbs_value_sbf_skbs_in_flight *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_skbs_in_flight * +mptcp_rbs_value_sbf_skbs_in_flight_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_skbs_in_flight *value); +int mptcp_rbs_value_sbf_skbs_in_flight_print( + const struct mptcp_rbs_value_sbf_skbs_in_flight *value, char *buffer); + +/* + * .LOST_SKBS integer value + */ +struct mptcp_rbs_value_sbf_lost_skbs { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_lost_skbs *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_lost_skbs *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_lost_skbs *mptcp_rbs_value_sbf_lost_skbs_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_lost_skbs_free( + struct mptcp_rbs_value_sbf_lost_skbs *self); +s64 mptcp_rbs_value_sbf_lost_skbs_execute( + struct mptcp_rbs_value_sbf_lost_skbs *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_lost_skbs *mptcp_rbs_value_sbf_lost_skbs_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_lost_skbs *value); +int mptcp_rbs_value_sbf_lost_skbs_print( + const struct mptcp_rbs_value_sbf_lost_skbs *value, char *buffer); + +/* + * .HAS_WINDOW_FOR boolean value + */ +struct mptcp_rbs_value_sbf_has_window_for { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_has_window_for *self); + s32 (*execute)(struct mptcp_rbs_value_sbf_has_window_for *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; + struct mptcp_rbs_value_skb *skb; +}; + +struct mptcp_rbs_value_sbf_has_window_for * +mptcp_rbs_value_sbf_has_window_for_new(struct mptcp_rbs_value_sbf *sbf, + struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_value_sbf_has_window_for_free( + struct mptcp_rbs_value_sbf_has_window_for *self); +s32 mptcp_rbs_value_sbf_has_window_for_execute( + struct mptcp_rbs_value_sbf_has_window_for *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_has_window_for * +mptcp_rbs_value_sbf_has_window_for_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_has_window_for *value); +int mptcp_rbs_value_sbf_has_window_for_print( + const struct 
mptcp_rbs_value_sbf_has_window_for *value, char *buffer); + +/* + * .ID integer value + */ +struct mptcp_rbs_value_sbf_id { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_id *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_id *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_id *mptcp_rbs_value_sbf_id_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_id_free(struct mptcp_rbs_value_sbf_id *self); +s64 mptcp_rbs_value_sbf_id_execute(struct mptcp_rbs_value_sbf_id *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_id *mptcp_rbs_value_sbf_id_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_id *value); +int mptcp_rbs_value_sbf_id_print(const struct mptcp_rbs_value_sbf_id *value, + char *buffer); + +/* + * .DELAY_IN integer value + */ +struct mptcp_rbs_value_sbf_delay_in { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_delay_in *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_delay_in *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_delay_in *mptcp_rbs_value_sbf_delay_in_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_delay_in_free( + struct mptcp_rbs_value_sbf_delay_in *self); +s64 mptcp_rbs_value_sbf_delay_in_execute( + struct mptcp_rbs_value_sbf_delay_in *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_delay_in *mptcp_rbs_value_sbf_delay_in_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_delay_in *value); +int mptcp_rbs_value_sbf_delay_in_print( + const struct mptcp_rbs_value_sbf_delay_in *value, char *buffer); + +/* + * .DELAY_OUT integer value + */ +struct mptcp_rbs_value_sbf_delay_out { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_delay_out *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_delay_out *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_delay_out *mptcp_rbs_value_sbf_delay_out_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_delay_out_free( + struct mptcp_rbs_value_sbf_delay_out *self); +s64 mptcp_rbs_value_sbf_delay_out_execute( + struct mptcp_rbs_value_sbf_delay_out *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_delay_out *mptcp_rbs_value_sbf_delay_out_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_delay_out *value); +int mptcp_rbs_value_sbf_delay_out_print( + const struct mptcp_rbs_value_sbf_delay_out *value, char *buffer); + +/* some helper */ + +void mptcp_rbs_sbf_delay_update(struct tcp_sock *tp, const struct sk_buff *skb); + +void mptcp_rbs_sbf_bw_ack_add(struct tcp_sock *tp, unsigned int bytes); + +void mptcp_rbs_sbf_bw_send_add(struct tcp_sock *tp, unsigned int bytes); + +/* + * .BW_OUT_SEND integer value + */ +struct mptcp_rbs_value_sbf_bw_out_send { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_bw_out_send *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_bw_out_send *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_bw_out_send *mptcp_rbs_value_sbf_bw_out_send_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_bw_out_send_free( + struct mptcp_rbs_value_sbf_bw_out_send *self); +s64 mptcp_rbs_value_sbf_bw_out_send_execute( + struct mptcp_rbs_value_sbf_bw_out_send *self, + 
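mptcp_rbs_sbf_delay_update() and the two bandwidth helpers above are the hooks through which the TCP send and ACK paths feed the per-subflow statistics that .DELAY_IN/.DELAY_OUT, .BW_OUT_SEND and .BW_OUT_ACK later read back. Purely illustrative call sites (the real hook locations live in the scheduler and TCP glue code, not in this header):

	/* hypothetical transmit-path hook: account bytes sent on subflow tp */
	mptcp_rbs_sbf_bw_send_add(tp, skb->len);

	/* hypothetical ACK-path hooks: account acknowledged bytes and
	 * refresh the delay estimate from the acked skb */
	mptcp_rbs_sbf_bw_ack_add(tp, acked_bytes);
	mptcp_rbs_sbf_delay_update(tp, skb);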
struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_bw_out_send *mptcp_rbs_value_sbf_bw_out_send_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_bw_out_send *value); +int mptcp_rbs_value_sbf_bw_out_send_print( + const struct mptcp_rbs_value_sbf_bw_out_send *value, char *buffer); + +/* + * .BW_OUT_ACK integer value + */ +struct mptcp_rbs_value_sbf_bw_out_ack { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_bw_out_ack *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_bw_out_ack *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_bw_out_ack *mptcp_rbs_value_sbf_bw_out_ack_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_bw_out_ack_free( + struct mptcp_rbs_value_sbf_bw_out_ack *self); +s64 mptcp_rbs_value_sbf_bw_out_ack_execute( + struct mptcp_rbs_value_sbf_bw_out_ack *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_bw_out_ack *mptcp_rbs_value_sbf_bw_out_ack_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_bw_out_ack *value); +int mptcp_rbs_value_sbf_bw_out_ack_print( + const struct mptcp_rbs_value_sbf_bw_out_ack *value, char *buffer); + +/* + * .SSTHRESH integer value + */ +struct mptcp_rbs_value_sbf_ssthresh { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_ssthresh *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_ssthresh *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_ssthresh *mptcp_rbs_value_sbf_ssthresh_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_ssthresh_free( + struct mptcp_rbs_value_sbf_ssthresh *self); +s64 mptcp_rbs_value_sbf_ssthresh_execute( + struct mptcp_rbs_value_sbf_ssthresh *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_ssthresh *mptcp_rbs_value_sbf_ssthresh_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_ssthresh *value); +int mptcp_rbs_value_sbf_ssthresh_print( + const struct mptcp_rbs_value_sbf_ssthresh *value, char *buffer); + +/* + * .LOSSY boolean value + */ +struct mptcp_rbs_value_sbf_lossy { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_lossy *self); + s32 (*execute)(struct mptcp_rbs_value_sbf_lossy *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_lossy *mptcp_rbs_value_sbf_lossy_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_lossy_free(struct mptcp_rbs_value_sbf_lossy *self); +s32 mptcp_rbs_value_sbf_lossy_execute(struct mptcp_rbs_value_sbf_lossy *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_lossy *mptcp_rbs_value_sbf_lossy_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_lossy *value); +int mptcp_rbs_value_sbf_lossy_print( + const struct mptcp_rbs_value_sbf_lossy *value, char *buffer); + +/* + * .NEXT subflow value + */ +struct mptcp_rbs_value_sbf_list_next { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_next *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf_list_next *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf_list *list; + void *prev; + /* + * The next 2 fields ensure that prev is correctly reset after the + * foreach loop finished + */ + u32 exec_count; + bool is_null; +}; + +struct mptcp_rbs_value_sbf_list_next *mptcp_rbs_value_sbf_list_next_new( 
+ struct mptcp_rbs_value_sbf_list *list); +void mptcp_rbs_value_sbf_list_next_free( + struct mptcp_rbs_value_sbf_list_next *self); +struct tcp_sock *mptcp_rbs_value_sbf_list_next_execute( + struct mptcp_rbs_value_sbf_list_next *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_next *mptcp_rbs_value_sbf_list_next_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_next *value); +int mptcp_rbs_value_sbf_list_next_print( + const struct mptcp_rbs_value_sbf_list_next *value, char *buffer); + +/* + * .EMPTY boolean value + */ +struct mptcp_rbs_value_sbf_list_empty { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_empty *self); + s32 (*execute)(struct mptcp_rbs_value_sbf_list_empty *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf_list *list; +}; + +struct mptcp_rbs_value_sbf_list_empty *mptcp_rbs_value_sbf_list_empty_new( + struct mptcp_rbs_value_sbf_list *list); +void mptcp_rbs_value_sbf_list_empty_free( + struct mptcp_rbs_value_sbf_list_empty *self); +s32 mptcp_rbs_value_sbf_list_empty_execute( + struct mptcp_rbs_value_sbf_list_empty *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_empty *mptcp_rbs_value_sbf_list_empty_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_empty *value); +int mptcp_rbs_value_sbf_list_empty_print( + const struct mptcp_rbs_value_sbf_list_empty *value, char *buffer); + +/* + * .FILTER subflow list value + */ +struct mptcp_rbs_value_sbf_list_filter { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_filter *self); + struct tcp_sock *(*execute)( + struct mptcp_rbs_value_sbf_list_filter *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, bool *is_null); + struct mptcp_rbs_value_sbf_list *list; + struct mptcp_rbs_value_bool *cond; + struct tcp_sock *cur; +}; + +struct mptcp_rbs_value_sbf_list_filter *mptcp_rbs_value_sbf_list_filter_new( + void); +void mptcp_rbs_value_sbf_list_filter_free( + struct mptcp_rbs_value_sbf_list_filter *self); +struct tcp_sock *mptcp_rbs_value_sbf_list_filter_execute( + struct mptcp_rbs_value_sbf_list_filter *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, bool *is_null); +struct mptcp_rbs_value_sbf_list_filter *mptcp_rbs_value_sbf_list_filter_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_filter *value); +int mptcp_rbs_value_sbf_list_filter_print( + const struct mptcp_rbs_value_sbf_list_filter *value, char *buffer); + +/* + * Special value holding the actual subflow for FILTER subflow list value + */ +struct mptcp_rbs_value_sbf_list_filter_sbf { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_filter_sbf *self); + struct tcp_sock *(*execute)( + struct mptcp_rbs_value_sbf_list_filter_sbf *self, + struct mptcp_rbs_eval_ctx *ctx); + struct tcp_sock **cur; +}; + +struct mptcp_rbs_value_sbf_list_filter_sbf * +mptcp_rbs_value_sbf_list_filter_sbf_new(struct tcp_sock **cur); +void mptcp_rbs_value_sbf_list_filter_sbf_free( + struct mptcp_rbs_value_sbf_list_filter_sbf *self); +struct tcp_sock *mptcp_rbs_value_sbf_list_filter_sbf_execute( + struct mptcp_rbs_value_sbf_list_filter_sbf *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_filter_sbf * +mptcp_rbs_value_sbf_list_filter_sbf_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_filter_sbf *value); +int mptcp_rbs_value_sbf_list_filter_sbf_print( + const struct 
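.FILTER emulates a closure: the filter node exposes its iteration slot cur, and the special ..._filter_sbf leaf captures a struct tcp_sock ** pointing at that slot, so the lambda body always evaluates against the element currently under the cursor. A sketch of the wiring, with ownership and assignment order assumed from the parser later in this patch (build_cond_using is a hypothetical stand-in for constructing the boolean condition tree):

	struct mptcp_rbs_value_sbf_list_filter *f =
		mptcp_rbs_value_sbf_list_filter_new();
	/* the lambda parameter dereferences f->cur at execution time,
	 * i.e. after f has advanced the cursor to the next element */
	struct mptcp_rbs_value_sbf_list_filter_sbf *param =
		mptcp_rbs_value_sbf_list_filter_sbf_new(&f->cur);

	f->list = list;			/* the list being filtered */
	f->cond = build_cond_using(param);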
mptcp_rbs_value_sbf_list_filter_sbf *value, char *buffer); + +/* + * .MAX subflow value + */ +struct mptcp_rbs_value_sbf_list_max { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_max *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf_list_max *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf_list *list; + struct mptcp_rbs_value_int *cond; + struct tcp_sock *cur; +}; + +struct mptcp_rbs_value_sbf_list_max *mptcp_rbs_value_sbf_list_max_new(void); +void mptcp_rbs_value_sbf_list_max_free( + struct mptcp_rbs_value_sbf_list_max *self); +struct tcp_sock *mptcp_rbs_value_sbf_list_max_execute( + struct mptcp_rbs_value_sbf_list_max *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_max *mptcp_rbs_value_sbf_list_max_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_max *value); +int mptcp_rbs_value_sbf_list_max_print( + const struct mptcp_rbs_value_sbf_list_max *value, char *buffer); + +/* + * .MIN subflow value + */ +struct mptcp_rbs_value_sbf_list_min { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_min *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf_list_min *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf_list *list; + struct mptcp_rbs_value_int *cond; + struct tcp_sock *cur; +}; + +struct mptcp_rbs_value_sbf_list_min *mptcp_rbs_value_sbf_list_min_new(void); +void mptcp_rbs_value_sbf_list_min_free( + struct mptcp_rbs_value_sbf_list_min *self); +struct tcp_sock *mptcp_rbs_value_sbf_list_min_execute( + struct mptcp_rbs_value_sbf_list_min *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_min *mptcp_rbs_value_sbf_list_min_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_min *value); +int mptcp_rbs_value_sbf_list_min_print( + const struct mptcp_rbs_value_sbf_list_min *value, char *buffer); + +/* + * .GET subflow value + */ +struct mptcp_rbs_value_sbf_list_get { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_get *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf_list_get *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf_list *list; + struct mptcp_rbs_value_int *index; +}; + +struct mptcp_rbs_value_sbf_list_get *mptcp_rbs_value_sbf_list_get_new( + struct mptcp_rbs_value_sbf_list *list, struct mptcp_rbs_value_int *index); +void mptcp_rbs_value_sbf_list_get_free( + struct mptcp_rbs_value_sbf_list_get *self); +struct tcp_sock *mptcp_rbs_value_sbf_list_get_execute( + struct mptcp_rbs_value_sbf_list_get *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_get *mptcp_rbs_value_sbf_list_get_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_get *value); +int mptcp_rbs_value_sbf_list_get_print( + const struct mptcp_rbs_value_sbf_list_get *value, char *buffer); + +/* + * .COUNT integer value + */ +struct mptcp_rbs_value_sbf_list_count { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_count *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_list_count *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf_list *list; +}; + +struct mptcp_rbs_value_sbf_list_count *mptcp_rbs_value_sbf_list_count_new( + struct mptcp_rbs_value_sbf_list *list); +void mptcp_rbs_value_sbf_list_count_free( + struct mptcp_rbs_value_sbf_list_count *self); +s64 mptcp_rbs_value_sbf_list_count_execute( + struct 
mptcp_rbs_value_sbf_list_count *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_count *mptcp_rbs_value_sbf_list_count_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_count *value); +int mptcp_rbs_value_sbf_list_count_print( + const struct mptcp_rbs_value_sbf_list_count *value, char *buffer); + +/* + * .SUM integer value + */ +struct mptcp_rbs_value_sbf_list_sum { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_sum *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_list_sum *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf_list *list; + struct mptcp_rbs_value_int *cond; + struct tcp_sock *cur; +}; + +struct mptcp_rbs_value_sbf_list_sum *mptcp_rbs_value_sbf_list_sum_new(void); +void mptcp_rbs_value_sbf_list_sum_free( + struct mptcp_rbs_value_sbf_list_sum *self); +s64 mptcp_rbs_value_sbf_list_sum_execute( + struct mptcp_rbs_value_sbf_list_sum *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_sum *mptcp_rbs_value_sbf_list_sum_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_sum *value); +int mptcp_rbs_value_sbf_list_sum_print( + const struct mptcp_rbs_value_sbf_list_sum *value, char *buffer); + +/* + * .NEXT sockbuffer value + */ +struct mptcp_rbs_value_skb_list_next { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_next *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_list_next *self, + struct mptcp_rbs_eval_ctx *ctx); + bool reinject; + struct mptcp_rbs_value_skb_list *list; + void *prev; + /* + * The next 2 fields ensure that prev is correctly reset after the + * foreach loop finished + */ + u32 exec_count; + bool is_null; +}; + +struct mptcp_rbs_value_skb_list_next *mptcp_rbs_value_skb_list_next_new( + struct mptcp_rbs_value_skb_list *list); +void mptcp_rbs_value_skb_list_next_free( + struct mptcp_rbs_value_skb_list_next *self); +struct sk_buff *mptcp_rbs_value_skb_list_next_execute( + struct mptcp_rbs_value_skb_list_next *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_list_next *mptcp_rbs_value_skb_list_next_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_next *value); +int mptcp_rbs_value_skb_list_next_print( + const struct mptcp_rbs_value_skb_list_next *value, char *buffer); + +/* + * .SENT_ON boolean value + */ +struct mptcp_rbs_value_skb_sent_on { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_sent_on *self); + s32 (*execute)(struct mptcp_rbs_value_skb_sent_on *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb *skb; + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_skb_sent_on *mptcp_rbs_value_skb_sent_on_new( + struct mptcp_rbs_value_skb *skb, struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_skb_sent_on_free(struct mptcp_rbs_value_skb_sent_on *self); +s32 mptcp_rbs_value_skb_sent_on_execute( + struct mptcp_rbs_value_skb_sent_on *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_sent_on *mptcp_rbs_value_skb_sent_on_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_sent_on *value); +int mptcp_rbs_value_skb_sent_on_print( + const struct mptcp_rbs_value_skb_sent_on *value, char *buffer); + +/* + * .SENT_ON_ALL boolean value + */ +struct mptcp_rbs_value_skb_sent_on_all { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_sent_on_all *self); + s32 
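As the embedded comment says, .NEXT keeps its own prev cursor between calls, with exec_count and is_null there to reset it once a FOREACH loop has drained the list. A loop generated from a rule therefore reduces to repeated execute calls, roughly:

	struct sk_buff *skb;

	/* each call advances the internal cursor; NULL ends the loop and
	 * is expected to reset it so a later FOREACH restarts cleanly */
	while ((skb = next->execute(next, ctx)) != NULL) {
		/* body of the FOREACH statement */
	}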
(*execute)(struct mptcp_rbs_value_skb_sent_on_all *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb *skb; +}; + +struct mptcp_rbs_value_skb_sent_on_all *mptcp_rbs_value_skb_sent_on_all_new( + struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_value_skb_sent_on_all_free( + struct mptcp_rbs_value_skb_sent_on_all *self); +s32 mptcp_rbs_value_skb_sent_on_all_execute( + struct mptcp_rbs_value_skb_sent_on_all *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_sent_on_all *mptcp_rbs_value_skb_sent_on_all_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_sent_on_all *value); +int mptcp_rbs_value_skb_sent_on_all_print( + const struct mptcp_rbs_value_skb_sent_on_all *value, char *buffer); + +/* + * .USER integer value + */ +struct mptcp_rbs_value_skb_user { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_user *self); + s64 (*execute)(struct mptcp_rbs_value_skb_user *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb *skb; +}; + +struct mptcp_rbs_value_skb_user *mptcp_rbs_value_skb_user_new( + struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_value_skb_user_free(struct mptcp_rbs_value_skb_user *self); +s64 mptcp_rbs_value_skb_user_execute(struct mptcp_rbs_value_skb_user *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_user *mptcp_rbs_value_skb_user_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_user *value); +int mptcp_rbs_value_skb_user_print(const struct mptcp_rbs_value_skb_user *value, + char *buffer); + +/* + * .SEQ integer value + */ +struct mptcp_rbs_value_skb_seq { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_seq *self); + s64 (*execute)(struct mptcp_rbs_value_skb_seq *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb *skb; +}; + +struct mptcp_rbs_value_skb_seq *mptcp_rbs_value_skb_seq_new( + struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_value_skb_seq_free(struct mptcp_rbs_value_skb_seq *self); +s64 mptcp_rbs_value_skb_seq_execute(struct mptcp_rbs_value_skb_seq *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_seq *mptcp_rbs_value_skb_seq_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_seq *value); +int mptcp_rbs_value_skb_seq_print(const struct mptcp_rbs_value_skb_seq *value, + char *buffer); + +/* + * .PSH boolean value + */ +struct mptcp_rbs_value_skb_psh { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_psh *self); + s32 (*execute)(struct mptcp_rbs_value_skb_psh *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb *skb; + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_skb_psh *mptcp_rbs_value_skb_psh_new( + struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_value_skb_psh_free(struct mptcp_rbs_value_skb_psh *self); +s32 mptcp_rbs_value_skb_psh_execute( + struct mptcp_rbs_value_skb_psh *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_psh *mptcp_rbs_value_skb_psh_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_psh *value); +int mptcp_rbs_value_skb_psh_print( + const struct mptcp_rbs_value_skb_psh *value, char *buffer); + +/* + * .LENGTH integer value + */ +struct mptcp_rbs_value_skb_length { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_length *self); + s64 (*execute)(struct mptcp_rbs_value_skb_length *self, + struct mptcp_rbs_eval_ctx *ctx); + struct 
mptcp_rbs_value_skb *skb; +}; + +struct mptcp_rbs_value_skb_length *mptcp_rbs_value_skb_length_new( + struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_value_skb_length_free(struct mptcp_rbs_value_skb_length *self); +s64 mptcp_rbs_value_skb_length_execute(struct mptcp_rbs_value_skb_length *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_length *mptcp_rbs_value_skb_length_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_length *value); +int mptcp_rbs_value_skb_length_print(const struct mptcp_rbs_value_skb_length *value, + char *buffer); + +/* + * .EMPTY boolean value + */ +struct mptcp_rbs_value_skb_list_empty { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_empty *self); + s32 (*execute)(struct mptcp_rbs_value_skb_list_empty *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb_list *list; +}; + +struct mptcp_rbs_value_skb_list_empty *mptcp_rbs_value_skb_list_empty_new( + struct mptcp_rbs_value_skb_list *list); +void mptcp_rbs_value_skb_list_empty_free( + struct mptcp_rbs_value_skb_list_empty *self); +s32 mptcp_rbs_value_skb_list_empty_execute( + struct mptcp_rbs_value_skb_list_empty *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_list_empty *mptcp_rbs_value_skb_list_empty_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_empty *value); +int mptcp_rbs_value_skb_list_empty_print( + const struct mptcp_rbs_value_skb_list_empty *value, char *buffer); + +/* + * .POP() sockbuffer value + */ +struct mptcp_rbs_value_skb_list_pop { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_pop *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_list_pop *self, + struct mptcp_rbs_eval_ctx *ctx); + bool reinject; + struct mptcp_rbs_value_skb_list *list; +}; + +struct mptcp_rbs_value_skb_list_pop *mptcp_rbs_value_skb_list_pop_new( + struct mptcp_rbs_value_skb_list *list); +void mptcp_rbs_value_skb_list_pop_free( + struct mptcp_rbs_value_skb_list_pop *self); +struct sk_buff *mptcp_rbs_value_skb_list_pop_execute( + struct mptcp_rbs_value_skb_list_pop *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_list_pop *mptcp_rbs_value_skb_list_pop_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_pop *value); +int mptcp_rbs_value_skb_list_pop_print( + const struct mptcp_rbs_value_skb_list_pop *value, char *buffer); + +/* + * .FILTER sockbuffer list value + */ + +struct mptcp_rbs_value_skb_list_filter_progress { + struct sk_buff *cur; + bool reinject; +}; + +struct mptcp_rbs_value_skb_list_filter { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_filter *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_list_filter *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); + enum mptcp_rbs_value_kind underlying_queue_kind; + struct mptcp_rbs_value_skb_list *list; + struct mptcp_rbs_value_bool *cond; + struct mptcp_rbs_value_skb_list_filter_progress progress; +}; + +struct mptcp_rbs_value_skb_list_filter *mptcp_rbs_value_skb_list_filter_new( + void); +void mptcp_rbs_value_skb_list_filter_free( + struct mptcp_rbs_value_skb_list_filter *self); +struct sk_buff *mptcp_rbs_value_skb_list_filter_execute( + struct mptcp_rbs_value_skb_list_filter *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, bool *is_null); +struct mptcp_rbs_value_skb_list_filter *mptcp_rbs_value_skb_list_filter_clone( + struct 
mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_filter *value); +int mptcp_rbs_value_skb_list_filter_print( + const struct mptcp_rbs_value_skb_list_filter *value, char *buffer); + +/* + * Special value holding the actual sockbuffer for FILTER sockbuffer list value + */ +struct mptcp_rbs_value_skb_list_filter_skb { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_filter_skb *self); + struct sk_buff *(*execute)( + struct mptcp_rbs_value_skb_list_filter_skb *self, + struct mptcp_rbs_eval_ctx *ctx); + bool reinject; + struct mptcp_rbs_value_skb_list_filter_progress *progress; +}; + +struct mptcp_rbs_value_skb_list_filter_skb * +mptcp_rbs_value_skb_list_filter_skb_new( + struct mptcp_rbs_value_skb_list_filter_progress *progress); +void mptcp_rbs_value_skb_list_filter_skb_free( + struct mptcp_rbs_value_skb_list_filter_skb *self); +struct sk_buff *mptcp_rbs_value_skb_list_filter_skb_execute( + struct mptcp_rbs_value_skb_list_filter_skb *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_list_filter_skb * +mptcp_rbs_value_skb_list_filter_skb_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_filter_skb *value); +int mptcp_rbs_value_skb_list_filter_skb_print( + const struct mptcp_rbs_value_skb_list_filter_skb *value, char *buffer); + +/* + * .COUNT integer value + */ +struct mptcp_rbs_value_skb_list_count { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_count *self); + s64 (*execute)(struct mptcp_rbs_value_skb_list_count *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb_list *list; +}; + +struct mptcp_rbs_value_skb_list_count *mptcp_rbs_value_skb_list_count_new( + struct mptcp_rbs_value_skb_list *list); +void mptcp_rbs_value_skb_list_count_free( + struct mptcp_rbs_value_skb_list_count *self); +s64 mptcp_rbs_value_skb_list_count_execute( + struct mptcp_rbs_value_skb_list_count *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_list_count *mptcp_rbs_value_skb_list_count_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_count *value); +int mptcp_rbs_value_skb_list_count_print( + const struct mptcp_rbs_value_skb_list_count *value, char *buffer); + +/* + * .TOP sockbuffer value + */ +struct mptcp_rbs_value_skb_list_top { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_top *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_list_top *self, + struct mptcp_rbs_eval_ctx *ctx); + bool reinject; + struct mptcp_rbs_value_skb_list *list; +}; + +struct mptcp_rbs_value_skb_list_top *mptcp_rbs_value_skb_list_top_new( + struct mptcp_rbs_value_skb_list *list); +void mptcp_rbs_value_skb_list_top_free( + struct mptcp_rbs_value_skb_list_top *self); +struct sk_buff *mptcp_rbs_value_skb_list_top_execute( + struct mptcp_rbs_value_skb_list_top *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_list_top *mptcp_rbs_value_skb_list_top_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_top *value); +int mptcp_rbs_value_skb_list_top_print( + const struct mptcp_rbs_value_skb_list_top *value, char *buffer); + +/* + * .GET sockbuffer value + */ +struct mptcp_rbs_value_skb_list_get { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_get *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_list_get *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb_list 
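.TOP and .POP() share a signature but differ in effect: TOP presumably only peeks at the head of the queue, while POP also dequeues it, which is why POP is the one that makes sense in a rule's action position. Illustrative only:

	struct sk_buff *head;

	head = top->execute(top, ctx);	/* inspect the head, queue unchanged */
	head = pop->execute(pop, ctx);	/* take the head out of the queue */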
*list;
+	struct mptcp_rbs_value_int *index;
+};
+
+struct mptcp_rbs_value_skb_list_get *mptcp_rbs_value_skb_list_get_new(
+	struct mptcp_rbs_value_skb_list *list, struct mptcp_rbs_value_int *index);
+void mptcp_rbs_value_skb_list_get_free(
+	struct mptcp_rbs_value_skb_list_get *self);
+struct sk_buff *mptcp_rbs_value_skb_list_get_execute(
+	struct mptcp_rbs_value_skb_list_get *self, struct mptcp_rbs_eval_ctx *ctx);
+struct mptcp_rbs_value_skb_list_get *mptcp_rbs_value_skb_list_get_clone(
+	struct mptcp_rbs_value_clone_ctx *ctx,
+	const struct mptcp_rbs_value_skb_list_get *value);
+int mptcp_rbs_value_skb_list_get_print(
+	const struct mptcp_rbs_value_skb_list_get *value, char *buffer);
+
+/*
+ * Returns the return type of a given value kind
+ */
+enum mptcp_rbs_type_kind mptcp_rbs_value_get_type(
+	enum mptcp_rbs_value_kind kind);
+
+#ifndef MPTCP_RBS_CLONE_USER_FUNC_DEFINED
+#define MPTCP_RBS_CLONE_USER_FUNC_DEFINED
+typedef struct mptcp_rbs_value *(*mptcp_rbs_value_clone_user_func)(
+	void *user_ctx, const struct mptcp_rbs_value *value);
+#endif
+
+/*
+ * Creates a copy of a value and all its subvalues
+ * @value: The value to copy
+ * @user_ctx: User context for the user function or NULL
+ * @user_func: Function that is executed for each value or NULL. If this
+ * function returns a value other than NULL the current value is replaced with
+ * it instead of cloned
+ * Return: The new instance
+ */
+struct mptcp_rbs_value *mptcp_rbs_value_clone(
+	const struct mptcp_rbs_value *value, void *user_ctx,
+	mptcp_rbs_value_clone_user_func user_func);
+
+/*
+ * Writes a string representation of a value to the given buffer
+ * @value: The value
+ * @buffer: Pointer to the buffer where the string should be stored or NULL
+ * Return: Number of written characters
+ */
+int mptcp_rbs_value_print(const struct mptcp_rbs_value *value, char *buffer);
+
+/* Some helpers for bandwidth calculation */
+
+u64 mptcp_rbs_sbf_get_bw_send(struct mptcp_rbs_sbf_cb *sbf_cb);
+u64 mptcp_rbs_sbf_get_bw_ack(struct mptcp_rbs_sbf_cb *sbf_cb);
+
+struct sk_buff *mptcp_rbs_next_in_queue(struct sk_buff_head *queue,
+					struct sk_buff *skb);
+
+#endif
diff --git a/net/mptcp/mptcp_rbs_value_parser.h b/net/mptcp/mptcp_rbs_value_parser.h
new file mode 100644
index 0000000000000..07852709f6514
--- /dev/null
+++ b/net/mptcp/mptcp_rbs_value_parser.h
@@ -0,0 +1,928 @@
+#ifndef _MPTCP_RBS_VALUE_PARSER_H
+#define _MPTCP_RBS_VALUE_PARSER_H
+
+#include "mptcp_rbs_dynarray.h"
+#include "mptcp_rbs_lexer.h"
+#include "mptcp_rbs_value.h"
+#include <linux/slab.h>
+#include <linux/string.h>
+
+/*
+ * Clones a string by allocating memory and copying the content
+ */
+static char *strclone(const char *str)
+{
+	int len;
+	char *result;
+
+	if (!str)
+		return NULL;
+
+	len = strlen(str);
+	result = kzalloc(len + 1, GFP_KERNEL);
+	if (!result)
+		return NULL;
+	memcpy(result, str, len);
+	return result;
+}
+
+/*
+ * Type to manage variables
+ */
+struct var {
+	char *name;
+	int var_number;
+	enum mptcp_rbs_type_kind type;
+	union {
+		bool reinject;
+		enum mptcp_rbs_value_kind underlying_queue_kind;
+	};
+};
+
+static struct var *var_new(char *name, int var_number,
+			   enum mptcp_rbs_type_kind type, bool *reinject,
+			   enum mptcp_rbs_value_kind *underlying_queue_kind)
+{
+	struct var *var;
+
+	var = kzalloc(sizeof(struct var), GFP_KERNEL);
+	if (!var)
+		return NULL;
+	var->name = strclone(name);
+	var->var_number = var_number;
+	var->type = type;
+	if (reinject)
+		var->reinject = *reinject;
+	else if (underlying_queue_kind)
+		var->underlying_queue_kind = *underlying_queue_kind;
+
+	return var;
+}
+
+static void var_free(struct var *var)
+{
+
kfree(var->name); + kfree(var); +} + +/* + * Variable lists + */ + +DECL_DA(var_list, struct var *); + +#define INIT_VAR_LIST(list) INIT_DA(list) + +#define FREE_VAR_LIST(list) FREE_DA(list) + +#define ADD_VAR(list, var) ADD_DA_ITEM(list, var) + +#define FOREACH_VAR(list, var_, cmds) FOREACH_DA_ITEM(list, var_, cmds) + +/* + * Variable list stacks + */ + +DECL_DA(var_list_stack, struct var_list *); + +#define INIT_VAR_LIST_STACK(stack) INIT_DA(stack) + +#define FREE_VAR_LIST_STACK(stack) FREE_DA(stack) + +#define PUSH_VAR_LIST(stack, list) ADD_DA_ITEM(stack, list) + +#define POP_VAR_LIST(stack) DELETE_DA_ITEM(stack, GET_DA_LEN(stack) - 1) + +#define GET_VAR_LIST_STACK_TOP(stack) GET_DA_ITEM(stack, GET_DA_LEN(stack) - 1) + +#define FOREACH_STACK_VAR(stack, var, cmds) \ + do { \ + struct var_list *__list; \ + FOREACH_DA_ITEM_REV(stack, __list, \ + FOREACH_VAR(__list, var, cmds)); \ + } while (0) + +/* + * Type and functions to manage a stack of replacements. Replacements can be + * registered by values to replace identifiers in sub values with values. This + * is useful for values like FILTER(s => ...) + */ + +typedef struct mptcp_rbs_value *(*new_repl_value_func)(void *tag); + +struct repl { + char *name; + new_repl_value_func new_value; + void *tag; +}; + +/* + * Replacement stacks + */ + +DECL_DA(repl_stack, struct repl *); + +#define INIT_REPL_STACK(stack) INIT_DA(stack) + +#define FREE_REPL_STACK(stack) FREE_DA(stack) + +#define PUSH_REPL(stack, repl) ADD_DA_ITEM(stack, repl) + +#define POP_REPL(stack) DELETE_DA_ITEM(stack, GET_DA_LEN(stack) - 1) + +#define FOREACH_REPL(stack, var, cmds) FOREACH_DA_ITEM_REV(stack, var, cmds) + +struct parse_ctx { + char const *str; + int position; + int line; + int line_position; + struct repl_stack repls; + struct var_list_stack var_stack; + /* Index of the next free variable */ + int var_index; + enum mptcp_rbs_value_kind underlying_queue_kind; +}; + +static bool expect_token(struct parse_ctx *ctx, enum mptcp_rbs_token_kind kind, + struct mptcp_rbs_token *token); +static bool lookahead_token(struct parse_ctx *ctx, + struct mptcp_rbs_token *token); +static struct mptcp_rbs_value_bool *parse_value_bool(struct parse_ctx *ctx); +static struct mptcp_rbs_value_int *parse_value_int(struct parse_ctx *ctx); +static struct mptcp_rbs_value_string *parse_value_string(struct parse_ctx *ctx); +static struct mptcp_rbs_value_sbf *parse_value_sbf(struct parse_ctx *ctx); +static struct mptcp_rbs_value_skb *parse_value_skb(struct parse_ctx *ctx); + +/* + * Q sockbuffer list value + */ + +static struct mptcp_rbs_value_q *mptcp_rbs_value_q_parse(struct parse_ctx *ctx) +{ + ctx->underlying_queue_kind = VALUE_KIND_Q; + return mptcp_rbs_value_q_new(); +} + +/* + * QU sockbuffer list value + */ + +static struct mptcp_rbs_value_qu *mptcp_rbs_value_qu_parse( + struct parse_ctx *ctx) +{ + ctx->underlying_queue_kind = VALUE_KIND_QU; + return mptcp_rbs_value_qu_new(); +} + +/* + * RQ sockbuffer list value + */ + +static struct mptcp_rbs_value_rq *mptcp_rbs_value_rq_parse( + struct parse_ctx *ctx) +{ + ctx->underlying_queue_kind = VALUE_KIND_RQ; + return mptcp_rbs_value_rq_new(); +} + +/* + * .RTT integer value + */ + +static struct mptcp_rbs_value_sbf_rtt *mptcp_rbs_value_sbf_rtt_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_rtt_new(sbf); +} + +/* + * .RTT_MS integer value + */ + +static struct mptcp_rbs_value_sbf_rtt_ms *mptcp_rbs_value_sbf_rtt_ms_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return 
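With the replacement stack in place, an identifier met during parsing is resolved by walking ctx->repls from the innermost scope outwards; a hit invokes the registered factory with its tag, producing a fresh value node in place of the identifier. The actual lookup lives in the parser body rather than in this header, but with the DECL_DA iteration macros it plausibly amounts to a fragment like:

	/* hedged sketch: resolve `name` against the replacement stack */
	struct repl *r;

	FOREACH_REPL(&ctx->repls, r, {
		if (!strcmp(r->name, name))
			return r->new_value(r->tag);
	});
	return NULL;	/* no replacement registered for this identifier */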
mptcp_rbs_value_sbf_rtt_ms_new(sbf); +} + +/* + * .RTT_VAR integer value + */ + +static struct mptcp_rbs_value_sbf_rtt_var *mptcp_rbs_value_sbf_rtt_var_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_rtt_var_new(sbf); +} + +/* + * .USER integer value + */ + +static struct mptcp_rbs_value_sbf_user *mptcp_rbs_value_sbf_user_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_user_new(sbf); +} + +/* + * .IS_BACKUP boolean value + */ + +static struct mptcp_rbs_value_sbf_is_backup * +mptcp_rbs_value_sbf_is_backup_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_is_backup_new(sbf); +} + +/* + * .CWND integer value + */ + +static struct mptcp_rbs_value_sbf_cwnd *mptcp_rbs_value_sbf_cwnd_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_cwnd_new(sbf); +} + +/* + * .QUEUED integer value + */ + +static struct mptcp_rbs_value_sbf_queued *mptcp_rbs_value_sbf_queued_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_queued_new(sbf); +} + +/* + * .SKBS_IN_FLIGHT integer value + */ + +static struct mptcp_rbs_value_sbf_skbs_in_flight * +mptcp_rbs_value_sbf_skbs_in_flight_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_skbs_in_flight_new(sbf); +} + +/* + * .LOST_SKBS integer value + */ + +static struct mptcp_rbs_value_sbf_lost_skbs * +mptcp_rbs_value_sbf_lost_skbs_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_lost_skbs_new(sbf); +} + +/* + * .HAS_WINDOW_FOR boolean value + */ + +static struct mptcp_rbs_value_sbf_has_window_for * +mptcp_rbs_value_sbf_has_window_for_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value_skb *skb; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Sockbuffer value must follow */ + skb = parse_value_skb(ctx); + if (!skb) + return NULL; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + skb->free(skb); + return NULL; + } + + return mptcp_rbs_value_sbf_has_window_for_new(sbf, skb); +} + +/* + * .ID integer value + */ + +static struct mptcp_rbs_value_sbf_id *mptcp_rbs_value_sbf_id_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_id_new(sbf); +} + +/* + * .DELAY_IN integer value + */ + +static struct mptcp_rbs_value_sbf_delay_in *mptcp_rbs_value_sbf_delay_in_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_delay_in_new(sbf); +} + +/* + * .DELAY_OUT integer value + */ + +static struct mptcp_rbs_value_sbf_delay_out * +mptcp_rbs_value_sbf_delay_out_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_delay_out_new(sbf); +} + +/* + * .BW_OUT_SEND integer value + */ + +static struct mptcp_rbs_value_sbf_bw_out_send * +mptcp_rbs_value_sbf_bw_out_send_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_bw_out_send_new(sbf); +} + +/* + * .BW_OUT_ACK integer value + */ + +static struct mptcp_rbs_value_sbf_bw_out_ack * +mptcp_rbs_value_sbf_bw_out_ack_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_bw_out_ack_new(sbf); +} + +/* + * .SSTHRESH integer value + */ + +static struct 
mptcp_rbs_value_sbf_ssthresh *mptcp_rbs_value_sbf_ssthresh_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_ssthresh_new(sbf); +} + +/* + * .THROTTLED boolean value + */ + +static struct mptcp_rbs_value_sbf_throttled * +mptcp_rbs_value_sbf_throttled_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_throttled_new(sbf); +} + +/* + * .LOSSY boolean value + */ + +static struct mptcp_rbs_value_sbf_lossy *mptcp_rbs_value_sbf_lossy_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_lossy_new(sbf); +} + +/* + * SUBFLOWS subflow list value + */ + +static struct mptcp_rbs_value_subflows *mptcp_rbs_value_subflows_parse( + struct parse_ctx *ctx) +{ + return mptcp_rbs_value_subflows_new(); +} + +/* + * CURRENT_TIME_MS integer value + */ + +static struct mptcp_rbs_value_current_time_ms * +mptcp_rbs_value_current_time_ms_parse(struct parse_ctx *ctx) +{ + return mptcp_rbs_value_current_time_ms_new(); +} + +/* + * RANDOM integer value + */ + +static struct mptcp_rbs_value_random *mptcp_rbs_value_random_parse( + struct parse_ctx *ctx) +{ + return mptcp_rbs_value_random_new(); +} + +/* + * .EMPTY boolean value + */ + +static struct mptcp_rbs_value_sbf_list_empty * +mptcp_rbs_value_sbf_list_empty_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf_list *list) +{ + return mptcp_rbs_value_sbf_list_empty_new(list); +} + +/* + * .FILTER subflow list value + */ + +static struct mptcp_rbs_value_sbf_list_filter * +mptcp_rbs_value_sbf_list_filter_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_token ident_token; + struct mptcp_rbs_value_sbf_list_filter *value; + struct repl repl; + struct mptcp_rbs_value_bool *cond; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Identifier must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &ident_token)) + return NULL; + + /* => must follow */ + if (!expect_token(ctx, TOKEN_KIND_ASSIGN, &token)) + return NULL; + if (!expect_token(ctx, TOKEN_KIND_GREATER, &token)) + return NULL; + + /* Install replacement */ + value = mptcp_rbs_value_sbf_list_filter_new(); + repl.name = ident_token.string; + repl.new_value = + (new_repl_value_func) mptcp_rbs_value_sbf_list_filter_sbf_new; + repl.tag = &value->cur; + PUSH_REPL(&ctx->repls, &repl); + + /* Boolean value must follow */ + cond = parse_value_bool(ctx); + POP_REPL(&ctx->repls); + if (!cond) { + mptcp_rbs_value_sbf_list_filter_free(value); + return NULL; + } + value->cond = cond; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + return NULL; + } + value->list = list; + + return value; +} + +/* + * Special value holding the actual subflow for FILTER subflow list value + */ + +static struct mptcp_rbs_value_sbf_list_filter_sbf * +mptcp_rbs_value_sbf_list_filter_sbf_parse(struct parse_ctx *ctx) +{ + /* This should never be called because this value cannot be parsed */ + BUG_ON(true); + return NULL; +} + +/* + * .MAX subflow value + */ + +static struct mptcp_rbs_value_sbf_list_max *mptcp_rbs_value_sbf_list_max_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_token ident_token; + struct mptcp_rbs_value_sbf_list_max *value; + struct repl repl; + struct mptcp_rbs_value_int *cond; + + /* ( must follow */ + if (!expect_token(ctx, 
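For orientation, these values compose in rule text roughly as follows; only constructs whose parsers appear in this file are used, and the concrete syntax is inferred from the token checks rather than quoted from a grammar:

	SUBFLOWS.FILTER(s => s.IS_BACKUP).COUNT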
TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Identifier must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &ident_token)) + return NULL; + + /* => must follow */ + if (!expect_token(ctx, TOKEN_KIND_ASSIGN, &token)) + return NULL; + if (!expect_token(ctx, TOKEN_KIND_GREATER, &token)) + return NULL; + + /* Install replacement */ + value = mptcp_rbs_value_sbf_list_max_new(); + repl.name = ident_token.string; + repl.new_value = + (new_repl_value_func) mptcp_rbs_value_sbf_list_filter_sbf_new; + repl.tag = &value->cur; + PUSH_REPL(&ctx->repls, &repl); + + /* Integer value must follow */ + cond = parse_value_int(ctx); + POP_REPL(&ctx->repls); + if (!cond) { + mptcp_rbs_value_sbf_list_max_free(value); + return NULL; + } + value->cond = cond; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + return NULL; + } + value->list = list; + + return value; +} + +/* + * .MIN subflow value + */ + +static struct mptcp_rbs_value_sbf_list_min *mptcp_rbs_value_sbf_list_min_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_token ident_token; + struct mptcp_rbs_value_sbf_list_min *value; + struct repl repl; + struct mptcp_rbs_value_int *cond; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Identifier must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &ident_token)) + return NULL; + + /* => must follow */ + if (!expect_token(ctx, TOKEN_KIND_ASSIGN, &token)) + return NULL; + if (!expect_token(ctx, TOKEN_KIND_GREATER, &token)) + return NULL; + + /* Install replacement */ + value = mptcp_rbs_value_sbf_list_min_new(); + repl.name = ident_token.string; + repl.new_value = + (new_repl_value_func) mptcp_rbs_value_sbf_list_filter_sbf_new; + repl.tag = &value->cur; + PUSH_REPL(&ctx->repls, &repl); + + /* Integer value must follow */ + cond = parse_value_int(ctx); + POP_REPL(&ctx->repls); + if (!cond) { + mptcp_rbs_value_sbf_list_min_free(value); + return NULL; + } + value->cond = cond; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + return NULL; + } + value->list = list; + + return value; +} + +/* + * .GET subflow value + */ + +static struct mptcp_rbs_value_sbf_list_get *mptcp_rbs_value_sbf_list_get_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value_int *index; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Integer value must follow */ + index = parse_value_int(ctx); + if (!index) + return NULL; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + index->free(index); + return NULL; + } + + return mptcp_rbs_value_sbf_list_get_new(list, index); +} + +/* + * .COUNT integer value + */ + +static struct mptcp_rbs_value_sbf_list_count * +mptcp_rbs_value_sbf_list_count_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf_list *list) +{ + return mptcp_rbs_value_sbf_list_count_new(list); +} + +/* + * .SUM integer value + */ + +static struct mptcp_rbs_value_sbf_list_sum *mptcp_rbs_value_sbf_list_sum_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_token ident_token; + struct mptcp_rbs_value_sbf_list_sum *value; + struct repl repl; + struct mptcp_rbs_value_int *cond; + + /* ( must follow */ + if (!expect_token(ctx, 
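.MAX and .MIN above and .SUM below repeat the same "( ident =>" int-expression skeleton and differ only in the constructor. If more such combinators are added, the shared part could be hoisted into a helper; one possible shape (hypothetical, not part of this patch; the caller would still consume the closing bracket, set value->cond and attach the list):

	static struct mptcp_rbs_value_int *
	parse_ident_int_body(struct parse_ctx *ctx, void *tag)
	{
		struct mptcp_rbs_token token;
		struct mptcp_rbs_token ident_token;
		struct repl repl;
		struct mptcp_rbs_value_int *cond;

		if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token) ||
		    !expect_token(ctx, TOKEN_KIND_IDENT, &ident_token) ||
		    !expect_token(ctx, TOKEN_KIND_ASSIGN, &token) ||
		    !expect_token(ctx, TOKEN_KIND_GREATER, &token))
			return NULL;

		repl.name = ident_token.string;
		repl.new_value =
		    (new_repl_value_func) mptcp_rbs_value_sbf_list_filter_sbf_new;
		repl.tag = tag;
		PUSH_REPL(&ctx->repls, &repl);
		cond = parse_value_int(ctx);
		POP_REPL(&ctx->repls);
		return cond;
	}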
TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Identifier must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &ident_token)) + return NULL; + + /* => must follow */ + if (!expect_token(ctx, TOKEN_KIND_ASSIGN, &token)) + return NULL; + if (!expect_token(ctx, TOKEN_KIND_GREATER, &token)) + return NULL; + + /* Install replacement */ + value = mptcp_rbs_value_sbf_list_sum_new(); + repl.name = ident_token.string; + repl.new_value = + (new_repl_value_func) mptcp_rbs_value_sbf_list_filter_sbf_new; + repl.tag = &value->cur; + PUSH_REPL(&ctx->repls, &repl); + + /* Integer value must follow */ + cond = parse_value_int(ctx); + POP_REPL(&ctx->repls); + if (!cond) { + mptcp_rbs_value_sbf_list_sum_free(value); + return NULL; + } + value->cond = cond; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + return NULL; + } + value->list = list; + + return value; +} + +/* + * .SENT_ON boolean value + */ + +static struct mptcp_rbs_value_skb_sent_on *mptcp_rbs_value_skb_sent_on_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value_sbf *sbf; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Subflow value must follow */ + sbf = parse_value_sbf(ctx); + if (!sbf) + return NULL; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + sbf->free(sbf); + return NULL; + } + + return mptcp_rbs_value_skb_sent_on_new(skb, sbf); +} + +/* + * .SENT_ON_ALL boolean value + */ + +static struct mptcp_rbs_value_skb_sent_on_all * +mptcp_rbs_value_skb_sent_on_all_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_skb *skb) +{ + return mptcp_rbs_value_skb_sent_on_all_new(skb); +} + +/* + * .USER integer value + */ + +static struct mptcp_rbs_value_skb_user *mptcp_rbs_value_skb_user_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb *skb) +{ + return mptcp_rbs_value_skb_user_new(skb); +} + +/* + * .SEQ integer value + */ + +static struct mptcp_rbs_value_skb_seq *mptcp_rbs_value_skb_seq_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb *skb) +{ + return mptcp_rbs_value_skb_seq_new(skb); +} + +/* + * .PSH integer value + */ + +static struct mptcp_rbs_value_skb_psh *mptcp_rbs_value_skb_psh_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb *skb) +{ + return mptcp_rbs_value_skb_psh_new(skb); +} + +/* + * .LENGTH integer value + */ + +static struct mptcp_rbs_value_skb_length *mptcp_rbs_value_skb_length_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb *skb) +{ + return mptcp_rbs_value_skb_length_new(skb); +} + +/* + * .EMPTY boolean value + */ + +static struct mptcp_rbs_value_skb_list_empty * +mptcp_rbs_value_skb_list_empty_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_skb_list *list) +{ + return mptcp_rbs_value_skb_list_empty_new(list); +} + +/* + * .POP() sockbuffer value + */ + +static struct mptcp_rbs_value_skb_list_pop *mptcp_rbs_value_skb_list_pop_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb_list *list) +{ + struct mptcp_rbs_token token; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) + return NULL; + + return mptcp_rbs_value_skb_list_pop_new(list); +} + +/* + * .FILTER sockbuffer list value + */ + +static struct mptcp_rbs_value_skb_list_filter * +mptcp_rbs_value_skb_list_filter_parse(struct parse_ctx *ctx, + struct 
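One asymmetry worth noting: .POP() is parsed with a mandatory empty bracket pair, while .TOP (further below) takes none; presumably the explicit brackets flag the consuming accessor, in contrast to the pure peek. Illustrative rule fragments, inferred from the parse functions in this file:

	Q.POP()		/* handled by mptcp_rbs_value_skb_list_pop_parse */
	Q.TOP		/* handled by mptcp_rbs_value_skb_list_top_parse */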
mptcp_rbs_value_skb_list *list) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_token ident_token; + struct mptcp_rbs_value_skb_list_filter *value; + struct repl repl; + struct mptcp_rbs_value_bool *cond; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Identifier must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &ident_token)) + return NULL; + + /* => must follow */ + if (!expect_token(ctx, TOKEN_KIND_ASSIGN, &token)) + return NULL; + if (!expect_token(ctx, TOKEN_KIND_GREATER, &token)) + return NULL; + + /* Install replacement */ + value = mptcp_rbs_value_skb_list_filter_new(); + value->progress.reinject = list->underlying_queue_kind == VALUE_KIND_RQ; + repl.name = ident_token.string; + repl.new_value = + (new_repl_value_func) mptcp_rbs_value_skb_list_filter_skb_new; + repl.tag = &value->progress; + PUSH_REPL(&ctx->repls, &repl); + + /* Boolean value must follow */ + cond = parse_value_bool(ctx); + POP_REPL(&ctx->repls); + if (!cond) { + mptcp_rbs_value_skb_list_filter_free(value); + return NULL; + } + value->cond = cond; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + return NULL; + } + value->list = list; + value->underlying_queue_kind = list->underlying_queue_kind; + + return value; +} + +/* + * Special value holding the actual sockbuffer for FILTER sockbuffer list value + */ + +static struct mptcp_rbs_value_skb_list_filter_skb * +mptcp_rbs_value_skb_list_filter_skb_parse(struct parse_ctx *ctx) +{ + /* This should never be called because this value cannot be parsed */ + BUG_ON(true); + return NULL; +} + +/* + * .COUNT integer value + */ + +static struct mptcp_rbs_value_skb_list_count * +mptcp_rbs_value_skb_list_count_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_skb_list *list) +{ + return mptcp_rbs_value_skb_list_count_new(list); +} + +/* + * .TOP sockbuffer value + */ + +static struct mptcp_rbs_value_skb_list_top *mptcp_rbs_value_skb_list_top_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb_list *list) +{ + return mptcp_rbs_value_skb_list_top_new(list); +} + +/* + * .GET sockbuffer value + */ + +static struct mptcp_rbs_value_skb_list_get *mptcp_rbs_value_skb_list_get_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb_list *list) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value_int *index; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Integer value must follow */ + index = parse_value_int(ctx); + if (!index) + return NULL; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + index->free(index); + return NULL; + } + + return mptcp_rbs_value_skb_list_get_new(list, index); +} + +#endif diff --git a/net/mptcp/mptcp_rbs_var.c b/net/mptcp/mptcp_rbs_var.c new file mode 100644 index 0000000000000..785cb83ab1601 --- /dev/null +++ b/net/mptcp/mptcp_rbs_var.c @@ -0,0 +1,29 @@ +#include "mptcp_rbs_var.h" +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +void mptcp_rbs_var_free(struct mptcp_rbs_var *self) +{ + if (self->is_lazy) + return; + + switch (self->type) { + case TYPE_KIND_NULL: + case TYPE_KIND_BOOL: + case TYPE_KIND_INT: + case TYPE_KIND_STRING: + case TYPE_KIND_SBF: + case TYPE_KIND_SKB: + break; + case TYPE_KIND_SBFLIST: { + kfree(self->sbf_list_value); + break; + } + case TYPE_KIND_SKBLIST: { + kfree(self->skb_list_value); + break; + } + } +} +#pragma GCC diagnostic pop diff --git 
a/net/mptcp/mptcp_rbs_var.h b/net/mptcp/mptcp_rbs_var.h new file mode 100644 index 0000000000000..e96ff58adcb02 --- /dev/null +++ b/net/mptcp/mptcp_rbs_var.h @@ -0,0 +1,31 @@ +#ifndef _MPTCP_RBS_VAR_H +#define _MPTCP_RBS_VAR_H + +#include "mptcp_rbs_type.h" +#include "mptcp_rbs_value.h" +#include + +#define MPTCP_RBS_MAX_VAR_COUNT 24 + +/* Struct for a single variable */ +struct mptcp_rbs_var { + enum mptcp_rbs_type_kind type; + bool is_lazy; + union { + s32 bool_value; + s64 int_value; + char *string_value; + struct tcp_sock *sbf_value; + struct tcp_sock **sbf_list_value; + struct sk_buff *skb_value; + struct sk_buff **skb_list_value; + struct mptcp_rbs_value *lazy_value; + }; +}; + +/* + * Releases a variable struct + */ +void mptcp_rbs_var_free(struct mptcp_rbs_var *self); + +#endif diff --git a/net/mptcp/mptcp_rr.c b/net/mptcp/mptcp_rr.c index 8910ba9e6052e..e36871c9eb186 100644 --- a/net/mptcp/mptcp_rr.c +++ b/net/mptcp/mptcp_rr.c @@ -2,6 +2,7 @@ #include #include +#include static unsigned char num_segments __read_mostly = 1; module_param(num_segments, byte, 0644); @@ -171,11 +172,17 @@ static struct sk_buff *__mptcp_rr_next_segment(const struct sock *meta_sk, int * return skb; } +u64 total_rr_time_skb; +u64 total_rr_time_no_skb; +u64 total_rr_count_skb; +u64 total_rr_count_no_skb; + static struct sk_buff *mptcp_rr_next_segment(struct sock *meta_sk, int *reinject, struct sock **subsk, unsigned int *limit) { + u64 begin_time = __native_read_tsc(); const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; struct sock *sk_it, *choose_sk = NULL; struct sk_buff *skb = __mptcp_rr_next_segment(meta_sk, reinject); @@ -185,13 +192,23 @@ static struct sk_buff *mptcp_rr_next_segment(struct sock *meta_sk, /* As we set it, we have to reset it as well. */ *limit = 0; - if (!skb) + if (!skb) { + total_rr_time_no_skb += __native_read_tsc() - begin_time; + total_rr_count_no_skb++; return NULL; + } if (*reinject) { *subsk = rr_get_available_subflow(meta_sk, skb, false); - if (!*subsk) + if (!*subsk) { + total_rr_time_no_skb += __native_read_tsc() - begin_time; + total_rr_count_no_skb++; + return NULL; + } + + total_rr_time_skb += __native_read_tsc() - begin_time; + total_rr_count_skb++; return skb; } @@ -252,8 +269,12 @@ static struct sk_buff *mptcp_rr_next_segment(struct sock *meta_sk, struct tcp_sock *choose_tp = tcp_sk(choose_sk); struct rrsched_priv *rsp = rrsched_get_priv(choose_tp); - if (!mptcp_rr_is_available(choose_sk, skb, false, true)) + if (!mptcp_rr_is_available(choose_sk, skb, false, true)) { + total_rr_time_no_skb += __native_read_tsc() - begin_time; + total_rr_count_no_skb++; + return NULL; + } *subsk = choose_sk; mss_now = tcp_current_mss(*subsk); @@ -264,9 +285,15 @@ static struct sk_buff *mptcp_rr_next_segment(struct sock *meta_sk, else rsp->quota++; + total_rr_time_skb += __native_read_tsc() - begin_time; + total_rr_count_skb++; + return skb; } + total_rr_time_no_skb += __native_read_tsc() - begin_time; + total_rr_count_no_skb++; + return NULL; } diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c index 2e1d08a6e930f..42ecbef333653 100644 --- a/net/mptcp/mptcp_sched.c +++ b/net/mptcp/mptcp_sched.c @@ -2,10 +2,14 @@ #include #include +#include +#include static DEFINE_SPINLOCK(mptcp_sched_list_lock); static LIST_HEAD(mptcp_sched_list); +static LIST_HEAD(mptcp_sched_select_list); + struct defsched_priv { u32 last_rbuf_opti; }; @@ -384,11 +388,17 @@ static struct sk_buff *__mptcp_next_segment(struct sock *meta_sk, int *reinject) return skb; } +u64 total_default_time_skb; +u64 
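The total_rr_* counters above, and the total_default_* quadruple that follows for the default scheduler, accumulate raw TSC cycles and call counts separately for the skb and no-skb exit paths. They are updated without any synchronization, so readouts are approximate; a consumer would derive per-call averages along these lines (a sketch, assuming the counts are non-zero when read):

	u64 avg_cycles_skb = total_rr_time_skb / total_rr_count_skb;
	u64 avg_cycles_no_skb = total_rr_time_no_skb / total_rr_count_no_skb;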
total_default_time_no_skb; +u64 total_default_count_skb; +u64 total_default_count_no_skb; + static struct sk_buff *mptcp_next_segment(struct sock *meta_sk, int *reinject, struct sock **subsk, unsigned int *limit) { + u64 begin_time = __native_read_tsc(); struct sk_buff *skb = __mptcp_next_segment(meta_sk, reinject); unsigned int mss_now; struct tcp_sock *subtp; @@ -398,12 +408,18 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk, /* As we set it, we have to reset it as well. */ *limit = 0; - if (!skb) + if (!skb) { + total_default_time_no_skb += __native_read_tsc() - begin_time; + total_default_count_no_skb++; return NULL; + } *subsk = get_available_subflow(meta_sk, skb, false); - if (!*subsk) + if (!*subsk) { + total_default_time_no_skb += __native_read_tsc() - begin_time; + total_default_count_no_skb++; return NULL; + } subtp = tcp_sk(*subsk); mss_now = tcp_current_mss(*subsk); @@ -412,13 +428,21 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk, skb = mptcp_rcv_buf_optimization(*subsk, 1); if (skb) *reinject = -1; - else + else { + total_default_time_no_skb += __native_read_tsc() - begin_time; + total_default_count_no_skb++; + return NULL; + } } /* No splitting required, as we will only send one single segment */ - if (skb->len <= mss_now) + if (skb->len <= mss_now) { + total_default_time_skb += __native_read_tsc() - begin_time; + total_default_count_skb++; + return skb; + } /* The following is similar to tcp_mss_split_point, but * we do not care about nagle, because we will anyways @@ -432,8 +456,12 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk, if (!gso_max_segs) /* No gso supported on the subflow's NIC */ gso_max_segs = 1; max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs); - if (!max_segs) + if (!max_segs) { + total_default_time_no_skb += __native_read_tsc() - begin_time; + total_default_count_no_skb++; + return NULL; + } max_len = mss_now * max_segs; window = tcp_wnd_end(subtp) - subtp->write_seq; @@ -446,6 +474,9 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk, /* Or, take the window */ *limit = needed; + total_default_time_skb += __native_read_tsc() - begin_time; + total_default_count_skb++; + return skb; } @@ -464,7 +495,7 @@ struct mptcp_sched_ops mptcp_sched_default = { .owner = THIS_MODULE, }; -static struct mptcp_sched_ops *mptcp_sched_find(const char *name) +struct mptcp_sched_ops *mptcp_sched_find(const char *name) { struct mptcp_sched_ops *e; @@ -545,9 +576,92 @@ int mptcp_set_default_scheduler(const char *name) return ret; } -void mptcp_init_scheduler(struct mptcp_cb *mpcb) +int mptcp_set_default_scheduler_for_tuple(const char *name, __be32 dstip, + __be16 sport, unsigned long till_time_s) { struct mptcp_sched_ops *sched; + int ret = -ENOENT; + + spin_lock(&mptcp_sched_list_lock); + sched = mptcp_sched_find(name); + mptcp_debug("afr: found scheduler %p for %s\n", sched, name); +#ifdef CONFIG_MODULES + if (!sched && capable(CAP_NET_ADMIN)) { + spin_unlock(&mptcp_sched_list_lock); + + request_module("mptcp_%s", name); + spin_lock(&mptcp_sched_list_lock); + sched = mptcp_sched_find(name); + } +#endif + mptcp_debug("afr: found scheduler %p for %s after module\n", sched, name); + if (sched) { + struct mptcp_sched_select *sched_select = kzalloc(sizeof(struct mptcp_sched_select), GFP_ATOMIC); + sched_select->dstip = dstip; + sched_select->sport = sport; + sched_select->till_time_s = till_time_s; + sched_select->sched_ops = sched; + + list_add(&sched_select->list, 
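The function completed here registers a time-limited scheduler override keyed on an address/port tuple; lookups consult the list until till_time_s (wall-clock seconds) passes. A hypothetical caller, with the scheduler name and the control-path hookup both assumed rather than taken from this patch:

	/* prefer the round-robin scheduler for this tuple for one hour */
	err = mptcp_set_default_scheduler_for_tuple("roundrobin", dstip, sport,
						    get_seconds() + 3600);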
&mptcp_sched_select_list);
+		ret = 0;
+	} else {
+		pr_info("%s is not available\n", name);
+	}
+	spin_unlock(&mptcp_sched_list_lock);
+
+	return ret;
+}
+
+struct mptcp_sched_ops *mptcp_sched_find_for_tuple(__be32 dstip, __be16 sport_end)
+{
+	struct mptcp_sched_select *e;
+	struct mptcp_sched_select *n;
+	struct timespec ts;
+	u16 sport = ntohs(sport_end);
+
+	getnstimeofday(&ts);
+
+	mptcp_debug("afr: searching for ip %i.%i.%i.%i and port %i in list at time %ld\n",
+		    dstip & 0x000000FF,
+		    (dstip & 0x0000FF00)>>8,
+		    (dstip & 0x00FF0000)>>16,
+		    (dstip & 0xFF000000)>>24,
+		    sport, ts.tv_sec);
+	list_for_each_entry_safe(e, n, &mptcp_sched_select_list, list) {
+		mptcp_debug("afr: comparing with ip %i.%i.%i.%i and port %i in list with sched %p\n",
+			    e->dstip & 0x000000FF,
+			    (e->dstip & 0x0000FF00)>>8,
+			    (e->dstip & 0x00FF0000)>>16,
+			    (e->dstip & 0xFF000000)>>24,
+			    e->sport, e->sched_ops);
+		/* Unlink and free expired selections instead of leaking them */
+		if (e->till_time_s < ts.tv_sec) {
+			mptcp_debug("afr: removing expired scheduler selection\n");
+			list_del(&e->list);
+			kfree(e);
+			continue;
+		}
+
+		if (e->dstip == dstip && e->sport == sport)
+			return e->sched_ops;
+	}
+
+	return NULL;
+}
+
+void mptcp_init_scheduler(struct mptcp_cb *mpcb)
+{
+	struct mptcp_sched_ops *sched = NULL;
+
+	/* afr tests to set a scheduler specific to this connection */
+	struct inet_sock *isk_tmp = inet_sk(mpcb->master_sk);
+
+	/* the local address and remote port select the scheduler */
+	mptcp_debug("afr new scheduler for %u:%i\n", isk_tmp->inet_saddr, isk_tmp->inet_dport);
+	sched = mptcp_sched_find_for_tuple(isk_tmp->inet_saddr, isk_tmp->inet_dport);
+	mptcp_debug("afr found %p\n", sched);
+
+	if (sched) {
+		mpcb->sched_ops = sched;
+		return;
+	}
 	rcu_read_lock();
 	list_for_each_entry_rcu(sched, &mptcp_sched_list, list) {
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index e6163017c42db..5d0c6fd59475a 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -89,6 +89,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
 	static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
 #endif
 	synchronize_net();
+	nf_queue_nf_hook_drop(reg);
 }
 EXPORT_SYMBOL(nf_unregister_hook);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 5d2b806a862e6..38fbc194b9cb7 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -319,7 +319,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	 * return *ignored=0 i.e.
ICMP and NF_DROP */ sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb, iph); + if (sched) { + /* read svc->sched_data after svc->scheduler */ + smp_rmb(); + dest = sched->schedule(svc, skb, iph); + } else { + dest = NULL; + } if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); @@ -467,7 +473,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, } sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb, iph); + if (sched) { + /* read svc->sched_data after svc->scheduler */ + smp_rmb(); + dest = sched->schedule(svc, skb, iph); + } else { + dest = NULL; + } if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 285eae3a14548..24c554201a766 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -842,15 +842,16 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, __ip_vs_dst_cache_reset(dest); spin_unlock_bh(&dest->dst_lock); - sched = rcu_dereference_protected(svc->scheduler, 1); if (add) { ip_vs_start_estimator(svc->net, &dest->stats); list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; - if (sched->add_dest) + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched && sched->add_dest) sched->add_dest(svc, dest); } else { - if (sched->upd_dest) + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched && sched->upd_dest) sched->upd_dest(svc, dest); } } @@ -1084,7 +1085,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, struct ip_vs_scheduler *sched; sched = rcu_dereference_protected(svc->scheduler, 1); - if (sched->del_dest) + if (sched && sched->del_dest) sched->del_dest(svc, dest); } } @@ -1175,11 +1176,14 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, ip_vs_use_count_inc(); /* Lookup the scheduler by 'u->sched_name' */ - sched = ip_vs_scheduler_get(u->sched_name); - if (sched == NULL) { - pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); - ret = -ENOENT; - goto out_err; + if (strcmp(u->sched_name, "none")) { + sched = ip_vs_scheduler_get(u->sched_name); + if (!sched) { + pr_info("Scheduler module ip_vs_%s not found\n", + u->sched_name); + ret = -ENOENT; + goto out_err; + } } if (u->pe_name && *u->pe_name) { @@ -1240,10 +1244,12 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, spin_lock_init(&svc->stats.lock); /* Bind the scheduler */ - ret = ip_vs_bind_scheduler(svc, sched); - if (ret) - goto out_err; - sched = NULL; + if (sched) { + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) + goto out_err; + sched = NULL; + } /* Bind the ct retriever */ RCU_INIT_POINTER(svc->pe, pe); @@ -1291,17 +1297,20 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, static int ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) { - struct ip_vs_scheduler *sched, *old_sched; + struct ip_vs_scheduler *sched = NULL, *old_sched; struct ip_vs_pe *pe = NULL, *old_pe = NULL; int ret = 0; /* * Lookup the scheduler, by 'u->sched_name' */ - sched = ip_vs_scheduler_get(u->sched_name); - if (sched == NULL) { - pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); - return -ENOENT; + if (strcmp(u->sched_name, "none")) { + sched = ip_vs_scheduler_get(u->sched_name); + if (!sched) { + pr_info("Scheduler module ip_vs_%s not found\n", + u->sched_name); + return -ENOENT; + } } old_sched = sched; @@ -1329,14 
+1338,20 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) old_sched = rcu_dereference_protected(svc->scheduler, 1); if (sched != old_sched) { + if (old_sched) { + ip_vs_unbind_scheduler(svc, old_sched); + RCU_INIT_POINTER(svc->scheduler, NULL); + /* Wait all svc->sched_data users */ + synchronize_rcu(); + } /* Bind the new scheduler */ - ret = ip_vs_bind_scheduler(svc, sched); - if (ret) { - old_sched = sched; - goto out; + if (sched) { + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) { + ip_vs_scheduler_put(sched); + goto out; + } } - /* Unbind the old scheduler on success */ - ip_vs_unbind_scheduler(svc, old_sched); } /* @@ -1982,6 +1997,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); + char *sched_name = sched ? sched->name : "none"; if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 @@ -1990,18 +2006,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), - sched->name); + sched_name); else #endif seq_printf(seq, "%s %08X:%04X %s %s ", ip_vs_proto_name(svc->protocol), ntohl(svc->addr.ip), ntohs(svc->port), - sched->name, + sched_name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } else { seq_printf(seq, "FWM %08X %s %s", - svc->fwmark, sched->name, + svc->fwmark, sched_name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } @@ -2427,13 +2443,15 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { struct ip_vs_scheduler *sched; struct ip_vs_kstats kstats; + char *sched_name; sched = rcu_dereference_protected(src->scheduler, 1); + sched_name = sched ? sched->name : "none"; dst->protocol = src->protocol; dst->addr = src->addr.ip; dst->port = src->port; dst->fwmark = src->fwmark; - strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); + strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name)); dst->flags = src->flags; dst->timeout = src->timeout / HZ; dst->netmask = src->netmask; @@ -2892,6 +2910,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, struct ip_vs_flags flags = { .flags = svc->flags, .mask = ~0 }; struct ip_vs_kstats kstats; + char *sched_name; nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); if (!nl_service) @@ -2910,8 +2929,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, } sched = rcu_dereference_protected(svc->scheduler, 1); + sched_name = sched ? 
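The rebinding in ip_vs_edit_service() earlier in this hunk is deliberately ordered: unbind the old scheduler, clear svc->scheduler, let synchronize_rcu() drain all svc->sched_data readers, and only then bind the replacement. This is safe only because every reader now tolerates a NULL scheduler, as in the schedule paths patched above:

	sched = rcu_dereference(svc->scheduler);
	if (sched) {
		/* read svc->sched_data after svc->scheduler */
		smp_rmb();
		dest = sched->schedule(svc, skb, iph);
	} else {
		dest = NULL;
	}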
sched->name : "none"; pe = rcu_dereference_protected(svc->pe, 1); - if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || + if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) || (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 199760c71f399..7e81416479434 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -74,7 +74,7 @@ void ip_vs_unbind_scheduler(struct ip_vs_service *svc, if (sched->done_service) sched->done_service(svc); - /* svc->scheduler can not be set to NULL */ + /* svc->scheduler can be set to NULL only by caller */ } @@ -147,21 +147,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) { - struct ip_vs_scheduler *sched; + struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); + char *sched_name = sched ? sched->name : "none"; - sched = rcu_dereference(svc->scheduler); if (svc->fwmark) { IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", - sched->name, svc->fwmark, svc->fwmark, msg); + sched_name, svc->fwmark, svc->fwmark, msg); #ifdef CONFIG_IP_VS_IPV6 } else if (svc->af == AF_INET6) { IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", - sched->name, ip_vs_proto_name(svc->protocol), + sched_name, ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), msg); #endif } else { IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", - sched->name, ip_vs_proto_name(svc->protocol), + sched_name, ip_vs_proto_name(svc->protocol), &svc->addr.ip, ntohs(svc->port), msg); } } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 19b9cce6c210c..150047c739fa6 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -612,7 +612,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, pkts = atomic_add_return(1, &cp->in_pkts); else pkts = sysctl_sync_threshold(ipvs); - ip_vs_sync_conn(net, cp->control, pkts); + ip_vs_sync_conn(net, cp, pkts); } } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 19986ec5f21ad..258f1e05250fa 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -130,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; - fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? FLOWI_FLAG_KNOWN_NH : 0; @@ -519,10 +518,27 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, if (ret == NF_ACCEPT) { nf_reset(skb); skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); } return ret; } +/* In the event of a remote destination, it's possible that we would have + * matches against an old socket (particularly a TIME-WAIT socket). This + * causes havoc down the line (ip_local_out et. al. expect regular sockets + * and invalid memory accesses will happen) so simply drop the association + * in this case. +*/ +static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb) +{ + /* If dev is set, the packet came from the LOCAL_IN callback and + * not from a local TCP socket. 
+ */ + if (skb->dev) + skb_orphan(skb); +} + /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, struct ip_vs_conn *cp, int local) @@ -534,12 +550,23 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 1); + + /* Remove the early_demux association unless it's bound for the + * exact same port and address on this host after translation. + */ + if (!local || cp->vport != cp->dport || + !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr)) + ip_vs_drop_early_demux_sk(skb); + if (!local) { skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, NULL, skb_dst(skb)->dev, dst_output_sk); } else ret = NF_ACCEPT; + return ret; } @@ -553,7 +580,10 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) ip_vs_notrack(skb); if (!local) { + ip_vs_drop_early_demux_sk(skb); skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, NULL, skb_dst(skb)->dev, dst_output_sk); } else @@ -841,6 +871,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, struct ipv6hdr *old_ipv6h = NULL; #endif + ip_vs_drop_early_demux_sk(skb); + if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 7a17070c5dabb..b45a4223cb058 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -219,7 +219,8 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; } - return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); + return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) && + nf_ct_zone(a->master) == nf_ct_zone(b->master); } static inline int expect_matches(const struct nf_conntrack_expect *a, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d1c23940a86ad..6b8b0abbfab48 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2995,11 +2995,6 @@ ctnetlink_create_expect(struct net *net, u16 zone, } err = nf_ct_expect_related_report(exp, portid, report); - if (err < 0) - goto err_exp; - - return 0; -err_exp: nf_ct_expect_put(exp); err_ct: nf_ct_put(ct); diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index ea7f36784b3da..399210693c2a8 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -19,6 +19,7 @@ unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, /* nf_queue.c */ int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, struct nf_hook_state *state, unsigned int queuenum); +void nf_queue_nf_hook_drop(struct nf_hook_ops *ops); int __init netfilter_queue_init(void); /* nf_log.c */ diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 675d12c69e325..a5d41dfa9f05d 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -107,12 +107,17 @@ EXPORT_SYMBOL(nf_log_register); void nf_log_unregister(struct nf_logger *logger) { + const struct nf_logger *log; int i; mutex_lock(&nf_log_mutex); - for (i = 0; i < NFPROTO_NUMPROTO; i++) - RCU_INIT_POINTER(loggers[i][logger->type], NULL); + for (i = 0; i < NFPROTO_NUMPROTO; i++) { + log = 
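The surrounding loop in nf_log_unregister() now clears a slot only when it still points at the logger being removed, so unregistering logger A no longer wipes out a logger B that has since replaced it for some protocol family; the added synchronize_rcu() then guarantees no reader still holds the cleared pointer. Condensed, the per-slot test is:

	if (nft_log_dereference(loggers[i][logger->type]) == logger)
		RCU_INIT_POINTER(loggers[i][logger->type], NULL);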
nft_log_dereference(loggers[i][logger->type]); + if (log == logger) + RCU_INIT_POINTER(loggers[i][logger->type], NULL); + } mutex_unlock(&nf_log_mutex); + synchronize_rcu(); } EXPORT_SYMBOL(nf_log_unregister); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 2e88032cd5ad2..cd60d397fe056 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -105,6 +105,23 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); +void nf_queue_nf_hook_drop(struct nf_hook_ops *ops) +{ + const struct nf_queue_handler *qh; + struct net *net; + + rtnl_lock(); + rcu_read_lock(); + qh = rcu_dereference(queue_handler); + if (qh) { + for_each_net(net) { + qh->nf_hook_drop(net, ops); + } + } + rcu_read_unlock(); + rtnl_unlock(); +} + /* * Any packet that leaves via this function must come back * through nf_reinject(). diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index f153b07073afb..f77bad46ac683 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -114,7 +114,8 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { const struct nft_chain *chain = ops->priv, *basechain = chain; - const struct net *net = read_pnet(&nft_base_chain(basechain)->pnet); + const struct net *chain_net = read_pnet(&nft_base_chain(basechain)->pnet); + const struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_regs regs; @@ -124,6 +125,10 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) int rulenum; unsigned int gencursor = nft_genmask_cur(net); + /* Ignore chains that are not for the current network namespace */ + if (!net_eq(net, chain_net)) + return NF_ACCEPT; + do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8b117c90ecd76..69e3ceffa14dd 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -432,6 +432,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, static void nfnetlink_rcv(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); + u_int16_t res_id; int msglen; if (nlh->nlmsg_len < NLMSG_HDRLEN || @@ -456,7 +457,12 @@ static void nfnetlink_rcv(struct sk_buff *skb) nfgenmsg = nlmsg_data(nlh); skb_pull(skb, msglen); - nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id); + /* Work around old nft using host byte order */ + if (nfgenmsg->res_id == NFNL_SUBSYS_NFTABLES) + res_id = NFNL_SUBSYS_NFTABLES; + else + res_id = ntohs(nfgenmsg->res_id); + nfnetlink_rcv_batch(skb, nlh, res_id); } else { netlink_rcv_skb(skb, &nfnetlink_rcv_msg); } diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 11c7682fa0ea1..32d0437abdd8a 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -824,6 +824,27 @@ static struct notifier_block nfqnl_dev_notifier = { .notifier_call = nfqnl_rcv_dev_event, }; +static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long ops_ptr) +{ + return entry->elem == (struct nf_hook_ops *)ops_ptr; +} + +static void nfqnl_nf_hook_drop(struct net *net, struct nf_hook_ops *hook) +{ + struct nfnl_queue_net *q = nfnl_queue_pernet(net); + int i; + + rcu_read_lock(); + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct nfqnl_instance *inst; + struct hlist_head *head = &q->instance_table[i]; + + 
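nf_queue_nf_hook_drop() above walks every network namespace under rtnl_lock(), which for_each_net() requires, and delegates to the queue handler's new nf_hook_drop callback. nfnetlink_queue implements that callback by flushing each instance with a comparator, so only entries queued from the dying hook are dropped while unrelated packets stay queued:

	/* drop only the entries whose hook matches */
	nfqnl_flush(inst, nf_hook_cmp, (unsigned long)hook);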
hlist_for_each_entry_rcu(inst, head, hlist) + nfqnl_flush(inst, nf_hook_cmp, (unsigned long)hook); + } + rcu_read_unlock(); +} + static int nfqnl_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -1031,7 +1052,8 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { }; static const struct nf_queue_handler nfqh = { - .outfn = &nfqnl_enqueue_packet, + .outfn = &nfqnl_enqueue_packet, + .nf_hook_drop = &nfqnl_nf_hook_drop, }; static int diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7f29cfc76349f..4d05c7bf5a033 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -617,6 +617,13 @@ struct nft_xt { static struct nft_expr_type nft_match_type; +static bool nft_match_cmp(const struct xt_match *match, + const char *name, u32 rev, u32 family) +{ + return strcmp(match->name, name) == 0 && match->revision == rev && + (match->family == NFPROTO_UNSPEC || match->family == family); +} + static const struct nft_expr_ops * nft_match_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -624,7 +631,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, struct nft_xt *nft_match; struct xt_match *match; char *mt_name; - __u32 rev, family; + u32 rev, family; if (tb[NFTA_MATCH_NAME] == NULL || tb[NFTA_MATCH_REV] == NULL || @@ -639,8 +646,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_match, &nft_match_list, head) { struct xt_match *match = nft_match->ops.data; - if (strcmp(match->name, mt_name) == 0 && - match->revision == rev && match->family == family) { + if (nft_match_cmp(match, mt_name, rev, family)) { if (!try_module_get(match->me)) return ERR_PTR(-ENOENT); @@ -691,6 +697,13 @@ static LIST_HEAD(nft_target_list); static struct nft_expr_type nft_target_type; +static bool nft_target_cmp(const struct xt_target *tg, + const char *name, u32 rev, u32 family) +{ + return strcmp(tg->name, name) == 0 && tg->revision == rev && + (tg->family == NFPROTO_UNSPEC || tg->family == family); +} + static const struct nft_expr_ops * nft_target_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -698,7 +711,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, struct nft_xt *nft_target; struct xt_target *target; char *tg_name; - __u32 rev, family; + u32 rev, family; if (tb[NFTA_TARGET_NAME] == NULL || tb[NFTA_TARGET_REV] == NULL || @@ -713,8 +726,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_target, &nft_target_list, head) { struct xt_target *target = nft_target->ops.data; - if (strcmp(target->name, tg_name) == 0 && - target->revision == rev && target->family == family) { + if (nft_target_cmp(target, tg_name, rev, family)) { if (!try_module_get(target->me)) return ERR_PTR(-ENOENT); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index bf6e76643f787..0d6038c87bef7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -123,6 +123,24 @@ static inline u32 netlink_group_mask(u32 group) return group ? 
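In the nft_compat changes above, nft_match_cmp() and nft_target_cmp() fix the same lookup defect: xtables extensions registered with NFPROTO_UNSPEC are family-agnostic, yet the old exact comparison could never find them for a family-specific request. The predicate now accepts either an exact family or the wildcard:

	return strcmp(match->name, name) == 0 && match->revision == rev &&
	       (match->family == NFPROTO_UNSPEC || match->family == family);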
1 << (group - 1) : 0; } +static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, + gfp_t gfp_mask) +{ + unsigned int len = skb_end_offset(skb); + struct sk_buff *new; + + new = alloc_skb(len, gfp_mask); + if (new == NULL) + return NULL; + + NETLINK_CB(new).portid = NETLINK_CB(skb).portid; + NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group; + NETLINK_CB(new).creds = NETLINK_CB(skb).creds; + + memcpy(skb_put(new, len), skb->data, len); + return new; +} + int netlink_add_tap(struct netlink_tap *nt) { if (unlikely(nt->dev->type != ARPHRD_NETLINK)) @@ -204,7 +222,11 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, int ret = -ENOMEM; dev_hold(dev); - nskb = skb_clone(skb, GFP_ATOMIC); + + if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) + nskb = netlink_to_full_skb(skb, GFP_ATOMIC); + else + nskb = skb_clone(skb, GFP_ATOMIC); if (nskb) { nskb->dev = dev; nskb->protocol = htons((u16) sk->sk_protocol); @@ -276,11 +298,6 @@ static void netlink_rcv_wake(struct sock *sk) } #ifdef CONFIG_NETLINK_MMAP -static bool netlink_skb_is_mmaped(const struct sk_buff *skb) -{ - return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; -} - static bool netlink_rx_is_mmaped(struct sock *sk) { return nlk_sk(sk)->rx_ring.pg_vec != NULL; @@ -355,25 +372,52 @@ static void **alloc_pg_vec(struct netlink_sock *nlk, return NULL; } + +static void +__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, + unsigned int order) +{ + struct netlink_sock *nlk = nlk_sk(sk); + struct sk_buff_head *queue; + struct netlink_ring *ring; + + queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; + ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; + + spin_lock_bh(&queue->lock); + + ring->frame_max = req->nm_frame_nr - 1; + ring->head = 0; + ring->frame_size = req->nm_frame_size; + ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; + + swap(ring->pg_vec_len, req->nm_block_nr); + swap(ring->pg_vec_order, order); + swap(ring->pg_vec, pg_vec); + + __skb_queue_purge(queue); + spin_unlock_bh(&queue->lock); + + WARN_ON(atomic_read(&nlk->mapped)); + + if (pg_vec) + free_pg_vec(pg_vec, order, req->nm_block_nr); +} + static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, - bool closing, bool tx_ring) + bool tx_ring) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ring *ring; - struct sk_buff_head *queue; void **pg_vec = NULL; unsigned int order = 0; - int err; ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; - queue = tx_ring ? 
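__netlink_set_ring() is split out here so that the socket destructor can tear rings down unconditionally: netlink_set_ring() keeps the -EBUSY checks for user requests, while netlink_sock_destruct() (below) installs a zeroed request directly, bypassing those checks. The teardown call shape, as used later in this hunk with a stack-local struct nl_mmap_req req:

	memset(&req, 0, sizeof(req));
	if (nlk->rx_ring.pg_vec)
		__netlink_set_ring(sk, &req, false, NULL, 0);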
&sk->sk_write_queue : &sk->sk_receive_queue; - if (!closing) { - if (atomic_read(&nlk->mapped)) - return -EBUSY; - if (atomic_read(&ring->pending)) - return -EBUSY; - } + if (atomic_read(&nlk->mapped)) + return -EBUSY; + if (atomic_read(&ring->pending)) + return -EBUSY; if (req->nm_block_nr) { if (ring->pg_vec != NULL) @@ -405,31 +449,19 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, return -EINVAL; } - err = -EBUSY; mutex_lock(&nlk->pg_vec_lock); - if (closing || atomic_read(&nlk->mapped) == 0) { - err = 0; - spin_lock_bh(&queue->lock); - - ring->frame_max = req->nm_frame_nr - 1; - ring->head = 0; - ring->frame_size = req->nm_frame_size; - ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; - - swap(ring->pg_vec_len, req->nm_block_nr); - swap(ring->pg_vec_order, order); - swap(ring->pg_vec, pg_vec); - - __skb_queue_purge(queue); - spin_unlock_bh(&queue->lock); - - WARN_ON(atomic_read(&nlk->mapped)); + if (atomic_read(&nlk->mapped) == 0) { + __netlink_set_ring(sk, req, tx_ring, pg_vec, order); + mutex_unlock(&nlk->pg_vec_lock); + return 0; } + mutex_unlock(&nlk->pg_vec_lock); if (pg_vec) free_pg_vec(pg_vec, order, req->nm_block_nr); - return err; + + return -EBUSY; } static void netlink_mm_open(struct vm_area_struct *vma) @@ -817,7 +849,6 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) } #else /* CONFIG_NETLINK_MMAP */ -#define netlink_skb_is_mmaped(skb) false #define netlink_rx_is_mmaped(sk) false #define netlink_tx_is_mmaped(sk) false #define netlink_mmap sock_no_mmap @@ -898,10 +929,10 @@ static void netlink_sock_destruct(struct sock *sk) memset(&req, 0, sizeof(req)); if (nlk->rx_ring.pg_vec) - netlink_set_ring(sk, &req, true, false); + __netlink_set_ring(sk, &req, false, NULL, 0); memset(&req, 0, sizeof(req)); if (nlk->tx_ring.pg_vec) - netlink_set_ring(sk, &req, true, true); + __netlink_set_ring(sk, &req, true, NULL, 0); } #endif /* CONFIG_NETLINK_MMAP */ @@ -1065,8 +1096,8 @@ static int netlink_insert(struct sock *sk, u32 portid) lock_sock(sk); - err = -EBUSY; - if (nlk_sk(sk)->portid) + err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY; + if (nlk_sk(sk)->bound) goto err; err = -ENOMEM; @@ -1079,12 +1110,21 @@ static int netlink_insert(struct sock *sk, u32 portid) err = __netlink_insert(table, sk); if (err) { + /* In case the hashtable backend returns with -EBUSY + * from here, it must not escape to the caller. + */ + if (unlikely(err == -EBUSY)) + err = -EOVERFLOW; if (err == -EEXIST) err = -EADDRINUSE; - nlk_sk(sk)->portid = 0; sock_put(sk); + goto err; } + /* We need to ensure that the socket is hashed and visible. */ + smp_wmb(); + nlk_sk(sk)->bound = portid; + err: release_sock(sk); return err; @@ -1464,6 +1504,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; long unsigned int groups = nladdr->nl_groups; + bool bound; if (addr_len < sizeof(struct sockaddr_nl)) return -EINVAL; @@ -1480,9 +1521,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return err; } - if (nlk->portid) + bound = nlk->bound; + if (bound) { + /* Ensure nlk->portid is up-to-date. */ + smp_rmb(); + if (nladdr->nl_pid != nlk->portid) return -EINVAL; + } if (nlk->netlink_bind && groups) { int group; @@ -1498,7 +1544,10 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, } } - if (!nlk->portid) { + /* No need for barriers here as we return to user-space without + * using any of the bound attributes. 
+ */ + if (!bound) { err = nladdr->nl_pid ? netlink_insert(sk, nladdr->nl_pid) : netlink_autobind(sock); @@ -1546,7 +1595,10 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; - if (!nlk->portid) + /* No need for barriers here as we return to user-space without + * using any of the bound attributes. + */ + if (!nlk->bound) err = netlink_autobind(sock); if (err == 0) { @@ -2197,7 +2249,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, return -EINVAL; if (copy_from_user(&req, optval, sizeof(req))) return -EFAULT; - err = netlink_set_ring(sk, &req, false, + err = netlink_set_ring(sk, &req, optname == NETLINK_TX_RING); break; } @@ -2303,10 +2355,13 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) dst_group = nlk->dst_group; } - if (!nlk->portid) { + if (!nlk->bound) { err = netlink_autobind(sock); if (err) goto out; + } else { + /* Ensure nlk is hashed and visible. */ + smp_rmb(); } /* It's a really convoluted way for userland to ask for mmaped @@ -2629,6 +2684,7 @@ static int netlink_dump(struct sock *sk) struct sk_buff *skb = NULL; struct nlmsghdr *nlh; int len, err = -ENOBUFS; + int alloc_min_size; int alloc_size; mutex_lock(nlk->cb_mutex); @@ -2637,9 +2693,6 @@ static int netlink_dump(struct sock *sk) goto errout_skb; } - cb = &nlk->cb; - alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); - if (!netlink_rx_is_mmaped(sk) && atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto errout_skb; @@ -2649,23 +2702,35 @@ static int netlink_dump(struct sock *sk) * to reduce number of system calls on dump operations, if user * ever provided a big enough buffer. */ - if (alloc_size < nlk->max_recvmsg_len) { - skb = netlink_alloc_skb(sk, - nlk->max_recvmsg_len, - nlk->portid, + cb = &nlk->cb; + alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); + + if (alloc_min_size < nlk->max_recvmsg_len) { + alloc_size = nlk->max_recvmsg_len; + skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); - /* available room should be exact amount to avoid MSG_TRUNC */ - if (skb) - skb_reserve(skb, skb_tailroom(skb) - - nlk->max_recvmsg_len); } - if (!skb) + if (!skb) { + alloc_size = alloc_min_size; skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL); + } if (!skb) goto errout_skb; + + /* Trim skb to allocated size. User is expected to provide buffer as + * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at + * netlink_recvmsg())). dump will pack as many smaller messages as + * could fit within the allocated skb. skb is typically allocated + * with larger space than required (could be as much as near 2x the + * requested size with align to next power of 2 approach). Allowing + * dump to use the excess space makes it difficult for a user to have a + * reasonable static buffer based on the expected largest dump of a + * single netdev. The outcome is MSG_TRUNC error. 
+ */ + skb_reserve(skb, skb_tailroom(skb) - alloc_size); netlink_skb_set_owner_r(skb, sk); len = cb->dump(skb, cb); diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 89008405d6b4d..14437d9b1965d 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -35,6 +35,7 @@ struct netlink_sock { unsigned long state; size_t max_recvmsg_len; wait_queue_head_t wait; + bool bound; bool cb_running; struct netlink_callback cb; struct mutex *cb_mutex; @@ -59,6 +60,15 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk) return container_of(sk, struct netlink_sock, sk); } +static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb) +{ +#ifdef CONFIG_NETLINK_MMAP + return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; +#else + return false; +#endif /* CONFIG_NETLINK_MMAP */ +} + struct netlink_table { struct rhashtable hash; struct hlist_head mc_list; diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index ed54ec5338363..91ecbd1c2ec17 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -101,6 +101,20 @@ struct nci_hcp_packet { #define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f) #define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f) +static int nci_hci_result_to_errno(u8 result) +{ + switch (result) { + case NCI_HCI_ANY_OK: + return 0; + case NCI_HCI_ANY_E_REG_PAR_UNKNOWN: + return -EOPNOTSUPP; + case NCI_HCI_ANY_E_TIMEOUT: + return -ETIME; + default: + return -1; + } +} + /* HCI core */ static void nci_hci_reset_pipes(struct nci_hci_dev *hdev) { @@ -146,18 +160,18 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, if (!conn_info) return -EPROTO; - skb = nci_skb_alloc(ndev, 2 + conn_info->max_pkt_payload_len + + i = 0; + skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + NCI_DATA_HDR_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; - skb_reserve(skb, 2 + NCI_DATA_HDR_SIZE); + skb_reserve(skb, NCI_DATA_HDR_SIZE + 2); *skb_push(skb, 1) = data_type; - i = 0; - len = conn_info->max_pkt_payload_len; - do { + len = conn_info->max_pkt_payload_len; + /* If last packet add NCI_HFP_NO_CHAINING */ if (i + conn_info->max_pkt_payload_len - (skb->len + 1) >= data_len) { @@ -177,9 +191,15 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, return r; i += len; + if (i < data_len) { - skb_trim(skb, 0); - skb_pull(skb, len); + skb = nci_skb_alloc(ndev, + conn_info->max_pkt_payload_len + + NCI_DATA_HDR_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, NCI_DATA_HDR_SIZE + 1); } } while (i < data_len); @@ -212,7 +232,8 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, const u8 *param, size_t param_len, struct sk_buff **skb) { - struct nci_conn_info *conn_info; + struct nci_hcp_message *message; + struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; @@ -232,9 +253,15 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); - - if (r == NCI_STATUS_OK) - *skb = conn_info->rx_skb; + if (r == NCI_STATUS_OK) { + message = (struct nci_hcp_message *)conn_info->rx_skb->data; + r = nci_hci_result_to_errno( + NCI_HCP_MSG_GET_CMD(message->header)); + skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + + if (!r && skb) + *skb = conn_info->rx_skb; + } return r; } @@ -328,9 +355,6 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, struct nci_conn_info *conn_info; u8 status = result; - if (result != NCI_HCI_ANY_OK) - goto exit; - conn_info = 
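With nci_hci_result_to_errno() in place, nci_hci_send_cmd() above and nci_hci_set_param()/nci_hci_get_param() further below all follow one response idiom: on NCI_STATUS_OK, read the HCP message header, convert the HCI result to a negative errno, strip the header, and hand the remaining skb to the caller only on success and only if the caller asked for it:

	message = (struct nci_hcp_message *)conn_info->rx_skb->data;
	r = nci_hci_result_to_errno(NCI_HCP_MSG_GET_CMD(message->header));
	skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN);
	if (!r && skb)
		*skb = conn_info->rx_skb;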
ndev->hci_dev->conn_info; if (!conn_info) { status = NCI_STATUS_REJECTED; @@ -340,7 +364,7 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, conn_info->rx_skb = skb; exit: - nci_req_complete(ndev, status); + nci_req_complete(ndev, NCI_STATUS_OK); } /* Receive hcp message for pipe, with type and cmd. @@ -378,7 +402,7 @@ static void nci_hci_msg_rx_work(struct work_struct *work) u8 pipe, type, instruction; while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) { - pipe = skb->data[0]; + pipe = NCI_HCP_MSG_GET_PIPE(skb->data[0]); skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN); message = (struct nci_hcp_message *)skb->data; type = NCI_HCP_MSG_GET_TYPE(message->header); @@ -395,7 +419,7 @@ void nci_hci_data_received_cb(void *context, { struct nci_dev *ndev = (struct nci_dev *)context; struct nci_hcp_packet *packet; - u8 pipe, type, instruction; + u8 pipe, type; struct sk_buff *hcp_skb; struct sk_buff *frag_skb; int msg_len; @@ -415,7 +439,7 @@ void nci_hci_data_received_cb(void *context, /* it's the last fragment. Does it need re-aggregation? */ if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) { - pipe = packet->header & NCI_HCI_FRAGMENT; + pipe = NCI_HCP_MSG_GET_PIPE(packet->header); skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); msg_len = 0; @@ -434,7 +458,7 @@ void nci_hci_data_received_cb(void *context, *skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe; skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { - msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; + msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; memcpy(skb_put(hcp_skb, msg_len), frag_skb->data + NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len); } @@ -452,11 +476,10 @@ void nci_hci_data_received_cb(void *context, packet = (struct nci_hcp_packet *)hcp_skb->data; type = NCI_HCP_MSG_GET_TYPE(packet->message.header); if (type == NCI_HCI_HCP_RESPONSE) { - pipe = packet->header; - instruction = NCI_HCP_MSG_GET_CMD(packet->message.header); - skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN + - NCI_HCI_HCP_MESSAGE_HEADER_LEN); - nci_hci_hcp_message_rx(ndev, pipe, type, instruction, hcp_skb); + pipe = NCI_HCP_MSG_GET_PIPE(packet->header); + skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN); + nci_hci_hcp_message_rx(ndev, pipe, type, + NCI_STATUS_OK, hcp_skb); } else { skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb); schedule_work(&ndev->hci_dev->msg_rx_work); @@ -488,6 +511,7 @@ EXPORT_SYMBOL(nci_hci_open_pipe); int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, const u8 *param, size_t param_len) { + struct nci_hcp_message *message; struct nci_conn_info *conn_info; struct nci_data data; int r; @@ -520,6 +544,12 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); + if (r == NCI_STATUS_OK) { + message = (struct nci_hcp_message *)conn_info->rx_skb->data; + r = nci_hci_result_to_errno( + NCI_HCP_MSG_GET_CMD(message->header)); + skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + } kfree(tmp); return r; @@ -529,6 +559,7 @@ EXPORT_SYMBOL(nci_hci_set_param); int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, struct sk_buff **skb) { + struct nci_hcp_message *message; struct nci_conn_info *conn_info; struct nci_data data; int r; @@ -553,8 +584,15 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); - if (r == NCI_STATUS_OK) - *skb = conn_info->rx_skb; 
+ if (r == NCI_STATUS_OK) { + message = (struct nci_hcp_message *)conn_info->rx_skb->data; + r = nci_hci_result_to_errno( + NCI_HCP_MSG_GET_CMD(message->header)); + skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + + if (!r && skb) + *skb = conn_info->rx_skb; + } return r; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 096c6276e6b92..b3fe02a2339e4 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -337,12 +337,10 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, unsigned short gso_type = skb_shinfo(skb)->gso_type; struct sw_flow_key later_key; struct sk_buff *segs, *nskb; - struct ovs_skb_cb ovs_cb; int err; - ovs_cb = *OVS_CB(skb); + BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET); segs = __skb_gso_segment(skb, NETIF_F_SG, false); - *OVS_CB(skb) = ovs_cb; if (IS_ERR(segs)) return PTR_ERR(segs); if (segs == NULL) @@ -360,7 +358,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, /* Queue all of the segments. */ skb = segs; do { - *OVS_CB(skb) = ovs_cb; if (gso_type & SKB_GSO_UDP && skb != segs) key = &later_key; @@ -906,7 +903,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) if (error) goto err_kfree_flow; - ovs_flow_mask_key(&new_flow->key, &key, &mask); + ovs_flow_mask_key(&new_flow->key, &key, true, &mask); /* Extract flow identifier. */ error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], @@ -1033,7 +1030,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, struct sw_flow_key masked_key; int error; - ovs_flow_mask_key(&masked_key, key, mask); + ovs_flow_mask_key(&masked_key, key, true, mask); error = ovs_nla_copy_actions(a, &masked_key, &acts, log); if (error) { OVS_NLERR(log, diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 4613df8c82900..eed562295c788 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -56,20 +56,21 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range) } void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, - const struct sw_flow_mask *mask) + bool full, const struct sw_flow_mask *mask) { - const long *m = (const long *)((const u8 *)&mask->key + - mask->range.start); - const long *s = (const long *)((const u8 *)src + - mask->range.start); - long *d = (long *)((u8 *)dst + mask->range.start); + int start = full ? 0 : mask->range.start; + int len = full ? sizeof *dst : range_n_bytes(&mask->range); + const long *m = (const long *)((const u8 *)&mask->key + start); + const long *s = (const long *)((const u8 *)src + start); + long *d = (long *)((u8 *)dst + start); int i; - /* The memory outside of the 'mask->range' are not set since - * further operations on 'dst' only uses contents within - * 'mask->range'. + /* If 'full' is true then all of 'dst' is fully initialized. Otherwise, + * if 'full' is false the memory outside of the 'mask->range' is left + * uninitialized. This can be used as an optimization when further + * operations on 'dst' only use contents within 'mask->range'. */ - for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) + for (i = 0; i < len; i += sizeof(long)) *d++ = *s++ & *m++; } @@ -91,7 +92,8 @@ struct sw_flow *ovs_flow_alloc(void) /* Initialize the default stat node. */ stats = kmem_cache_alloc_node(flow_stats_cache, - GFP_KERNEL | __GFP_ZERO, 0); + GFP_KERNEL | __GFP_ZERO, + node_online(0) ? 
0 : NUMA_NO_NODE); if (!stats) goto err; @@ -473,7 +475,7 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, u32 hash; struct sw_flow_key masked_key; - ovs_flow_mask_key(&masked_key, unmasked, mask); + ovs_flow_mask_key(&masked_key, unmasked, false, mask); hash = flow_hash(&masked_key, &mask->range); head = find_bucket(ti, hash); hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 616eda10d9554..2dd9900f533df 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -86,5 +86,5 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *, bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *); void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, - const struct sw_flow_mask *mask); + bool full, const struct sw_flow_mask *mask); #endif /* flow_table.h */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b5989c6ee5513..ebc39e66d7049 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1272,16 +1272,6 @@ static void packet_sock_destruct(struct sock *sk) sk_refcnt_debug_dec(sk); } -static int fanout_rr_next(struct packet_fanout *f, unsigned int num) -{ - int x = atomic_read(&f->rr_cur) + 1; - - if (x >= num) - x = 0; - - return x; -} - static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) @@ -1293,13 +1283,9 @@ static unsigned int fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - int cur, old; + unsigned int val = atomic_inc_return(&f->rr_cur); - cur = atomic_read(&f->rr_cur); - while ((old = atomic_cmpxchg(&f->rr_cur, cur, - fanout_rr_next(f, num))) != cur) - cur = old; - return cur; + return val % num; } static unsigned int fanout_demux_cpu(struct packet_fanout *f, @@ -1353,7 +1339,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct packet_fanout *f = pt->af_packet_priv; - unsigned int num = f->num_members; + unsigned int num = READ_ONCE(f->num_members); struct packet_sock *po; unsigned int idx; @@ -1538,6 +1524,20 @@ static void fanout_release(struct sock *sk) mutex_unlock(&fanout_mutex); } +static bool packet_extra_vlan_len_allowed(const struct net_device *dev, + struct sk_buff *skb) +{ + /* Earlier code assumed this would be a VLAN pkt, double-check + * this now that we have the actual packet in hand. We can only + * do this check on Ethernet devices. + */ + if (unlikely(dev->type != ARPHRD_ETHER)) + return false; + + skb_reset_mac_header(skb); + return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)); +} + static const struct proto_ops packet_ops; static const struct proto_ops packet_ops_spkt; @@ -1699,18 +1699,10 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, goto retry; } - if (len > (dev->mtu + dev->hard_header_len + extra_len)) { - /* Earlier code assumed this would be a VLAN pkt, - * double-check this now that we have the actual - * packet in hand. 
- */ - struct ethhdr *ehdr; - skb_reset_mac_header(skb); - ehdr = eth_hdr(skb); - if (ehdr->h_proto != htons(ETH_P_8021Q)) { - err = -EMSGSIZE; - goto out_unlock; - } + if (len > (dev->mtu + dev->hard_header_len + extra_len) && + !packet_extra_vlan_len_allowed(dev, skb)) { + err = -EMSGSIZE; + goto out_unlock; } skb->protocol = proto; @@ -2129,6 +2121,15 @@ static bool ll_header_truncated(const struct net_device *dev, int len) return false; } +static void tpacket_set_protocol(const struct net_device *dev, + struct sk_buff *skb) +{ + if (dev->type == ARPHRD_ETHER) { + skb_reset_mac_header(skb); + skb->protocol = eth_hdr(skb)->h_proto; + } +} + static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, int size_max, __be16 proto, unsigned char *addr, int hlen) @@ -2165,8 +2166,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb_reserve(skb, hlen); skb_reset_network_header(skb); - if (!packet_use_direct_xmit(po)) - skb_probe_transport_header(skb, 0); if (unlikely(po->tp_tx_has_off)) { int off_min, off_max, off; off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); @@ -2212,6 +2211,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, dev->hard_header_len); if (unlikely(err)) return err; + if (!skb->protocol) + tpacket_set_protocol(dev, skb); data += dev->hard_header_len; to_write -= dev->hard_header_len; @@ -2246,6 +2247,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, len = ((to_write > len_max) ? len_max : to_write); } + skb_probe_transport_header(skb, 0); + return tp_len; } @@ -2290,12 +2293,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (unlikely(!(dev->flags & IFF_UP))) goto out_put; - reserve = dev->hard_header_len + VLAN_HLEN; + if (po->sk.sk_socket->type == SOCK_RAW) + reserve = dev->hard_header_len; size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); - if (size_max > dev->mtu + reserve) - size_max = dev->mtu + reserve; + if (size_max > dev->mtu + reserve + VLAN_HLEN) + size_max = dev->mtu + reserve + VLAN_HLEN; do { ph = packet_current_frame(po, &po->tx_ring, @@ -2321,18 +2325,11 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, addr, hlen); - if (tp_len > dev->mtu + dev->hard_header_len) { - struct ethhdr *ehdr; - /* Earlier code assumed this would be a VLAN pkt, - * double-check this now that we have the actual - * packet in hand. - */ + if (likely(tp_len >= 0) && + tp_len > dev->mtu + reserve && + !packet_extra_vlan_len_allowed(dev, skb)) + tp_len = -EMSGSIZE; - skb_reset_mac_header(skb); - ehdr = eth_hdr(skb); - if (ehdr->h_proto != htons(ETH_P_8021Q)) - tp_len = -EMSGSIZE; - } if (unlikely(tp_len < 0)) { if (po->tp_loss) { __packet_set_status(po, ph, @@ -2553,18 +2550,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (!gso_type && (len > dev->mtu + reserve + extra_len)) { - /* Earlier code assumed this would be a VLAN pkt, - * double-check this now that we have the actual - * packet in hand. 
- */ - struct ethhdr *ehdr; - skb_reset_mac_header(skb); - ehdr = eth_hdr(skb); - if (ehdr->h_proto != htons(ETH_P_8021Q)) { - err = -EMSGSIZE; - goto out_free; - } + if (!gso_type && (len > dev->mtu + reserve + extra_len) && + !packet_extra_vlan_len_allowed(dev, skb)) { + err = -EMSGSIZE; + goto out_free; } skb->protocol = proto; @@ -2595,8 +2584,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) len += vnet_hdr_len; } - if (!packet_use_direct_xmit(po)) - skb_probe_transport_header(skb, reserve); + skb_probe_transport_header(skb, reserve); + if (unlikely(extra_len == 4)) skb->no_fcs = 1; @@ -2699,22 +2688,40 @@ static int packet_release(struct socket *sock) * Attach a packet hook. */ -static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) +static int packet_do_bind(struct sock *sk, const char *name, int ifindex, + __be16 proto) { struct packet_sock *po = pkt_sk(sk); - const struct net_device *dev_curr; + struct net_device *dev_curr; __be16 proto_curr; bool need_rehook; + struct net_device *dev = NULL; + int ret = 0; + bool unlisted = false; - if (po->fanout) { - if (dev) - dev_put(dev); - + if (po->fanout) return -EINVAL; - } lock_sock(sk); spin_lock(&po->bind_lock); + rcu_read_lock(); + + if (name) { + dev = dev_get_by_name_rcu(sock_net(sk), name); + if (!dev) { + ret = -ENODEV; + goto out_unlock; + } + } else if (ifindex) { + dev = dev_get_by_index_rcu(sock_net(sk), ifindex); + if (!dev) { + ret = -ENODEV; + goto out_unlock; + } + } + + if (dev) + dev_hold(dev); proto_curr = po->prot_hook.type; dev_curr = po->prot_hook.dev; @@ -2722,24 +2729,37 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) need_rehook = proto_curr != proto || dev_curr != dev; if (need_rehook) { - unregister_prot_hook(sk, true); + if (po->running) { + rcu_read_unlock(); + __unregister_prot_hook(sk, true); + rcu_read_lock(); + dev_curr = po->prot_hook.dev; + if (dev) + unlisted = !dev_get_by_index_rcu(sock_net(sk), + dev->ifindex); + } po->num = proto; po->prot_hook.type = proto; - if (po->prot_hook.dev) - dev_put(po->prot_hook.dev); - - po->prot_hook.dev = dev; - - po->ifindex = dev ? dev->ifindex : 0; - packet_cached_dev_assign(po, dev); + if (unlikely(unlisted)) { + dev_put(dev); + po->prot_hook.dev = NULL; + po->ifindex = -1; + packet_cached_dev_reset(po); + } else { + po->prot_hook.dev = dev; + po->ifindex = dev ? 
dev->ifindex : 0; + packet_cached_dev_assign(po, dev); + } } + if (dev_curr) + dev_put(dev_curr); if (proto == 0 || !need_rehook) goto out_unlock; - if (!dev || (dev->flags & IFF_UP)) { + if (!unlisted && (!dev || (dev->flags & IFF_UP))) { register_prot_hook(sk); } else { sk->sk_err = ENETDOWN; @@ -2748,9 +2768,10 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) } out_unlock: + rcu_read_unlock(); spin_unlock(&po->bind_lock); release_sock(sk); - return 0; + return ret; } /* @@ -2762,8 +2783,6 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, { struct sock *sk = sock->sk; char name[15]; - struct net_device *dev; - int err = -ENODEV; /* * Check legality @@ -2773,19 +2792,13 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, return -EINVAL; strlcpy(name, uaddr->sa_data, sizeof(name)); - dev = dev_get_by_name(sock_net(sk), name); - if (dev) - err = packet_do_bind(sk, dev, pkt_sk(sk)->num); - return err; + return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; struct sock *sk = sock->sk; - struct net_device *dev = NULL; - int err; - /* * Check legality @@ -2796,16 +2809,8 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len if (sll->sll_family != AF_PACKET) return -EINVAL; - if (sll->sll_ifindex) { - err = -ENODEV; - dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex); - if (dev == NULL) - goto out; - } - err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); - -out: - return err; + return packet_do_bind(sk, NULL, sll->sll_ifindex, + sll->sll_protocol ? : pkt_sk(sk)->num); } static struct proto packet_proto = { diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 32ab87d348286..11d0b29ce4b87 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -377,6 +377,10 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, struct sockaddr_pn sa; u16 len; + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + /* check we have at least a full Phonet header */ if (!pskb_pull(skb, sizeof(struct phonethdr))) goto out; diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 273b8bff6ba44..657ba9f5d3086 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -759,8 +759,10 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, } ibmr = rds_ib_alloc_fmr(rds_ibdev); - if (IS_ERR(ibmr)) + if (IS_ERR(ibmr)) { + rds_ib_dev_put(rds_ibdev); return ibmr; + } ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents); if (ret == 0) diff --git a/net/rds/info.c b/net/rds/info.c index 9a6b4f66187cf..140a44a5f7b7f 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -176,7 +176,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, /* check for all kinds of wrapping and the like */ start = (unsigned long)optval; - if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) { + if (len < 0 || len > INT_MAX - PAGE_SIZE + 1 || start + len < start) { ret = -EINVAL; goto out; } diff --git a/net/rds/send.c b/net/rds/send.c index e9430f537f9c2..7b30c0f3180d1 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -986,11 +986,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) release_sock(sk); } - /* racing with another thread binding seems ok here */ + lock_sock(sk); if (daddr == 0 || rs->rs_bound_addr == 0) { + release_sock(sk); 
ret = -ENOTCONN; /* XXX not a great errno */ goto out; } + release_sock(sk); /* size of rm including all sgs */ ret = rds_rm_size(msg, payload_len); diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index fbc5ef88bc0e6..27a992154804c 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -214,8 +214,15 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, } to_copy = min(tc->t_tinc_data_rem, left); - pskb_pull(clone, offset); - pskb_trim(clone, to_copy); + if (!pskb_pull(clone, offset) || + pskb_trim(clone, to_copy)) { + pr_warn("rds_tcp_data_recv: pull/trim failed " + "left %zu data_rem %zu skb_len %d\n", + left, tc->t_tinc_data_rem, skb->len); + kfree_skb(clone); + desc->error = -ENOMEM; + goto out; + } skb_queue_tail(&tinc->ti_skb_list, clone); rdsdebug("skb %p data %p len %d off %u to_copy %zu -> " diff --git a/net/rfkill/core.c b/net/rfkill/core.c index fa7cd792791cb..a97bb7332607a 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1081,17 +1081,6 @@ static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait) return res; } -static bool rfkill_readable(struct rfkill_data *data) -{ - bool r; - - mutex_lock(&data->mtx); - r = !list_empty(&data->events); - mutex_unlock(&data->mtx); - - return r; -} - static ssize_t rfkill_fop_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { @@ -1108,8 +1097,11 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf, goto out; } mutex_unlock(&data->mtx); + /* since we re-check and it just compares pointers, + * using !list_empty() without locking isn't a problem + */ ret = wait_event_interruptible(data->read_wait, - rfkill_readable(data)); + !list_empty(&data->events)); mutex_lock(&data->mtx); if (ret) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 3d43e4979f27c..f8d9c2a2c451d 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -45,7 +45,7 @@ void tcf_hash_destroy(struct tc_action *a) } EXPORT_SYMBOL(tcf_hash_destroy); -int tcf_hash_release(struct tc_action *a, int bind) +int __tcf_hash_release(struct tc_action *a, bool bind, bool strict) { struct tcf_common *p = a->priv; int ret = 0; @@ -53,7 +53,7 @@ int tcf_hash_release(struct tc_action *a, int bind) if (p) { if (bind) p->tcfc_bindcnt--; - else if (p->tcfc_bindcnt > 0) + else if (strict && p->tcfc_bindcnt > 0) return -EPERM; p->tcfc_refcnt--; @@ -64,9 +64,10 @@ int tcf_hash_release(struct tc_action *a, int bind) ret = 1; } } + return ret; } -EXPORT_SYMBOL(tcf_hash_release); +EXPORT_SYMBOL(__tcf_hash_release); static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, struct tc_action *a) @@ -136,7 +137,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a) head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; hlist_for_each_entry_safe(p, n, head, tcfc_head) { a->priv = p; - ret = tcf_hash_release(a, 0); + ret = __tcf_hash_release(a, false, true); if (ret == ACT_P_DELETED) { module_put(a->ops->owner); n_i++; @@ -413,7 +414,7 @@ int tcf_action_destroy(struct list_head *actions, int bind) int ret = 0; list_for_each_entry_safe(a, tmp, actions, list) { - ret = tcf_hash_release(a, bind); + ret = __tcf_hash_release(a, bind, true); if (ret == ACT_P_DELETED) module_put(a->ops->owner); else if (ret < 0) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index dc6a2d324bd81..521ffca91228a 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -27,9 +27,10 @@ struct tcf_bpf_cfg { struct bpf_prog *filter; struct sock_filter *bpf_ops; - char *bpf_name; + const char 
*bpf_name; u32 bpf_fd; u16 bpf_num_ops; + bool is_ebpf; }; static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, @@ -200,6 +201,7 @@ static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) cfg->bpf_ops = bpf_ops; cfg->bpf_num_ops = bpf_num_ops; cfg->filter = fp; + cfg->is_ebpf = false; return 0; } @@ -234,18 +236,40 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) cfg->bpf_fd = bpf_fd; cfg->bpf_name = name; cfg->filter = fp; + cfg->is_ebpf = true; return 0; } +static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg) +{ + if (cfg->is_ebpf) + bpf_prog_put(cfg->filter); + else + bpf_prog_destroy(cfg->filter); + + kfree(cfg->bpf_ops); + kfree(cfg->bpf_name); +} + +static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, + struct tcf_bpf_cfg *cfg) +{ + cfg->is_ebpf = tcf_bpf_is_ebpf(prog); + cfg->filter = prog->filter; + + cfg->bpf_ops = prog->bpf_ops; + cfg->bpf_name = prog->bpf_name; +} + static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action *act, int replace, int bind) { struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tcf_bpf_cfg cfg, old; struct tc_act_bpf *parm; struct tcf_bpf *prog; - struct tcf_bpf_cfg cfg; bool is_bpf, is_ebpf; int ret; @@ -294,6 +318,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, prog = to_bpf(act); spin_lock_bh(&prog->tcf_lock); + if (ret != ACT_P_CREATED) + tcf_bpf_prog_fill_cfg(prog, &old); + prog->bpf_ops = cfg.bpf_ops; prog->bpf_name = cfg.bpf_name; @@ -309,29 +336,22 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (ret == ACT_P_CREATED) tcf_hash_insert(act); + else + tcf_bpf_cfg_cleanup(&old); return ret; destroy_fp: - if (is_ebpf) - bpf_prog_put(cfg.filter); - else - bpf_prog_destroy(cfg.filter); - - kfree(cfg.bpf_ops); - kfree(cfg.bpf_name); - + tcf_bpf_cfg_cleanup(&cfg); return ret; } static void tcf_bpf_cleanup(struct tc_action *act, int bind) { - const struct tcf_bpf *prog = act->priv; + struct tcf_bpf_cfg tmp; - if (tcf_bpf_is_ebpf(prog)) - bpf_prog_put(prog->filter); - else - bpf_prog_destroy(prog->filter); + tcf_bpf_prog_fill_cfg(act->priv, &tmp); + tcf_bpf_cfg_cleanup(&tmp); } static struct tc_action_ops act_bpf_ops __read_mostly = { diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 3f63ceac8e014..844dd85426dca 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -166,6 +166,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; + skb_sender_cpu_clear(skb2); err = dev_queue_xmit(skb2); out: diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 91bd9c19471d5..c0b86f2bfe221 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -364,7 +364,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; if (oldprog) { - list_replace_rcu(&prog->link, &oldprog->link); + list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); call_rcu(&oldprog->rcu, __cls_bpf_delete_prog); } else { diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index a620c4e288a51..75df923f5c03c 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -419,6 +419,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (!fnew) goto err2; + tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + fold = (struct flow_filter *)*arg; if (fold) { err = -EINVAL; @@ -480,7 +482,6 @@ static int flow_change(struct net *net, struct sk_buff 
*in_skb, fnew->mask = ~0U; fnew->tp = tp; get_random_bytes(&fnew->hashrnd, 4); - tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); } fnew->perturb_timer.function = flow_perturbation; @@ -520,7 +521,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (*arg == 0) list_add_tail_rcu(&fnew->list, &head->filters); else - list_replace_rcu(&fnew->list, &fold->list); + list_replace_rcu(&fold->list, &fnew->list); *arg = (unsigned long)fnew; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 715e01e5910a9..f23a3b68bba63 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -33,7 +33,6 @@ struct fw_head { u32 mask; - bool mask_set; struct fw_filter __rcu *ht[HTSIZE]; struct rcu_head rcu; }; @@ -84,7 +83,7 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, } } } else { - /* old method */ + /* Old method: classify the packet using its skb mark. */ if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id ^ tp->q->handle)))) { res->classid = id; @@ -114,14 +113,9 @@ static unsigned long fw_get(struct tcf_proto *tp, u32 handle) static int fw_init(struct tcf_proto *tp) { - struct fw_head *head; - - head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); - if (head == NULL) - return -ENOBUFS; - - head->mask_set = false; - rcu_assign_pointer(tp->root, head); + /* We don't allocate fw_head here, because in the old method + * we don't need it at all. + */ return 0; } @@ -252,7 +246,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, int err; if (!opt) - return handle ? -EINVAL : 0; + return handle ? -EINVAL : 0; /* Succeed if it is old method. */ err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy); if (err < 0) @@ -302,11 +296,17 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (!handle) return -EINVAL; - if (!head->mask_set) { - head->mask = 0xFFFFFFFF; + if (!head) { + u32 mask = 0xFFFFFFFF; if (tb[TCA_FW_MASK]) - head->mask = nla_get_u32(tb[TCA_FW_MASK]); - head->mask_set = true; + mask = nla_get_u32(tb[TCA_FW_MASK]); + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (!head) + return -ENOBUFS; + head->mask = mask; + + rcu_assign_pointer(tp->root, head); } f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index cab9e9b43967a..4fbb67430ce48 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -490,6 +490,19 @@ static bool u32_destroy(struct tcf_proto *tp, bool force) return false; } } + + if (tp_c->refcnt > 1) + return false; + + if (tp_c->refcnt == 1) { + struct tc_u_hnode *ht; + + for (ht = rtnl_dereference(tp_c->hlist); + ht; + ht = rtnl_dereference(ht->next)) + if (!ht_empty(ht)) + return false; + } } if (root_ht && --root_ht->refcnt == 0) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1e1c89e51a118..68c599a5e1d1d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -253,7 +253,8 @@ int qdisc_set_default(const char *name) } /* We know handle. Find qdisc among all qdisc's attached to device - (root qdisc, all its children, children of children etc.) + * (root qdisc, all its children, children of children etc.) 
+ * Note: caller either uses rtnl or rcu_read_lock()
  */
 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
@@ -264,7 +265,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
     root->handle == handle)
 return root;
- list_for_each_entry(q, &root->list, list) {
+ list_for_each_entry_rcu(q, &root->list, list) {
 if (q->handle == handle)
 return q;
 }
@@ -277,15 +278,18 @@ void qdisc_list_add(struct Qdisc *q)
 struct Qdisc *root = qdisc_dev(q)->qdisc;
 WARN_ON_ONCE(root == &noop_qdisc);
- list_add_tail(&q->list, &root->list);
+ ASSERT_RTNL();
+ list_add_tail_rcu(&q->list, &root->list);
 }
 }
 EXPORT_SYMBOL(qdisc_list_add);
 void qdisc_list_del(struct Qdisc *q)
 {
- if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
- list_del(&q->list);
+ if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
+ ASSERT_RTNL();
+ list_del_rcu(&q->list);
+ }
 }
 EXPORT_SYMBOL(qdisc_list_del);
@@ -750,14 +754,18 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 if (n == 0)
 return;
 drops = max_t(int, n, 0);
+ rcu_read_lock();
 while ((parentid = sch->parent)) {
 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
- return;
+ break;
+ if (sch->flags & TCQ_F_NOPARENT)
+ break;
+ /* TODO: perform the search on a per txq basis */
 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
 if (sch == NULL) {
- WARN_ON(parentid != TC_H_ROOT);
- return;
+ WARN_ON_ONCE(parentid != TC_H_ROOT);
+ break;
 }
 cops = sch->ops->cl_ops;
 if (cops->qlen_notify) {
@@ -768,6 +776,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 sch->q.qlen -= n;
 __qdisc_qstats_drop(sch, drops);
 }
+ rcu_read_unlock();
 }
 EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index c244c45b78d7f..9291598b5aad4 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -162,10 +162,10 @@ static unsigned int fq_codel_drop(struct Qdisc *sch)
 skb = dequeue_head(flow);
 len = qdisc_pkt_len(skb);
 q->backlogs[idx] -= len;
- kfree_skb(skb);
 sch->q.qlen--;
 qdisc_qstats_drop(sch);
 qdisc_qstats_backlog_dec(sch, skb);
+ kfree_skb(skb);
 flow->dropped++;
 return idx;
 }
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6efca30894aad..3c6f6b774ba6c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -666,8 +666,10 @@ static void qdisc_rcu_free(struct rcu_head *head)
 {
 struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
- if (qdisc_is_percpu_stats(qdisc))
+ if (qdisc_is_percpu_stats(qdisc)) {
 free_percpu(qdisc->cpu_bstats);
+ free_percpu(qdisc->cpu_qstats);
+ }
 kfree((char *) qdisc - qdisc->padded);
 }
@@ -743,7 +745,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
 return;
 }
 if (!netif_is_multiqueue(dev))
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 }
 dev_queue->qdisc_sleeping = qdisc;
 }
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index f3cbaecd283af..3e82f047caaf4 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -63,7 +63,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
 if (qdisc == NULL)
 goto err;
 priv->qdiscs[ntx] = qdisc;
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 }
 sch->flags |= TCQ_F_MQROOT;
@@ -156,7 +156,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
 *old = dev_graft_qdisc(dev_queue, new);
 if (new)
- new->flags |= TCQ_F_ONETXQUEUE;
+ new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 if (dev->flags & IFF_UP)
 dev_activate(dev);
 return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 3811a745452cf..ad70ecf57ce79 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -132,7 +132,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 goto err;
 }
 priv->qdiscs[i] = qdisc;
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 }
 /* If the mqprio options indicate that hardware should own
@@ -209,7 +209,7 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
 *old = dev_graft_qdisc(dev_queue, new);
 if (new)
- new->flags |= TCQ_F_ONETXQUEUE;
+ new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 if (dev->flags & IFF_UP)
 dev_activate(dev);
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 4f15b7d730e13..1543e39f47c33 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -809,8 +809,8 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
 if (!has_sha1)
 return -EINVAL;
- memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0],
- hmacs->shmac_num_idents * sizeof(__u16));
+ for (i = 0; i < hmacs->shmac_num_idents; i++)
+ ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
 ep->auth_hmacs_list->param_hdr.length =
 htons(sizeof(sctp_paramhdr_t) +
 hmacs->shmac_num_idents * sizeof(__u16));
 return 0;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0e4198ee23708..3267a5cbb3e86 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -634,6 +634,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
 struct sock *newsk;
 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 struct sctp6_sock *newsctp6sk;
+ struct ipv6_txoptions *opt;
 newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot);
 if (!newsk)
@@ -653,6 +654,13 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt)
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ rcu_read_unlock();
+
 /* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname()
 * and getpeername().
*/ diff --git a/net/sctp/output.c b/net/sctp/output.c index fc5e45b8a832d..abe7c2db24120 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -599,7 +599,9 @@ int sctp_packet_transmit(struct sctp_packet *packet) return err; no_route: kfree_skb(nskb); - IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); + + if (asoc) + IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 53b7acde9aa37..9d134ab3351f5 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -60,6 +60,8 @@ #include #include +#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024) + /* Global data structures. */ struct sctp_globals sctp_globals __read_mostly; @@ -1166,7 +1168,7 @@ static void sctp_v4_del_protocol(void) unregister_inetaddr_notifier(&sctp_inetaddr_notifier); } -static int __net_init sctp_net_init(struct net *net) +static int __net_init sctp_defaults_init(struct net *net) { int status; @@ -1259,12 +1261,6 @@ static int __net_init sctp_net_init(struct net *net) sctp_dbg_objcnt_init(net); - /* Initialize the control inode/socket for handling OOTB packets. */ - if ((status = sctp_ctl_sock_init(net))) { - pr_err("Failed to initialize the SCTP control sock\n"); - goto err_ctl_sock_init; - } - /* Initialize the local address list. */ INIT_LIST_HEAD(&net->sctp.local_addr_list); spin_lock_init(&net->sctp.local_addr_lock); @@ -1280,9 +1276,6 @@ static int __net_init sctp_net_init(struct net *net) return 0; -err_ctl_sock_init: - sctp_dbg_objcnt_exit(net); - sctp_proc_exit(net); err_init_proc: cleanup_sctp_mibs(net); err_init_mibs: @@ -1291,15 +1284,12 @@ static int __net_init sctp_net_init(struct net *net) return status; } -static void __net_exit sctp_net_exit(struct net *net) +static void __net_exit sctp_defaults_exit(struct net *net) { /* Free the local address list */ sctp_free_addr_wq(net); sctp_free_local_addr_list(net); - /* Free the control endpoint. */ - inet_ctl_sock_destroy(net->sctp.ctl_sock); - sctp_dbg_objcnt_exit(net); sctp_proc_exit(net); @@ -1307,9 +1297,32 @@ static void __net_exit sctp_net_exit(struct net *net) sctp_sysctl_net_unregister(net); } -static struct pernet_operations sctp_net_ops = { - .init = sctp_net_init, - .exit = sctp_net_exit, +static struct pernet_operations sctp_defaults_ops = { + .init = sctp_defaults_init, + .exit = sctp_defaults_exit, +}; + +static int __net_init sctp_ctrlsock_init(struct net *net) +{ + int status; + + /* Initialize the control inode/socket for handling OOTB packets. */ + status = sctp_ctl_sock_init(net); + if (status) + pr_err("Failed to initialize the SCTP control sock\n"); + + return status; +} + +static void __net_init sctp_ctrlsock_exit(struct net *net) +{ + /* Free the control endpoint. */ + inet_ctl_sock_destroy(net->sctp.ctl_sock); +} + +static struct pernet_operations sctp_ctrlsock_ops = { + .init = sctp_ctrlsock_init, + .exit = sctp_ctrlsock_exit, }; /* Initialize the universe into something sensible. */ @@ -1321,6 +1334,8 @@ static __init int sctp_init(void) unsigned long limit; int max_share; int order; + int num_entries; + int max_entry_order; sock_skb_cb_check_size(sizeof(struct sctp_ulpevent)); @@ -1373,14 +1388,24 @@ static __init int sctp_init(void) /* Size and allocate the association hash table. * The methodology is similar to that of the tcp hash tables. + * Though not identical. 
Start by getting a goal size
 */
 if (totalram_pages >= (128 * 1024))
 goal = totalram_pages >> (22 - PAGE_SHIFT);
 else
 goal = totalram_pages >> (24 - PAGE_SHIFT);
- for (order = 0; (1UL << order) < goal; order++)
- ;
+ /* Then compute the page order for said goal */
+ order = get_order(goal);
+
+ /* Now compute the required page order for the maximum sized table we
+ * want to create
+ */
+ max_entry_order = get_order(MAX_SCTP_PORT_HASH_ENTRIES *
+ sizeof(struct sctp_bind_hashbucket));
+
+ /* Limit the page order by that maximum hash table size */
+ order = min(order, max_entry_order);
 do {
 sctp_assoc_hashsize = (1UL << order) * PAGE_SIZE /
@@ -1414,27 +1439,42 @@ static __init int sctp_init(void)
 INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain);
 }
- /* Allocate and initialize the SCTP port hash table. */
+ /* Allocate and initialize the SCTP port hash table.
+ * Note that order is initialized to start at the max sized
+ * table we want to support. If we can't get that many pages,
+ * reduce the order and try again.
+ */
 do {
- sctp_port_hashsize = (1UL << order) * PAGE_SIZE /
- sizeof(struct sctp_bind_hashbucket);
- if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
- continue;
 sctp_port_hashtable = (struct sctp_bind_hashbucket *)
 __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
 } while (!sctp_port_hashtable && --order > 0);
+
 if (!sctp_port_hashtable) {
 pr_err("Failed bind hash alloc\n");
 status = -ENOMEM;
 goto err_bhash_alloc;
 }
+
+ /* Now compute the number of entries that will fit in the
+ * port hash space we allocated
+ */
+ num_entries = (1UL << order) * PAGE_SIZE /
+ sizeof(struct sctp_bind_hashbucket);
+
+ /* And finish by rounding it down to the nearest power of two.
+ * This wastes some memory of course, but it's needed because
+ * the hash function operates based on the assumption that
+ * the number of entries is a power of two.
+ */
+ sctp_port_hashsize = rounddown_pow_of_two(num_entries);
+
 for (i = 0; i < sctp_port_hashsize; i++) {
 spin_lock_init(&sctp_port_hashtable[i].lock);
 INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
 }
- pr_info("Hash tables configured (established %d bind %d)\n",
- sctp_assoc_hashsize, sctp_port_hashsize);
+ pr_info("Hash tables configured (established %d bind %d/%d)\n",
+ sctp_assoc_hashsize, sctp_port_hashsize, num_entries);
 sctp_sysctl_register();
@@ -1442,8 +1482,11 @@ static __init int sctp_init(void)
 sctp_v4_pf_init();
 sctp_v6_pf_init();
- status = sctp_v4_protosw_init();
+ status = register_pernet_subsys(&sctp_defaults_ops);
+ if (status)
+ goto err_register_defaults;
+ status = sctp_v4_protosw_init();
 if (status)
 goto err_protosw_init;
@@ -1451,9 +1494,9 @@ static __init int sctp_init(void)
 if (status)
 goto err_v6_protosw_init;
- status = register_pernet_subsys(&sctp_net_ops);
+ status = register_pernet_subsys(&sctp_ctrlsock_ops);
 if (status)
- goto err_register_pernet_subsys;
+ goto err_register_ctrlsock;
 status = sctp_v4_add_protocol();
 if (status)
@@ -1469,12 +1512,14 @@ static __init int sctp_init(void)
 err_v6_add_protocol:
 sctp_v4_del_protocol();
 err_add_protocol:
- unregister_pernet_subsys(&sctp_net_ops);
-err_register_pernet_subsys:
+ unregister_pernet_subsys(&sctp_ctrlsock_ops);
+err_register_ctrlsock:
 sctp_v6_protosw_exit();
 err_v6_protosw_init:
 sctp_v4_protosw_exit();
 err_protosw_init:
+ unregister_pernet_subsys(&sctp_defaults_ops);
+err_register_defaults:
 sctp_v4_pf_exit();
 sctp_v6_pf_exit();
 sctp_sysctl_unregister();
@@ -1507,12 +1552,14 @@ static __exit void sctp_exit(void)
 sctp_v6_del_protocol();
 sctp_v4_del_protocol();
-
unregister_pernet_subsys(&sctp_net_ops); + unregister_pernet_subsys(&sctp_ctrlsock_ops); /* Free protosw registrations */ sctp_v6_protosw_exit(); sctp_v4_protosw_exit(); + unregister_pernet_subsys(&sctp_defaults_ops); + /* Unregister with socket layer. */ sctp_v6_pf_exit(); sctp_v4_pf_exit(); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 06320c8c1c866..83a07d4686440 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1652,7 +1652,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, /* Set an expiration time for the cookie. */ cookie->c.expiration = ktime_add(asoc->cookie_life, - ktime_get()); + ktime_get_real()); /* Copy the peer's init packet. */ memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr, @@ -1780,7 +1780,7 @@ struct sctp_association *sctp_unpack_cookie( if (sock_flag(ep->base.sk, SOCK_TIMESTAMP)) kt = skb_get_ktime(skb); else - kt = ktime_get(); + kt = ktime_get_real(); if (!asoc && ktime_before(bear_cookie->expiration, kt)) { /* diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index fef2acdf4a2e6..ecae5561b9127 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -244,12 +244,13 @@ void sctp_generate_t3_rtx_event(unsigned long peer) int error; struct sctp_transport *transport = (struct sctp_transport *) peer; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); /* Check whether a task is in the sock. */ - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -272,10 +273,10 @@ void sctp_generate_t3_rtx_event(unsigned long peer) transport, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_transport_put(transport); } @@ -285,11 +286,12 @@ void sctp_generate_t3_rtx_event(unsigned long peer) static void sctp_generate_timeout_event(struct sctp_association *asoc, sctp_event_timeout_t timeout_type) { - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); int error = 0; - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy: timer %d\n", __func__, timeout_type); @@ -312,10 +314,10 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, (void *)timeout_type, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_association_put(asoc); } @@ -365,10 +367,11 @@ void sctp_generate_heartbeat_event(unsigned long data) int error = 0; struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. 
*/ @@ -389,10 +392,10 @@ void sctp_generate_heartbeat_event(unsigned long data) transport, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_transport_put(transport); } @@ -403,10 +406,11 @@ void sctp_generate_proto_unreach_event(unsigned long data) { struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -427,7 +431,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_association_put(asoc); } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 3ee27b7704ffb..e6bb98e583fb9 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4829,7 +4829,8 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( retval = SCTP_DISPOSITION_CONSUME; - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + if (abort) + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); /* Even if we can't send the ABORT due to low memory delete the * TCB. This is a departure from our typical NOMEM handling. @@ -4966,7 +4967,8 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); retval = SCTP_DISPOSITION_CONSUME; - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + if (abort) + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f09de7fac2e6a..3c5833058b036 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1513,8 +1513,7 @@ static void sctp_close(struct sock *sk, long timeout) struct sctp_chunk *chunk; chunk = sctp_make_abort_user(asoc, NULL, 0); - if (chunk) - sctp_primitive_ABORT(net, asoc, chunk); + sctp_primitive_ABORT(net, asoc, chunk); } else sctp_primitive_SHUTDOWN(net, asoc, NULL); } @@ -1528,8 +1527,10 @@ static void sctp_close(struct sock *sk, long timeout) /* Supposedly, no process has access to the socket, but * the net layers still may. + * Also, sctp_destroy_sock() needs to be called with addr_wq_lock + * held and that should be grabbed before socket lock. 
*/ - local_bh_disable(); + spin_lock_bh(&net->sctp.addr_wq_lock); bh_lock_sock(sk); /* Hold the sock, since sk_common_release() will put sock_put() @@ -1539,7 +1540,7 @@ static void sctp_close(struct sock *sk, long timeout) sk_common_release(sk); bh_unlock_sock(sk); - local_bh_enable(); + spin_unlock_bh(&net->sctp.addr_wq_lock); sock_put(sk); @@ -3580,6 +3581,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, if ((val && sp->do_auto_asconf) || (!val && !sp->do_auto_asconf)) return 0; + spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock); if (val == 0 && sp->do_auto_asconf) { list_del(&sp->auto_asconf_list); sp->do_auto_asconf = 0; @@ -3588,6 +3590,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, &sock_net(sk)->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } + spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock); return 0; } @@ -4121,18 +4124,28 @@ static int sctp_init_sock(struct sock *sk) local_bh_disable(); percpu_counter_inc(&sctp_sockets_allocated); sock_prot_inuse_add(net, sk->sk_prot, 1); + + /* Nothing can fail after this block, otherwise + * sctp_destroy_sock() will be called without addr_wq_lock held + */ if (net->sctp.default_auto_asconf) { + spin_lock(&sock_net(sk)->sctp.addr_wq_lock); list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; - } else + spin_unlock(&sock_net(sk)->sctp.addr_wq_lock); + } else { sp->do_auto_asconf = 0; + } + local_bh_enable(); return 0; } -/* Cleanup any SCTP per socket resources. */ +/* Cleanup any SCTP per socket resources. Must be called with + * sock_net(sk)->sctp.addr_wq_lock held if sp->do_auto_asconf is true + */ static void sctp_destroy_sock(struct sock *sk) { struct sctp_sock *sp; @@ -5542,6 +5555,7 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, struct sctp_hmac_algo_param *hmacs; __u16 data_len = 0; u32 num_idents; + int i; if (!ep->auth_enable) return -EACCES; @@ -5559,8 +5573,12 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, return -EFAULT; if (put_user(num_idents, &p->shmac_num_idents)) return -EFAULT; - if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len)) - return -EFAULT; + for (i = 0; i < num_idents; i++) { + __u16 hmacid = ntohs(hmacs->hmac_ids[i]); + + if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16))) + return -EFAULT; + } return 0; } @@ -6640,6 +6658,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) if (cmsgs->srinfo->sinfo_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | + SCTP_SACK_IMMEDIATELY | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; @@ -6663,6 +6682,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) if (cmsgs->sinfo->snd_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | + SCTP_SACK_IMMEDIATELY | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; @@ -7161,6 +7181,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newsk->sk_type = sk->sk_type; newsk->sk_bound_dev_if = sk->sk_bound_dev_if; newsk->sk_flags = sk->sk_flags; + newsk->sk_tsflags = sk->sk_tsflags; newsk->sk_no_check_tx = sk->sk_no_check_tx; newsk->sk_no_check_rx = sk->sk_no_check_rx; newsk->sk_reuse = sk->sk_reuse; @@ -7193,6 +7214,22 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newinet->mc_ttl = 1; newinet->mc_index = 0; newinet->mc_list = NULL; + + if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) + net_enable_timestamp(); +} + +static inline void sctp_copy_descendant(struct sock *sk_to, + const struct sock *sk_from) +{ + int ancestor_size = 
sizeof(struct inet_sock) + + sizeof(struct sctp_sock) - + offsetof(struct sctp_sock, auto_asconf_list); + + if (sk_from->sk_family == PF_INET6) + ancestor_size += sizeof(struct ipv6_pinfo); + + __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); } /* Populate the fields of the newsk from the oldsk and migrate the assoc @@ -7209,7 +7246,6 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sk_buff *skb, *tmp; struct sctp_ulpevent *event; struct sctp_bind_hashbucket *head; - struct list_head tmplist; /* Migrate socket buffer sizes and all the socket level options to the * new socket. @@ -7217,12 +7253,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsk->sk_sndbuf = oldsk->sk_sndbuf; newsk->sk_rcvbuf = oldsk->sk_rcvbuf; /* Brute force copy old sctp opt. */ - if (oldsp->do_auto_asconf) { - memcpy(&tmplist, &newsp->auto_asconf_list, sizeof(tmplist)); - inet_sk_copy_descendant(newsk, oldsk); - memcpy(&newsp->auto_asconf_list, &tmplist, sizeof(tmplist)); - } else - inet_sk_copy_descendant(newsk, oldsk); + sctp_copy_descendant(newsk, oldsk); /* Restore the ep value that was overwritten with the above structure * copy. @@ -7366,6 +7397,13 @@ struct proto sctp_prot = { #if IS_ENABLED(CONFIG_IPV6) +#include +static void sctp_v6_destroy_sock(struct sock *sk) +{ + sctp_destroy_sock(sk); + inet6_destroy_sock(sk); +} + struct proto sctpv6_prot = { .name = "SCTPv6", .owner = THIS_MODULE, @@ -7375,7 +7413,7 @@ struct proto sctpv6_prot = { .accept = sctp_accept, .ioctl = sctp_ioctl, .init = sctp_init_sock, - .destroy = sctp_destroy_sock, + .destroy = sctp_v6_destroy_sock, .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt, diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 26d50c565f542..3e0fc51272259 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -320,7 +320,7 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, struct ctl_table tbl; bool changed = false; char *none = "none"; - char tmp[8]; + char tmp[8] = {0}; int ret; memset(&tbl, 0, sizeof(struct ctl_table)); diff --git a/net/socket.c b/net/socket.c index 884e329976984..dcbfa868e3984 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1705,6 +1705,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, msg.msg_name = addr ? 
(struct sockaddr *)&address : NULL; /* We assume all kernel code knows the size of sockaddr_storage */ msg.msg_namelen = 0; + msg.msg_iocb = NULL; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags); diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 9dd0ea8db463a..28504dfd3dad5 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -60,7 +60,7 @@ static void xprt_free_allocation(struct rpc_rqst *req) dprintk("RPC: free allocations for req= %p\n", req); WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); - xbufp = &req->rq_private_buf; + xbufp = &req->rq_rcv_buf; free_page((unsigned long)xbufp->head[0].iov_base); xbufp = &req->rq_snd_buf; free_page((unsigned long)xbufp->head[0].iov_base); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 2928afffbb81f..8d79e70bd9786 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1218,7 +1218,7 @@ int qword_get(char **bpp, char *dest, int bufsize) if (bp[0] == '\\' && bp[1] == 'x') { /* HEX STRING */ bp += 2; - while (len < bufsize) { + while (len < bufsize - 1) { int h, l; h = hex_to_bin(bp[0]); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1d4fe24af06a1..d109d308ec3a5 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -611,6 +611,7 @@ static void xprt_autoclose(struct work_struct *work) xprt->ops->close(xprt); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); xprt_release_write(xprt, NULL); + wake_up_bit(&xprt->state, XPRT_LOCKED); } /** @@ -720,6 +721,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie) xprt->ops->release_xprt(xprt, NULL); out: spin_unlock_bh(&xprt->transport_lock); + wake_up_bit(&xprt->state, XPRT_LOCKED); } /** @@ -1389,6 +1391,10 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) static void xprt_destroy(struct rpc_xprt *xprt) { dprintk("RPC: destroying transport %p\n", xprt); + + /* Exclude transport connect/disconnect handlers */ + wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE); + del_timer_sync(&xprt->timer); rpc_xprt_debugfs_unregister(xprt); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index f9f13a32ddb82..2873b8d65608e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -146,7 +146,8 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, ctxt->read_hdr = head; pages_needed = min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed)); - read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); + read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset, + rs_length); for (pno = 0; pno < pages_needed; pno++) { int len = min_t(int, rs_length, PAGE_SIZE - pg_off); @@ -245,7 +246,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, ctxt->direction = DMA_FROM_DEVICE; ctxt->frmr = frmr; pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); - read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); + read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset, + rs_length); frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]); frmr->direction = DMA_FROM_DEVICE; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 7de33d1af9b6d..7fa6d78331edd 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -382,6 +382,7 @@ static int send_reply(struct svcxprt_rdma *rdma, int byte_count) { struct ib_send_wr 
send_wr; + u32 xdr_off; int sge_no; int sge_bytes; int page_no; @@ -416,8 +417,8 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->direction = DMA_TO_DEVICE; /* Map the payload indicated by 'byte_count' */ + xdr_off = 0; for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { - int xdr_off = 0; sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); byte_count -= sge_bytes; ctxt->sge[sge_no].addr = @@ -455,6 +456,13 @@ static int send_reply(struct svcxprt_rdma *rdma, } rqstp->rq_next_page = rqstp->rq_respages + 1; + /* The loop above bumps sc_dma_used for each sge. The + * xdr_buf.tail gets a separate sge, but resides in the + * same page as xdr_buf.head. Don't count it twice. + */ + if (sge_no > ctxt->count) + atomic_dec(&rdma->sc_dma_used); + if (sge_no > rdma->sc_max_sge) { pr_err("svcrdma: Too many sges (%d)\n", sge_no); goto err; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 66891e32c5e31..5e3ad598d3f55 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -834,6 +834,7 @@ static void xs_reset_transport(struct sock_xprt *transport) sk->sk_user_data = NULL; xs_restore_old_callbacks(transport, sk); + xprt_clear_connected(xprt); write_unlock_bh(&sk->sk_callback_lock); xs_sock_reset_connection_flags(xprt); @@ -1433,6 +1434,7 @@ static void xs_tcp_data_ready(struct sock *sk) static void xs_tcp_state_change(struct sock *sk) { struct rpc_xprt *xprt; + struct sock_xprt *transport; read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) @@ -1444,13 +1446,12 @@ static void xs_tcp_state_change(struct sock *sk) sock_flag(sk, SOCK_ZAPPED), sk->sk_shutdown); + transport = container_of(xprt, struct sock_xprt, xprt); trace_rpc_socket_state_change(xprt, sk->sk_socket); switch (sk->sk_state) { case TCP_ESTABLISHED: spin_lock(&xprt->transport_lock); if (!xprt_test_and_set_connected(xprt)) { - struct sock_xprt *transport = container_of(xprt, - struct sock_xprt, xprt); /* Reset TCP record info */ transport->tcp_offset = 0; @@ -1459,6 +1460,8 @@ static void xs_tcp_state_change(struct sock *sk) transport->tcp_flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; xprt->connect_cookie++; + clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); + xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, -EAGAIN); } @@ -1494,6 +1497,9 @@ static void xs_tcp_state_change(struct sock *sk) smp_mb__after_atomic(); break; case TCP_CLOSE: + if (test_and_clear_bit(XPRT_SOCK_CONNECTING, + &transport->sock_state)) + xprt_clear_connecting(xprt); xs_sock_mark_closed(xprt); } out: @@ -2110,6 +2116,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) /* Tell the socket layer to start connecting... 
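 * A descriptive aside on the surrounding hunks: the transport now sets
 * XPRT_SOCK_CONNECTING immediately before kernel_connect() and clears
 * it from xs_tcp_state_change() on TCP_ESTABLISHED or TCP_CLOSE, so
 * xprt_clear_connecting() tracks the socket's actual state instead of
 * being called unconditionally from the connect worker's
 * -EINPROGRESS/-EALREADY return path.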
*/ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; + set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); switch (ret) { case 0: @@ -2174,7 +2181,6 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EINPROGRESS: case -EALREADY: xprt_unlock_connect(xprt, transport); - xprt_clear_connecting(xprt); return; case -EINVAL: /* Happens, for instance, if the user specified a link @@ -2216,13 +2222,14 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); - /* Start by resetting any existing state */ - xs_reset_transport(transport); - - if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) { + if (transport->sock != NULL) { dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", xprt, xprt->reestablish_timeout / HZ); + + /* Start by resetting any existing state */ + xs_reset_transport(transport); + queue_delayed_work(rpciod_workqueue, &transport->connect_worker, xprt->reestablish_timeout); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 055453d486683..a8dbe8001e464 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -64,7 +65,6 @@ int netdev_switch_port_stp_update(struct net_device *dev, u8 state) } EXPORT_SYMBOL_GPL(netdev_switch_port_stp_update); -static DEFINE_MUTEX(netdev_switch_mutex); static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain); /** @@ -79,9 +79,9 @@ int register_netdev_switch_notifier(struct notifier_block *nb) { int err; - mutex_lock(&netdev_switch_mutex); + rtnl_lock(); err = raw_notifier_chain_register(&netdev_switch_notif_chain, nb); - mutex_unlock(&netdev_switch_mutex); + rtnl_unlock(); return err; } EXPORT_SYMBOL_GPL(register_netdev_switch_notifier); @@ -97,9 +97,9 @@ int unregister_netdev_switch_notifier(struct notifier_block *nb) { int err; - mutex_lock(&netdev_switch_mutex); + rtnl_lock(); err = raw_notifier_chain_unregister(&netdev_switch_notif_chain, nb); - mutex_unlock(&netdev_switch_mutex); + rtnl_unlock(); return err; } EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier); @@ -113,16 +113,17 @@ EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier); * Call all network notifier blocks. This should be called by driver * when it needs to propagate hardware event. * Return values are same as for atomic_notifier_call_chain(). + * rtnl_lock must be held. 
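+ * Serialising on the RTNL instead of the old private mutex lets the
+ * chain be invoked from contexts that already hold rtnl_lock without
+ * self-deadlock; the ASSERT_RTNL() below documents that contract, and
+ * registration/unregistration of notifier blocks take rtnl_lock for
+ * the same reason.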
*/ int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, struct netdev_switch_notifier_info *info) { int err; + ASSERT_RTNL(); + info->dev = dev; - mutex_lock(&netdev_switch_mutex); err = raw_notifier_call_chain(&netdev_switch_notif_chain, val, info); - mutex_unlock(&netdev_switch_mutex); return err; } EXPORT_SYMBOL_GPL(call_netdev_switch_notifiers); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index c3e96e8154188..e9333147d6f18 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -121,7 +121,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) { struct sk_buff *head = *headbuf; struct sk_buff *frag = *buf; - struct sk_buff *tail; + struct sk_buff *tail = NULL; struct tipc_msg *msg; u32 fragid; int delta; @@ -141,9 +141,15 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (unlikely(skb_unclone(frag, GFP_ATOMIC))) goto err; head = *headbuf = frag; - skb_frag_list_init(head); - TIPC_SKB_CB(head)->tail = NULL; *buf = NULL; + TIPC_SKB_CB(head)->tail = NULL; + if (skb_is_nonlinear(head)) { + skb_walk_frags(head, tail) { + TIPC_SKB_CB(head)->tail = tail; + } + } else { + skb_frag_list_init(head); + } return 0; } diff --git a/net/tipc/msg.h b/net/tipc/msg.h index e1d3595e2ee95..4cbb0fbad0462 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -353,7 +353,7 @@ static inline void msg_set_seqno(struct tipc_msg *m, u32 n) static inline u32 msg_importance(struct tipc_msg *m) { if (unlikely(msg_user(m) == MSG_FRAGMENTER)) - return msg_bits(m, 5, 13, 0x7); + return msg_bits(m, 9, 0, 0x7); if (likely(msg_isdata(m) && !msg_errcode(m))) return msg_user(m); return TIPC_SYSTEM_IMPORTANCE; @@ -362,7 +362,7 @@ static inline u32 msg_importance(struct tipc_msg *m) static inline void msg_set_importance(struct tipc_msg *m, u32 i) { if (unlikely(msg_user(m) == MSG_FRAGMENTER)) - msg_set_bits(m, 5, 13, 0x7, i); + msg_set_bits(m, 9, 0, 0x7, i); else if (likely(i < TIPC_SYSTEM_IMPORTANCE)) msg_set_user(m, i); else diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f485600c4507b..20cc6df071576 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2009,6 +2009,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); if (res) goto exit; + security_sk_clone(sock->sk, new_sock->sk); new_sk = new_sock->sk; new_tsock = tipc_sk(new_sk); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 1c147c869c2e6..948f316019d73 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -302,11 +302,10 @@ static void subscr_conn_msg_event(struct net *net, int conid, struct tipc_net *tn = net_generic(net, tipc_net_id); spin_lock_bh(&subscriber->lock); - subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub); - if (sub) - tipc_nametbl_subscribe(sub); - else + if (subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub)) tipc_conn_terminate(tn->topsrv, subscriber->conid); + else + tipc_nametbl_subscribe(sub); spin_unlock_bh(&subscriber->lock); } diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 66deebc66aa10..f8dfee5072c0e 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -48,6 +48,7 @@ #include #include "core.h" #include "bearer.h" +#include "msg.h" /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 @@ -216,6 +217,10 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) { struct udp_bearer *ub; struct tipc_bearer *b; + int usr = msg_user(buf_msg(skb)); + + if ((usr == LINK_PROTOCOL) || (usr == 
NAME_DISTRIBUTOR)) + skb_linearize(skb); ub = rcu_dereference_sk_user_data(sk); if (!ub) { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 06430598cf512..535a642a16889 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -316,6 +316,118 @@ static struct sock *unix_find_socket_byinode(struct inode *i) return s; } +/* Support code for asymmetrically connected dgram sockets + * + * If a datagram socket is connected to a socket not itself connected + * to the first socket (eg, /dev/log), clients may only enqueue more + * messages if the present receive queue of the server socket is not + * "too large". This means there's a second writeability condition + * poll and sendmsg need to test. The dgram recv code will do a wake + * up on the peer_wait wait queue of a socket upon reception of a + * datagram which needs to be propagated to sleeping would-be writers + * since these might not have sent anything so far. This can't be + * accomplished via poll_wait because the lifetime of the server + * socket might be less than that of its clients if these break their + * association with it or if the server socket is closed while clients + * are still connected to it and there's no way to inform "a polling + * implementation" that it should let go of a certain wait queue + * + * In order to propagate a wake up, a wait_queue_t of the client + * socket is enqueued on the peer_wait queue of the server socket + * whose wake function does a wake_up on the ordinary client socket + * wait queue. This connection is established whenever a write (or + * poll for write) hit the flow control condition and broken when the + * association to the server socket is dissolved or after a wake up + * was relayed. + */ + +static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags, + void *key) +{ + struct unix_sock *u; + wait_queue_head_t *u_sleep; + + u = container_of(q, struct unix_sock, peer_wake); + + __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, + q); + u->peer_wake.private = NULL; + + /* relaying can only happen while the wq still exists */ + u_sleep = sk_sleep(&u->sk); + if (u_sleep) + wake_up_interruptible_poll(u_sleep, key); + + return 0; +} + +static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) +{ + struct unix_sock *u, *u_other; + int rc; + + u = unix_sk(sk); + u_other = unix_sk(other); + rc = 0; + spin_lock(&u_other->peer_wait.lock); + + if (!u->peer_wake.private) { + u->peer_wake.private = other; + __add_wait_queue(&u_other->peer_wait, &u->peer_wake); + + rc = 1; + } + + spin_unlock(&u_other->peer_wait.lock); + return rc; +} + +static void unix_dgram_peer_wake_disconnect(struct sock *sk, + struct sock *other) +{ + struct unix_sock *u, *u_other; + + u = unix_sk(sk); + u_other = unix_sk(other); + spin_lock(&u_other->peer_wait.lock); + + if (u->peer_wake.private == other) { + __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); + u->peer_wake.private = NULL; + } + + spin_unlock(&u_other->peer_wait.lock); +} + +static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, + struct sock *other) +{ + unix_dgram_peer_wake_disconnect(sk, other); + wake_up_interruptible_poll(sk_sleep(sk), + POLLOUT | + POLLWRNORM | + POLLWRBAND); +} + +/* preconditions: + * - unix_peer(sk) == other + * - association is stable + */ +static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) +{ + int connected; + + connected = unix_dgram_peer_wake_connect(sk, other); + + if (unix_recvq_full(other)) + return 1; + + if (connected) + 
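+	/* The ordering above is what makes the relay race-free: the
+	 * wait-queue entry is hooked up before the receive queue is
+	 * sampled, so a peer draining its queue in between still finds
+	 * the entry and relays the wakeup.  If the queue turned out not
+	 * to be full after all, the entry just armed is removed again
+	 * here.
+	 */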
unix_dgram_peer_wake_disconnect(sk, other); + + return 0; +} + static inline int unix_writable(struct sock *sk) { return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; @@ -420,6 +532,8 @@ static void unix_release_sock(struct sock *sk, int embrion) skpair->sk_state_change(skpair); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); } + + unix_dgram_peer_wake_disconnect(sk, skpair); sock_put(skpair); /* It may now die */ unix_peer(sk) = NULL; } @@ -648,6 +762,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); + init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) @@ -1015,6 +1130,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (unix_peer(sk)) { struct sock *old_peer = unix_peer(sk); unix_peer(sk) = other; + unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); + unix_state_double_unlock(sk, other); if (other != old_peer) @@ -1347,7 +1464,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) UNIXCB(skb).fp = NULL; for (i = scm->fp->count-1; i >= 0; i--) - unix_notinflight(scm->fp->fp[i]); + unix_notinflight(scm->fp->user, scm->fp->fp[i]); } static void unix_destruct_scm(struct sk_buff *skb) @@ -1364,6 +1481,21 @@ static void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } +/* + * The "user->unix_inflight" variable is protected by the garbage + * collection lock, and we just read it locklessly here. If you go + * over the limit, there might be a tiny race in actually noticing + * it across threads. Tough. + */ +static inline bool too_many_unix_fds(struct task_struct *p) +{ + struct user_struct *user = current_user(); + + if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); + return false; +} + #define MAX_RECURSION_LEVEL 4 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) @@ -1372,6 +1504,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) unsigned char max_level = 0; int unix_sock_count = 0; + if (too_many_unix_fds(current)) + return -ETOOMANYREFS; + for (i = scm->fp->count - 1; i >= 0; i--) { struct sock *sk = unix_get_socket(scm->fp->fp[i]); @@ -1393,10 +1528,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - if (unix_sock_count) { - for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); - } + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->user, scm->fp->fp[i]); return max_level; } @@ -1453,6 +1586,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, struct scm_cookie scm; int max_level; int data_len = 0; + int sk_locked; wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); @@ -1532,12 +1666,14 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out_free; } + sk_locked = 0; unix_state_lock(other); +restart_locked: err = -EPERM; if (!unix_may_send(sk, other)) goto out_unlock; - if (sock_flag(other, SOCK_DEAD)) { + if (unlikely(sock_flag(other, SOCK_DEAD))) { /* * Check with 1003.1g - what should * datagram error @@ -1545,10 +1681,14 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, unix_state_unlock(other); sock_put(other); + if (!sk_locked) + unix_state_lock(sk); + err = 0; - 
unix_state_lock(sk); if (unix_peer(sk) == other) { unix_peer(sk) = NULL; + unix_dgram_peer_wake_disconnect_wakeup(sk, other); + unix_state_unlock(sk); unix_dgram_disconnected(sk, other); @@ -1574,21 +1714,43 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out_unlock; } - if (unix_peer(other) != sk && unix_recvq_full(other)) { - if (!timeo) { - err = -EAGAIN; - goto out_unlock; + /* other == sk && unix_peer(other) != sk if + * - unix_peer(sk) == NULL, destination address bound to sk + * - unix_peer(sk) == sk by time of get but disconnected before lock + */ + if (other != sk && + unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { + if (timeo) { + timeo = unix_wait_for_peer(other, timeo); + + err = sock_intr_errno(timeo); + if (signal_pending(current)) + goto out_free; + + goto restart; } - timeo = unix_wait_for_peer(other, timeo); + if (!sk_locked) { + unix_state_unlock(other); + unix_state_double_lock(sk, other); + } - err = sock_intr_errno(timeo); - if (signal_pending(current)) - goto out_free; + if (unix_peer(sk) != other || + unix_dgram_peer_wake_me(sk, other)) { + err = -EAGAIN; + sk_locked = 1; + goto out_unlock; + } - goto restart; + if (!sk_locked) { + sk_locked = 1; + goto restart_locked; + } } + if (unlikely(sk_locked)) + unix_state_unlock(sk); + if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); maybe_add_creds(skb, sock, other); @@ -1602,6 +1764,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, return len; out_unlock: + if (sk_locked) + unix_state_unlock(sk); unix_state_unlock(other); out_free: kfree_skb(skb); @@ -1929,14 +2093,12 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, memset(&scm, 0, sizeof(scm)); - err = mutex_lock_interruptible(&u->readlock); - if (unlikely(err)) { - /* recvmsg() in non blocking mode is supposed to return -EAGAIN - * sk_rcvtimeo is not honored by mutex_lock_interruptible() - */ - err = noblock ? 
-EAGAIN : -ERESTARTSYS; - goto out; - } + mutex_lock(&u->readlock); + + if (flags & MSG_PEEK) + skip = sk_peek_offset(sk, flags); + else + skip = 0; do { int chunk; @@ -1972,19 +2134,19 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, timeo = unix_stream_data_wait(sk, timeo, last); - if (signal_pending(current) - || mutex_lock_interruptible(&u->readlock)) { + if (signal_pending(current)) { err = sock_intr_errno(timeo); + scm_destroy(&scm); goto out; } + mutex_lock(&u->readlock); continue; unlock: unix_state_unlock(sk); break; } - skip = sk_peek_offset(sk, flags); while (skip >= unix_skb_len(skb)) { skip -= unix_skb_len(skb); last = skb; @@ -2048,6 +2210,16 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, sk_peek_offset_fwd(sk, chunk); + if (UNIXCB(skb).fp) + break; + + skip = 0; + last = skb; + unix_state_lock(sk); + skb = skb_peek_next(skb, &sk->sk_receive_queue); + if (skb) + goto again; + unix_state_unlock(sk); break; } } while (size); @@ -2231,14 +2403,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, return mask; writable = unix_writable(sk); - other = unix_peer_get(sk); - if (other) { - if (unix_peer(other) != sk) { - sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); - if (unix_recvq_full(other)) - writable = 0; - } - sock_put(other); + if (writable) { + unix_state_lock(sk); + + other = unix_peer(sk); + if (other && unix_peer(other) != sk && + unix_recvq_full(other) && + unix_dgram_peer_wake_me(sk, other)) + writable = 0; + + unix_state_unlock(sk); } if (writable) diff --git a/net/unix/diag.c b/net/unix/diag.c index c512f64d52876..4d9679701a6df 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -220,7 +220,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static struct sock *unix_lookup_by_ino(int ino) +static struct sock *unix_lookup_by_ino(unsigned int ino) { int i; struct sock *sk; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index a73a226f2d33f..6a0d48525fcf9 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -116,15 +116,15 @@ struct sock *unix_get_socket(struct file *filp) * descriptor if it is for an AF_UNIX socket. 
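 * The user_struct argument added below lets the per-user in-flight
 * count (read locklessly by too_many_unix_fds() in af_unix.c) be
 * updated under the same unix_gc_lock as the global accounting, and
 * for every file in an SCM_RIGHTS batch, socket or not.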
*/ -void unix_inflight(struct file *fp) +void unix_inflight(struct user_struct *user, struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); - if (atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); @@ -132,25 +132,28 @@ void unix_inflight(struct file *fp) BUG_ON(list_empty(&u->link)); } unix_tot_inflight++; - spin_unlock(&unix_gc_lock); } + user->unix_inflight++; + spin_unlock(&unix_gc_lock); } -void unix_notinflight(struct file *fp) +void unix_notinflight(struct user_struct *user, struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); unix_tot_inflight--; - spin_unlock(&unix_gc_lock); } + user->unix_inflight--; + spin_unlock(&unix_gc_lock); } static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index dd78445c7d506..04b6f3f6ee0b4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3407,12 +3407,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->iftype)) return -EINVAL; - if (info->attrs[NL80211_ATTR_ACL_POLICY]) { - params.acl = parse_acl_data(&rdev->wiphy, info); - if (IS_ERR(params.acl)) - return PTR_ERR(params.acl); - } - if (info->attrs[NL80211_ATTR_SMPS_MODE]) { params.smps_mode = nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]); @@ -3436,6 +3430,12 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params.smps_mode = NL80211_SMPS_OFF; } + if (info->attrs[NL80211_ATTR_ACL_POLICY]) { + params.acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(params.acl)) + return PTR_ERR(params.acl); + } + wdev_lock(wdev); err = rdev_start_ap(rdev, dev, ¶ms); if (!err) { diff --git a/net/wireless/util.c b/net/wireless/util.c index 70051ab52f4f3..7e4e3fffe7ce4 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -944,7 +944,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, ntype == NL80211_IFTYPE_P2P_CLIENT)) return -EBUSY; - if (ntype != otype && netif_running(dev)) { + if (ntype != otype) { dev->ieee80211_ptr->use_4addr = false; dev->ieee80211_ptr->mesh_id_up_len = 0; wdev_lock(dev->ieee80211_ptr); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index fbcedbe331903..5097dce5b9166 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -153,6 +153,8 @@ static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb) { struct sk_buff *segs; + BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); + BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_SGO_CB_OFFSET); segs = skb_gso_segment(skb, 0); kfree_skb(skb); if (IS_ERR(segs)) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 638af0655aaf8..4cd2076ff84b4 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2806,7 +2806,6 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { - struct net *net; int err = 0; if (unlikely(afinfo == NULL)) return -EINVAL; @@ -2837,26 +2836,6 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) } spin_unlock(&xfrm_policy_afinfo_lock); - rtnl_lock(); - for_each_net(net) { - struct dst_ops *xfrm_dst_ops; - - switch 
(afinfo->family) { - case AF_INET: - xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; - break; -#if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; - break; -#endif - default: - BUG(); - } - *xfrm_dst_ops = *afinfo->dst_ops; - } - rtnl_unlock(); - return err; } EXPORT_SYMBOL(xfrm_policy_register_afinfo); @@ -2892,22 +2871,6 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); -static void __net_init xfrm_dst_ops_init(struct net *net) -{ - struct xfrm_policy_afinfo *afinfo; - - rcu_read_lock(); - afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]); - if (afinfo) - net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; -#if IS_ENABLED(CONFIG_IPV6) - afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]); - if (afinfo) - net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; -#endif - rcu_read_unlock(); -} - static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); @@ -3056,7 +3019,6 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_policy_init(net); if (rv < 0) goto out_policy; - xfrm_dst_ops_init(net); rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index 8965d1bb88119..125d6402f64f8 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -168,7 +168,10 @@ * * For __dynamic_array(int, foo, bar) use __get_dynamic_array(foo) * Use __get_dynamic_array_len(foo) to get the length of the array - * saved. + * saved. Note, __get_dynamic_array_len() returns the total allocated + * length of the dynamic array; __print_array() expects the second + * parameter to be the number of elements. To get that, the array length + * needs to be divided by the element size. * * For __string(foo, bar) use __get_str(foo) * @@ -288,7 +291,7 @@ TRACE_EVENT(foo_bar, * This prints out the array that is defined by __array in a nice format. 
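 * Since __get_dynamic_array_len() reports the allocated length in
 * bytes, the element count handed to __print_array() below is that
 * byte length divided by sizeof(int), matching the element size
 * passed as the third argument.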
*/ __print_array(__get_dynamic_array(list), - __get_dynamic_array_len(list), + __get_dynamic_array_len(list) / sizeof(int), sizeof(int)), __get_str(str), __get_bitmask(cpus)) ); diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index 23e78dcd12bf7..38b64f4873152 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -58,8 +58,8 @@ for name in common: delta.sort() delta.reverse() -print "add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \ - (add, remove, grow, shrink, up, -down, up-down) -print "%-40s %7s %7s %+7s" % ("function", "old", "new", "delta") +print("add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \ + (add, remove, grow, shrink, up, -down, up-down)) +print("%-40s %7s %7s %+7s" % ("function", "old", "new", "delta")) for d, n in delta: - if d: print "%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d) + if d: print("%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d)) diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl index 9cb8522d8d22a..f3d3fb42b8735 100755 --- a/scripts/kconfig/streamline_config.pl +++ b/scripts/kconfig/streamline_config.pl @@ -137,7 +137,7 @@ sub read_config { my $kconfig = $ARGV[1]; my $lsmod_file = $ENV{'LSMOD'}; -my @makefiles = `find $ksource -name Makefile 2>/dev/null`; +my @makefiles = `find $ksource -name Makefile -or -name Kbuild 2>/dev/null`; chomp @makefiles; my %depends; diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 3d1984e59a301..e00bcd1293367 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -42,6 +42,7 @@ #ifndef EM_AARCH64 #define EM_AARCH64 183 +#define R_AARCH64_NONE 0 #define R_AARCH64_ABS64 257 #endif @@ -160,6 +161,22 @@ static int make_nop_x86(void *map, size_t const offset) return 0; } +static unsigned char ideal_nop4_arm64[4] = {0x1f, 0x20, 0x03, 0xd5}; +static int make_nop_arm64(void *map, size_t const offset) +{ + uint32_t *ptr; + + ptr = map + offset; + /* bl <_mcount> is 0x94000000 before relocation */ + if (*ptr != 0x94000000) + return -1; + + /* Convert to nop */ + ulseek(fd_map, offset, SEEK_SET); + uwrite(fd_map, ideal_nop, 4); + return 0; +} + /* * Get the whole file as a programming convenience in order to avoid * malloc+lseek+read+free of many pieces. 
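 * (Aside on make_nop_arm64() above: {0x1f, 0x20, 0x03, 0xd5} is the
 * little-endian byte image of 0xd503201f, the AArch64 NOP, and
 * 0x94000000 is a "bl" with zero offset, i.e. the not-yet-relocated
 * bl <_mcount> placeholder the script expects to overwrite.)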
If successful, then mmap @@ -353,7 +370,12 @@ do_file(char const *const fname) altmcount = "__gnu_mcount_nc"; break; case EM_AARCH64: - reltype = R_AARCH64_ABS64; gpfx = '_'; break; + reltype = R_AARCH64_ABS64; + make_nop = make_nop_arm64; + rel_type_nop = R_AARCH64_NONE; + ideal_nop = ideal_nop4_arm64; + gpfx = '_'; + break; case EM_IA_64: reltype = R_IA64_IMM64; gpfx = '_'; break; case EM_METAG: reltype = R_METAG_ADDR32; altmcount = "_mcount_wrapper"; diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 49b582a225b0b..b9897e2be404d 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -377,7 +377,7 @@ static void nop_mcount(Elf_Shdr const *const relhdr, if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) { if (make_nop) - ret = make_nop((void *)ehdr, shdr->sh_offset + relp->r_offset); + ret = make_nop((void *)ehdr, _w(shdr->sh_offset) + _w(relp->r_offset)); if (warn_on_notrace_sect && !once) { printf("Section %s has mcount callers being ignored\n", txtname); diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 826470d7f0007..96e2486a6fc47 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -263,7 +263,8 @@ sub check_objcopy } elsif ($arch eq "powerpc") { $local_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)"; - $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?.*?)>:"; + # See comment in the sparc64 section for why we use '\w'. + $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?\\w*?)>:"; $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s\\.?_mcount\$"; if ($bits == 64) { diff --git a/security/inode.c b/security/inode.c index 91503b79c5f8f..0e37e4fba8fac 100644 --- a/security/inode.c +++ b/security/inode.c @@ -215,19 +215,17 @@ void securityfs_remove(struct dentry *dentry) } EXPORT_SYMBOL_GPL(securityfs_remove); -static struct kobject *security_kobj; - static int __init securityfs_init(void) { int retval; - security_kobj = kobject_create_and_add("security", kernel_kobj); - if (!security_kobj) - return -EINVAL; + retval = sysfs_create_mount_point(kernel_kobj, "security"); + if (retval) + return retval; retval = register_filesystem(&fs_type); if (retval) - kobject_put(security_kobj); + sysfs_remove_mount_point(kernel_kobj, "security"); return retval; } diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 10f994307a04e..d6bc2b3af9efe 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "evm.h" int evm_initialized; @@ -148,7 +149,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, xattr_value_len, calc.digest); if (rc) break; - rc = memcmp(xattr_data->digest, calc.digest, + rc = crypto_memneq(xattr_data->digest, calc.digest, sizeof(calc.digest)); if (rc) rc = -EINVAL; @@ -296,6 +297,17 @@ static int evm_protect_xattr(struct dentry *dentry, const char *xattr_name, iint = integrity_iint_find(d_backing_inode(dentry)); if (iint && (iint->flags & IMA_NEW_FILE)) return 0; + + /* exception for pseudo filesystems */ + if (dentry->d_inode->i_sb->s_magic == TMPFS_MAGIC + || dentry->d_inode->i_sb->s_magic == SYSFS_MAGIC) + return 0; + + integrity_audit_msg(AUDIT_INTEGRITY_METADATA, + dentry->d_inode, dentry->d_name.name, + "update_metadata", + integrity_status_msg[evm_status], + -EPERM, 0); } out: if (evm_status != INTEGRITY_PASS) diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 8ee997dff1393..fc56d4dfa9547 100644 --- a/security/integrity/ima/ima.h +++ 
b/security/integrity/ima/ima.h @@ -106,7 +106,7 @@ void ima_add_violation(struct file *file, const unsigned char *filename, const char *op, const char *cause); int ima_init_crypto(void); void ima_putc(struct seq_file *m, void *data, int datalen); -void ima_print_digest(struct seq_file *m, u8 *digest, int size); +void ima_print_digest(struct seq_file *m, u8 *digest, u32 size); struct ima_template_desc *ima_template_desc_current(void); int ima_init_template(void); diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index 461215e5fd31d..816d175da79aa 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -190,9 +190,9 @@ static const struct file_operations ima_measurements_ops = { .release = seq_release, }; -void ima_print_digest(struct seq_file *m, u8 *digest, int size) +void ima_print_digest(struct seq_file *m, u8 *digest, u32 size) { - int i; + u32 i; for (i = 0; i < size; i++) seq_printf(m, "%02x", *(digest + i)); diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index d1eefb9d65fb2..3997e206f82da 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -27,6 +27,8 @@ #define IMA_UID 0x0008 #define IMA_FOWNER 0x0010 #define IMA_FSUUID 0x0020 +#define IMA_INMASK 0x0040 +#define IMA_EUID 0x0080 #define UNKNOWN 0 #define MEASURE 0x0001 /* same as IMA_MEASURE */ @@ -42,6 +44,8 @@ enum lsm_rule_types { LSM_OBJ_USER, LSM_OBJ_ROLE, LSM_OBJ_TYPE, LSM_SUBJ_USER, LSM_SUBJ_ROLE, LSM_SUBJ_TYPE }; +enum policy_types { ORIGINAL_TCB = 1, DEFAULT_TCB }; + struct ima_rule_entry { struct list_head list; int action; @@ -70,7 +74,7 @@ struct ima_rule_entry { * normal users can easily run the machine out of memory simply building * and running executables. 
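 * The first table below now carries only the generic dont_measure
 * rules.  The measurement rules proper come in two boot-selectable
 * variants: original_measurement_rules (the historical "ima_tcb"
 * behaviour, exact-mask matching on real uid 0) and
 * default_measurement_rules ("ima_policy=tcb", which also matches on
 * effective uid 0 and uses the IMA_INMASK containment test so reads
 * combined with other access bits are still caught).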
*/ -static struct ima_rule_entry default_rules[] = { +static struct ima_rule_entry dont_measure_rules[] = { {.action = DONT_MEASURE, .fsmagic = PROC_SUPER_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = SYSFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = DEBUGFS_MAGIC, .flags = IMA_FSMAGIC}, @@ -79,12 +83,31 @@ static struct ima_rule_entry default_rules[] = { {.action = DONT_MEASURE, .fsmagic = BINFMTFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = SECURITYFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = SELINUX_MAGIC, .flags = IMA_FSMAGIC}, + {.action = DONT_MEASURE, .fsmagic = CGROUP_SUPER_MAGIC, + .flags = IMA_FSMAGIC}, + {.action = DONT_MEASURE, .fsmagic = NSFS_MAGIC, .flags = IMA_FSMAGIC} +}; + +static struct ima_rule_entry original_measurement_rules[] = { {.action = MEASURE, .func = MMAP_CHECK, .mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE, .func = BPRM_CHECK, .mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, - {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, .uid = GLOBAL_ROOT_UID, - .flags = IMA_FUNC | IMA_MASK | IMA_UID}, + {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, + .uid = GLOBAL_ROOT_UID, .flags = IMA_FUNC | IMA_MASK | IMA_UID}, + {.action = MEASURE, .func = MODULE_CHECK, .flags = IMA_FUNC}, + {.action = MEASURE, .func = FIRMWARE_CHECK, .flags = IMA_FUNC}, +}; + +static struct ima_rule_entry default_measurement_rules[] = { + {.action = MEASURE, .func = MMAP_CHECK, .mask = MAY_EXEC, + .flags = IMA_FUNC | IMA_MASK}, + {.action = MEASURE, .func = BPRM_CHECK, .mask = MAY_EXEC, + .flags = IMA_FUNC | IMA_MASK}, + {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, + .uid = GLOBAL_ROOT_UID, .flags = IMA_FUNC | IMA_INMASK | IMA_EUID}, + {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, + .uid = GLOBAL_ROOT_UID, .flags = IMA_FUNC | IMA_INMASK | IMA_UID}, {.action = MEASURE, .func = MODULE_CHECK, .flags = IMA_FUNC}, {.action = MEASURE, .func = FIRMWARE_CHECK, .flags = IMA_FUNC}, }; @@ -99,6 +122,7 @@ static struct ima_rule_entry default_appraise_rules[] = { {.action = DONT_APPRAISE, .fsmagic = BINFMTFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = SECURITYFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = SELINUX_MAGIC, .flags = IMA_FSMAGIC}, + {.action = DONT_APPRAISE, .fsmagic = NSFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = CGROUP_SUPER_MAGIC, .flags = IMA_FSMAGIC}, #ifndef CONFIG_IMA_APPRAISE_SIGNED_INIT {.action = APPRAISE, .fowner = GLOBAL_ROOT_UID, .flags = IMA_FOWNER}, @@ -115,14 +139,29 @@ static struct list_head *ima_rules; static DEFINE_MUTEX(ima_rules_mutex); -static bool ima_use_tcb __initdata; +static int ima_policy __initdata; static int __init default_measure_policy_setup(char *str) { - ima_use_tcb = 1; + if (ima_policy) + return 1; + + ima_policy = ORIGINAL_TCB; return 1; } __setup("ima_tcb", default_measure_policy_setup); +static int __init policy_setup(char *str) +{ + if (ima_policy) + return 1; + + if (strcmp(str, "tcb") == 0) + ima_policy = DEFAULT_TCB; + + return 1; +} +__setup("ima_policy=", policy_setup); + static bool ima_use_appraise_tcb __initdata; static int __init default_appraise_policy_setup(char *str) { @@ -182,6 +221,9 @@ static bool ima_match_rules(struct ima_rule_entry *rule, if ((rule->flags & IMA_MASK) && (rule->mask != mask && func != POST_SETATTR)) return false; + if ((rule->flags & IMA_INMASK) && + (!(rule->mask & mask) && func != POST_SETATTR)) + 
return false; if ((rule->flags & IMA_FSMAGIC) && rule->fsmagic != inode->i_sb->s_magic) return false; @@ -190,6 +232,16 @@ static bool ima_match_rules(struct ima_rule_entry *rule, return false; if ((rule->flags & IMA_UID) && !uid_eq(rule->uid, cred->uid)) return false; + if (rule->flags & IMA_EUID) { + if (has_capability_noaudit(current, CAP_SETUID)) { + if (!uid_eq(rule->uid, cred->euid) + && !uid_eq(rule->uid, cred->suid) + && !uid_eq(rule->uid, cred->uid)) + return false; + } else if (!uid_eq(rule->uid, cred->euid)) + return false; + } + if ((rule->flags & IMA_FOWNER) && !uid_eq(rule->fowner, inode->i_uid)) return false; for (i = 0; i < MAX_LSM_RULES; i++) { @@ -333,21 +385,31 @@ void __init ima_init_policy(void) { int i, measure_entries, appraise_entries; - /* if !ima_use_tcb set entries = 0 so we load NO default rules */ - measure_entries = ima_use_tcb ? ARRAY_SIZE(default_rules) : 0; + /* if !ima_policy set entries = 0 so we load NO default rules */ + measure_entries = ima_policy ? ARRAY_SIZE(dont_measure_rules) : 0; appraise_entries = ima_use_appraise_tcb ? ARRAY_SIZE(default_appraise_rules) : 0; - for (i = 0; i < measure_entries + appraise_entries; i++) { - if (i < measure_entries) - list_add_tail(&default_rules[i].list, - &ima_default_rules); - else { - int j = i - measure_entries; + for (i = 0; i < measure_entries; i++) + list_add_tail(&dont_measure_rules[i].list, &ima_default_rules); - list_add_tail(&default_appraise_rules[j].list, + switch (ima_policy) { + case ORIGINAL_TCB: + for (i = 0; i < ARRAY_SIZE(original_measurement_rules); i++) + list_add_tail(&original_measurement_rules[i].list, &ima_default_rules); - } + break; + case DEFAULT_TCB: + for (i = 0; i < ARRAY_SIZE(default_measurement_rules); i++) + list_add_tail(&default_measurement_rules[i].list, + &ima_default_rules); + default: + break; + } + + for (i = 0; i < appraise_entries; i++) { + list_add_tail(&default_appraise_rules[i].list, + &ima_default_rules); } ima_rules = &ima_default_rules; @@ -373,7 +435,8 @@ enum { Opt_audit, Opt_obj_user, Opt_obj_role, Opt_obj_type, Opt_subj_user, Opt_subj_role, Opt_subj_type, - Opt_func, Opt_mask, Opt_fsmagic, Opt_uid, Opt_fowner, + Opt_func, Opt_mask, Opt_fsmagic, + Opt_uid, Opt_euid, Opt_fowner, Opt_appraise_type, Opt_fsuuid, Opt_permit_directio }; @@ -394,6 +457,7 @@ static match_table_t policy_tokens = { {Opt_fsmagic, "fsmagic=%s"}, {Opt_fsuuid, "fsuuid=%s"}, {Opt_uid, "uid=%s"}, + {Opt_euid, "euid=%s"}, {Opt_fowner, "fowner=%s"}, {Opt_appraise_type, "appraise_type=%s"}, {Opt_permit_directio, "permit_directio"}, @@ -435,6 +499,7 @@ static void ima_log_string(struct audit_buffer *ab, char *key, char *value) static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) { struct audit_buffer *ab; + char *from; char *p; int result = 0; @@ -525,18 +590,23 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) if (entry->mask) result = -EINVAL; - if ((strcmp(args[0].from, "MAY_EXEC")) == 0) + from = args[0].from; + if (*from == '^') + from++; + + if ((strcmp(from, "MAY_EXEC")) == 0) entry->mask = MAY_EXEC; - else if (strcmp(args[0].from, "MAY_WRITE") == 0) + else if (strcmp(from, "MAY_WRITE") == 0) entry->mask = MAY_WRITE; - else if (strcmp(args[0].from, "MAY_READ") == 0) + else if (strcmp(from, "MAY_READ") == 0) entry->mask = MAY_READ; - else if (strcmp(args[0].from, "MAY_APPEND") == 0) + else if (strcmp(from, "MAY_APPEND") == 0) entry->mask = MAY_APPEND; else result = -EINVAL; if (!result) - entry->flags |= IMA_MASK; + entry->flags |= (*args[0].from == '^') + ? 
IMA_INMASK : IMA_MASK; break; case Opt_fsmagic: ima_log_string(ab, "fsmagic", args[0].from); @@ -566,6 +636,9 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) break; case Opt_uid: ima_log_string(ab, "uid", args[0].from); + case Opt_euid: + if (token == Opt_euid) + ima_log_string(ab, "euid", args[0].from); if (uid_valid(entry->uid)) { result = -EINVAL; @@ -574,11 +647,14 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) result = kstrtoul(args[0].from, 10, &lnum); if (!result) { - entry->uid = make_kuid(current_user_ns(), (uid_t)lnum); - if (!uid_valid(entry->uid) || (((uid_t)lnum) != lnum)) + entry->uid = make_kuid(current_user_ns(), + (uid_t) lnum); + if (!uid_valid(entry->uid) || + (uid_t)lnum != lnum) result = -EINVAL; else - entry->flags |= IMA_UID; + entry->flags |= (token == Opt_uid) + ? IMA_UID : IMA_EUID; } break; case Opt_fowner: diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c index bcfc36cbde6ae..61fbd0c0d95c0 100644 --- a/security/integrity/ima/ima_template_lib.c +++ b/security/integrity/ima/ima_template_lib.c @@ -70,7 +70,8 @@ static void ima_show_template_data_ascii(struct seq_file *m, enum data_formats datafmt, struct ima_field_data *field_data) { - u8 *buf_ptr = field_data->data, buflen = field_data->len; + u8 *buf_ptr = field_data->data; + u32 buflen = field_data->len; switch (datafmt) { case DATA_FMT_DIGEST_WITH_ALGO: diff --git a/security/keys/gc.c b/security/keys/gc.c index c7952375ac532..addf060399e09 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -134,6 +134,12 @@ static noinline void key_gc_unused_keys(struct list_head *keys) kdebug("- %u", key->serial); key_check(key); + /* Throw away the key data if the key is instantiated */ + if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags) && + !test_bit(KEY_FLAG_NEGATIVE, &key->flags) && + key->type->destroy) + key->type->destroy(key); + security_key_free(key); /* deal with the user's key tracking and quota */ @@ -148,10 +154,6 @@ static noinline void key_gc_unused_keys(struct list_head *keys) if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) atomic_dec(&key->user->nikeys); - /* now throw away the key memory */ - if (key->type->destroy) - key->type->destroy(key); - key_user_put(key->user); kfree(key->description); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 0b9ec78a7a7ad..26f0e0a11ed68 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -757,16 +757,16 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) /* the key is probably readable - now try to read it */ can_read_key: - ret = key_validate(key); - if (ret == 0) { - ret = -EOPNOTSUPP; - if (key->type->read) { - /* read the data with the semaphore held (since we - * might sleep) */ - down_read(&key->sem); + ret = -EOPNOTSUPP; + if (key->type->read) { + /* Read the data with the semaphore held (since we might sleep) + * to protect against the key being updated or revoked. 
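+	 * Calling key_validate() only after taking the semaphore closes
+	 * the window in which a key that had just passed validation
+	 * could be revoked or expire before ->read() ran.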
+ */ + down_read(&key->sem); + ret = key_validate(key); + if (ret == 0) ret = key->type->read(key, buffer, buflen); - up_read(&key->sem); - } + up_read(&key->sem); } error2: diff --git a/security/keys/keyring.c b/security/keys/keyring.c index e72548b5897ec..d33437007ad22 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -1181,9 +1181,11 @@ void __key_link_end(struct key *keyring, if (index_key->type == &key_type_keyring) up_write(&keyring_serialise_link_sem); - if (edit && !edit->dead_leaf) { - key_payload_reserve(keyring, - keyring->datalen - KEYQUOTA_LINK_BYTES); + if (edit) { + if (!edit->dead_leaf) { + key_payload_reserve(keyring, + keyring->datalen - KEYQUOTA_LINK_BYTES); + } assoc_array_cancel_edit(edit); } up_write(&keyring->sem); diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index bd536cb221e23..db91639c81e3a 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -794,6 +794,7 @@ long join_session_keyring(const char *name) ret = PTR_ERR(keyring); goto error2; } else if (keyring == new->session_keyring) { + key_put(keyring); ret = 0; goto error2; } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 7dade28affba5..280235cc3a980 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -403,6 +403,7 @@ static int selinux_is_sblabel_mnt(struct super_block *sb) return sbsec->behavior == SECURITY_FS_USE_XATTR || sbsec->behavior == SECURITY_FS_USE_TRANS || sbsec->behavior == SECURITY_FS_USE_TASK || + sbsec->behavior == SECURITY_FS_USE_NATIVE || /* Special handling. Genfs but also in-core setxattr handler */ !strcmp(sb->s_type->name, "sysfs") || !strcmp(sb->s_type->name, "pstore") || @@ -1094,7 +1095,7 @@ static void selinux_write_opts(struct seq_file *m, seq_puts(m, prefix); if (has_comma) seq_putc(m, '\"'); - seq_puts(m, opts->mnt_opts[i]); + seq_escape(m, opts->mnt_opts[i], "\"\n\\"); if (has_comma) seq_putc(m, '\"'); } @@ -3287,7 +3288,8 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared int rc = 0; if (default_noexec && - (prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE)))) { + (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) || + (!shared && (prot & PROT_WRITE)))) { /* * We are making executable an anonymous mapping or a * private file mapping that will also be writable. 
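For context on the file_map_prot_check() hunk above: with default_noexec
set, SELinux folds a writable+executable anonymous mapping into the
EXECMEM permission, and the added IS_PRIVATE() test gives mappings of
fs-internal inodes (which carry no usable security label) the same
treatment. A minimal userspace sketch of the mapping class being
checked -- illustrative only, not part of the patch:

	/* Hypothetical probe: request W+X anonymous memory, report the result. */
	#include <stdio.h>
	#include <sys/mman.h>

	int main(void)
	{
		void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (p == MAP_FAILED)
			perror("mmap");	/* EACCES when execmem is denied */
		else
			puts("granted a writable+executable anonymous mapping");
		return 0;
	}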
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index d2787cca1fcb9..3d22014130289 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1853,7 +1853,6 @@ static struct file_system_type sel_fs_type = { }; struct vfsmount *selinuxfs_mount; -static struct kobject *selinuxfs_kobj; static int __init init_sel_fs(void) { @@ -1862,13 +1861,13 @@ static int __init init_sel_fs(void) if (!selinux_enabled) return 0; - selinuxfs_kobj = kobject_create_and_add("selinux", fs_kobj); - if (!selinuxfs_kobj) - return -ENOMEM; + err = sysfs_create_mount_point(fs_kobj, "selinux"); + if (err) + return err; err = register_filesystem(&sel_fs_type); if (err) { - kobject_put(selinuxfs_kobj); + sysfs_remove_mount_point(fs_kobj, "selinux"); return err; } @@ -1887,7 +1886,7 @@ __initcall(init_sel_fs); #ifdef CONFIG_SECURITY_SELINUX_DISABLE void exit_sel_fs(void) { - kobject_put(selinuxfs_kobj); + sysfs_remove_mount_point(fs_kobj, "selinux"); kern_unmount(selinuxfs_mount); unregister_filesystem(&sel_fs_type); } diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index afe6a269ec177..57644b1dc42e1 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -153,6 +153,12 @@ int ebitmap_netlbl_import(struct ebitmap *ebmap, if (offset == (u32)-1) return 0; + /* don't waste ebitmap space if the netlabel bitmap is empty */ + if (bitmap == 0) { + offset += EBITMAP_UNIT_SIZE; + continue; + } + if (e_iter == NULL || offset >= e_iter->startbit + EBITMAP_SIZE) { e_prev = e_iter; diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index d9682985349e6..ac4cac7c661a3 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -2241,16 +2241,16 @@ static const struct file_operations smk_revoke_subj_ops = { .llseek = generic_file_llseek, }; -static struct kset *smackfs_kset; /** * smk_init_sysfs - initialize /sys/fs/smackfs * */ static int smk_init_sysfs(void) { - smackfs_kset = kset_create_and_add("smackfs", NULL, fs_kobj); - if (!smackfs_kset) - return -ENOMEM; + int err; + err = sysfs_create_mount_point(fs_kobj, "smackfs"); + if (err) + return err; return 0; } diff --git a/sound/arm/Kconfig b/sound/arm/Kconfig index 885683a3b0bdf..e0406211716b0 100644 --- a/sound/arm/Kconfig +++ b/sound/arm/Kconfig @@ -9,6 +9,14 @@ menuconfig SND_ARM Drivers that are implemented on ASoC can be found in "ALSA for SoC audio support" section. +config SND_PXA2XX_LIB + tristate + select SND_AC97_CODEC if SND_PXA2XX_LIB_AC97 + select SND_DMAENGINE_PCM + +config SND_PXA2XX_LIB_AC97 + bool + if SND_ARM config SND_ARMAACI @@ -21,13 +29,6 @@ config SND_PXA2XX_PCM tristate select SND_PCM -config SND_PXA2XX_LIB - tristate - select SND_AC97_CODEC if SND_PXA2XX_LIB_AC97 - -config SND_PXA2XX_LIB_AC97 - bool - config SND_PXA2XX_AC97 tristate "AC97 driver for the Intel PXA2xx chip" depends on ARCH_PXA diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index b123c42e7dc89..b554d7f9e3be1 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -44,6 +44,13 @@ #include #include +/* struct snd_compr_codec_caps overflows the ioctl bit size for some + * architectures, so we need to disable the relevant ioctls. 
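+ * (The underlying arithmetic: an ioctl command word encodes its
+ * argument size in an _IOC_SIZEBITS-wide field, so where that field
+ * is only 13 bits the 8 KiB ceiling falls below
+ * sizeof(struct snd_compr_codec_caps) and the command constant
+ * cannot be formed at all.)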
+ */ +#if _IOC_SIZEBITS < 14 +#define COMPR_CODEC_CAPS_OVERFLOW +#endif + /* TODO: * - add substream support for multiple devices in case of * SND_DYNAMIC_MINORS is not used @@ -438,6 +445,7 @@ snd_compr_get_caps(struct snd_compr_stream *stream, unsigned long arg) return retval; } +#ifndef COMPR_CODEC_CAPS_OVERFLOW static int snd_compr_get_codec_caps(struct snd_compr_stream *stream, unsigned long arg) { @@ -461,6 +469,7 @@ snd_compr_get_codec_caps(struct snd_compr_stream *stream, unsigned long arg) kfree(caps); return retval; } +#endif /* !COMPR_CODEC_CAPS_OVERFLOW */ /* revisit this with snd_pcm_preallocate_xxx */ static int snd_compr_allocate_buffer(struct snd_compr_stream *stream, @@ -799,9 +808,11 @@ static long snd_compr_ioctl(struct file *f, unsigned int cmd, unsigned long arg) case _IOC_NR(SNDRV_COMPRESS_GET_CAPS): retval = snd_compr_get_caps(stream, arg); break; +#ifndef COMPR_CODEC_CAPS_OVERFLOW case _IOC_NR(SNDRV_COMPRESS_GET_CODEC_CAPS): retval = snd_compr_get_codec_caps(stream, arg); break; +#endif case _IOC_NR(SNDRV_COMPRESS_SET_PARAMS): retval = snd_compr_set_params(stream, arg); break; diff --git a/sound/core/control.c b/sound/core/control.c index 196a6fe100ca8..a85d45595d02a 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1405,6 +1405,8 @@ static int snd_ctl_tlv_ioctl(struct snd_ctl_file *file, return -EFAULT; if (tlv.length < sizeof(unsigned int) * 2) return -EINVAL; + if (!tlv.numid) + return -EINVAL; down_read(&card->controls_rwsem); kctl = snd_ctl_find_numid(card, tlv.numid); if (kctl == NULL) { diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c index b9c0910fb8c4e..0608f216f3592 100644 --- a/sound/core/control_compat.c +++ b/sound/core/control_compat.c @@ -170,6 +170,19 @@ struct snd_ctl_elem_value32 { unsigned char reserved[128]; }; +#ifdef CONFIG_X86_X32 +/* x32 has a different alignment for 64bit values from ia32 */ +struct snd_ctl_elem_value_x32 { + struct snd_ctl_elem_id id; + unsigned int indirect; /* bit-field causes misalignment */ + union { + s32 integer[128]; + unsigned char data[512]; + s64 integer64[64]; + } value; + unsigned char reserved[128]; +}; +#endif /* CONFIG_X86_X32 */ /* get the value type and count of the control */ static int get_ctl_type(struct snd_card *card, struct snd_ctl_elem_id *id, @@ -219,9 +232,11 @@ static int get_elem_size(int type, int count) static int copy_ctl_value_from_user(struct snd_card *card, struct snd_ctl_elem_value *data, - struct snd_ctl_elem_value32 __user *data32, + void __user *userdata, + void __user *valuep, int *typep, int *countp) { + struct snd_ctl_elem_value32 __user *data32 = userdata; int i, type, size; int uninitialized_var(count); unsigned int indirect; @@ -239,8 +254,9 @@ static int copy_ctl_value_from_user(struct snd_card *card, if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN || type == SNDRV_CTL_ELEM_TYPE_INTEGER) { for (i = 0; i < count; i++) { + s32 __user *intp = valuep; int val; - if (get_user(val, &data32->value.integer[i])) + if (get_user(val, &intp[i])) return -EFAULT; data->value.integer.value[i] = val; } @@ -250,8 +266,7 @@ static int copy_ctl_value_from_user(struct snd_card *card, dev_err(card->dev, "snd_ioctl32_ctl_elem_value: unknown type %d\n", type); return -EINVAL; } - if (copy_from_user(data->value.bytes.data, - data32->value.data, size)) + if (copy_from_user(data->value.bytes.data, valuep, size)) return -EFAULT; } @@ -261,7 +276,8 @@ static int copy_ctl_value_from_user(struct snd_card *card, } /* restore the value to 32bit */ -static int 
copy_ctl_value_to_user(struct snd_ctl_elem_value32 __user *data32, +static int copy_ctl_value_to_user(void __user *userdata, + void __user *valuep, struct snd_ctl_elem_value *data, int type, int count) { @@ -270,22 +286,22 @@ static int copy_ctl_value_to_user(struct snd_ctl_elem_value32 __user *data32, if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN || type == SNDRV_CTL_ELEM_TYPE_INTEGER) { for (i = 0; i < count; i++) { + s32 __user *intp = valuep; int val; val = data->value.integer.value[i]; - if (put_user(val, &data32->value.integer[i])) + if (put_user(val, &intp[i])) return -EFAULT; } } else { size = get_elem_size(type, count); - if (copy_to_user(data32->value.data, - data->value.bytes.data, size)) + if (copy_to_user(valuep, data->value.bytes.data, size)) return -EFAULT; } return 0; } -static int snd_ctl_elem_read_user_compat(struct snd_card *card, - struct snd_ctl_elem_value32 __user *data32) +static int ctl_elem_read_user(struct snd_card *card, + void __user *userdata, void __user *valuep) { struct snd_ctl_elem_value *data; int err, type, count; @@ -294,7 +310,9 @@ static int snd_ctl_elem_read_user_compat(struct snd_card *card, if (data == NULL) return -ENOMEM; - if ((err = copy_ctl_value_from_user(card, data, data32, &type, &count)) < 0) + err = copy_ctl_value_from_user(card, data, userdata, valuep, + &type, &count); + if (err < 0) goto error; snd_power_lock(card); @@ -303,14 +321,15 @@ static int snd_ctl_elem_read_user_compat(struct snd_card *card, err = snd_ctl_elem_read(card, data); snd_power_unlock(card); if (err >= 0) - err = copy_ctl_value_to_user(data32, data, type, count); + err = copy_ctl_value_to_user(userdata, valuep, data, + type, count); error: kfree(data); return err; } -static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file, - struct snd_ctl_elem_value32 __user *data32) +static int ctl_elem_write_user(struct snd_ctl_file *file, + void __user *userdata, void __user *valuep) { struct snd_ctl_elem_value *data; struct snd_card *card = file->card; @@ -320,7 +339,9 @@ static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file, if (data == NULL) return -ENOMEM; - if ((err = copy_ctl_value_from_user(card, data, data32, &type, &count)) < 0) + err = copy_ctl_value_from_user(card, data, userdata, valuep, + &type, &count); + if (err < 0) goto error; snd_power_lock(card); @@ -329,12 +350,39 @@ static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file, err = snd_ctl_elem_write(card, file, data); snd_power_unlock(card); if (err >= 0) - err = copy_ctl_value_to_user(data32, data, type, count); + err = copy_ctl_value_to_user(userdata, valuep, data, + type, count); error: kfree(data); return err; } +static int snd_ctl_elem_read_user_compat(struct snd_card *card, + struct snd_ctl_elem_value32 __user *data32) +{ + return ctl_elem_read_user(card, data32, &data32->value); +} + +static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file, + struct snd_ctl_elem_value32 __user *data32) +{ + return ctl_elem_write_user(file, data32, &data32->value); +} + +#ifdef CONFIG_X86_X32 +static int snd_ctl_elem_read_user_x32(struct snd_card *card, + struct snd_ctl_elem_value_x32 __user *data32) +{ + return ctl_elem_read_user(card, data32, &data32->value); +} + +static int snd_ctl_elem_write_user_x32(struct snd_ctl_file *file, + struct snd_ctl_elem_value_x32 __user *data32) +{ + return ctl_elem_write_user(file, data32, &data32->value); +} +#endif /* CONFIG_X86_X32 */ + /* add or replace a user control */ static int snd_ctl_elem_add_compat(struct snd_ctl_file *file, struct 
snd_ctl_elem_info32 __user *data32, @@ -393,6 +441,10 @@ enum { SNDRV_CTL_IOCTL_ELEM_WRITE32 = _IOWR('U', 0x13, struct snd_ctl_elem_value32), SNDRV_CTL_IOCTL_ELEM_ADD32 = _IOWR('U', 0x17, struct snd_ctl_elem_info32), SNDRV_CTL_IOCTL_ELEM_REPLACE32 = _IOWR('U', 0x18, struct snd_ctl_elem_info32), +#ifdef CONFIG_X86_X32 + SNDRV_CTL_IOCTL_ELEM_READ_X32 = _IOWR('U', 0x12, struct snd_ctl_elem_value_x32), + SNDRV_CTL_IOCTL_ELEM_WRITE_X32 = _IOWR('U', 0x13, struct snd_ctl_elem_value_x32), +#endif /* CONFIG_X86_X32 */ }; static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -431,6 +483,12 @@ static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, uns return snd_ctl_elem_add_compat(ctl, argp, 0); case SNDRV_CTL_IOCTL_ELEM_REPLACE32: return snd_ctl_elem_add_compat(ctl, argp, 1); +#ifdef CONFIG_X86_X32 + case SNDRV_CTL_IOCTL_ELEM_READ_X32: + return snd_ctl_elem_read_user_x32(ctl->card, argp); + case SNDRV_CTL_IOCTL_ELEM_WRITE_X32: + return snd_ctl_elem_write_user_x32(ctl, argp); +#endif /* CONFIG_X86_X32 */ } down_read(&snd_ioctl_rwsem); diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c index 886be7da989d1..38514ed6e55c2 100644 --- a/sound/core/hrtimer.c +++ b/sound/core/hrtimer.c @@ -90,7 +90,7 @@ static int snd_hrtimer_start(struct snd_timer *t) struct snd_hrtimer *stime = t->private_data; atomic_set(&stime->running, 0); - hrtimer_cancel(&stime->hrt); + hrtimer_try_to_cancel(&stime->hrt); hrtimer_start(&stime->hrt, ns_to_ktime(t->sticks * resolution), HRTIMER_MODE_REL); atomic_set(&stime->running, 1); @@ -101,6 +101,7 @@ static int snd_hrtimer_stop(struct snd_timer *t) { struct snd_hrtimer *stime = t->private_data; atomic_set(&stime->running, 0); + hrtimer_try_to_cancel(&stime->hrt); return 0; } diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 58550cc93f280..33e72c809e50e 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -834,7 +834,8 @@ static int choose_rate(struct snd_pcm_substream *substream, return snd_pcm_hw_param_near(substream, params, SNDRV_PCM_HW_PARAM_RATE, best_rate, NULL); } -static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream) +static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream, + bool trylock) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_pcm_hw_params *params, *sparams; @@ -848,7 +849,10 @@ static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream) struct snd_mask sformat_mask; struct snd_mask mask; - if (mutex_lock_interruptible(&runtime->oss.params_lock)) + if (trylock) { + if (!(mutex_trylock(&runtime->oss.params_lock))) + return -EAGAIN; + } else if (mutex_lock_interruptible(&runtime->oss.params_lock)) return -EINTR; sw_params = kmalloc(sizeof(*sw_params), GFP_KERNEL); params = kmalloc(sizeof(*params), GFP_KERNEL); @@ -1092,7 +1096,7 @@ static int snd_pcm_oss_get_active_substream(struct snd_pcm_oss_file *pcm_oss_fil if (asubstream == NULL) asubstream = substream; if (substream->runtime->oss.params) { - err = snd_pcm_oss_change_params(substream); + err = snd_pcm_oss_change_params(substream, false); if (err < 0) return err; } @@ -1132,7 +1136,7 @@ static int snd_pcm_oss_make_ready(struct snd_pcm_substream *substream) return 0; runtime = substream->runtime; if (runtime->oss.params) { - err = snd_pcm_oss_change_params(substream); + err = snd_pcm_oss_change_params(substream, false); if (err < 0) return err; } @@ -2163,7 +2167,7 @@ static int snd_pcm_oss_get_space(struct snd_pcm_oss_file 
*pcm_oss_file, int stre runtime = substream->runtime; if (runtime->oss.params && - (err = snd_pcm_oss_change_params(substream)) < 0) + (err = snd_pcm_oss_change_params(substream, false)) < 0) return err; info.fragsize = runtime->oss.period_bytes; @@ -2800,7 +2804,12 @@ static int snd_pcm_oss_mmap(struct file *file, struct vm_area_struct *area) return -EIO; if (runtime->oss.params) { - if ((err = snd_pcm_oss_change_params(substream)) < 0) + /* use mutex_trylock() for params_lock for avoiding a deadlock + * between mmap_sem and params_lock taken by + * copy_from/to_user() in snd_pcm_oss_write/read() + */ + err = snd_pcm_oss_change_params(substream, true); + if (err < 0) return err; } #ifdef CONFIG_SND_PCM_OSS_PLUGINS diff --git a/sound/core/pcm.c b/sound/core/pcm.c index b25bcf5b86446..dfed728d8c872 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -1027,7 +1027,8 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream) static ssize_t show_pcm_class(struct device *dev, struct device_attribute *attr, char *buf) { - struct snd_pcm *pcm; + struct snd_pcm_str *pstr = container_of(dev, struct snd_pcm_str, dev); + struct snd_pcm *pcm = pstr->pcm; const char *str; static const char *strs[SNDRV_PCM_CLASS_LAST + 1] = { [SNDRV_PCM_CLASS_GENERIC] = "generic", @@ -1036,8 +1037,7 @@ static ssize_t show_pcm_class(struct device *dev, [SNDRV_PCM_CLASS_DIGITIZER] = "digitizer", }; - if (! (pcm = dev_get_drvdata(dev)) || - pcm->dev_class > SNDRV_PCM_CLASS_LAST) + if (pcm->dev_class > SNDRV_PCM_CLASS_LAST) str = "none"; else str = strs[pcm->dev_class]; diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index b48b434444ed0..1f64ab0c2a95a 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -183,6 +183,14 @@ static int snd_pcm_ioctl_channel_info_compat(struct snd_pcm_substream *substream return err; } +#ifdef CONFIG_X86_X32 +/* X32 ABI has the same struct as x86-64 for snd_pcm_channel_info */ +static int snd_pcm_channel_info_user(struct snd_pcm_substream *substream, + struct snd_pcm_channel_info __user *src); +#define snd_pcm_ioctl_channel_info_x32(s, p) \ + snd_pcm_channel_info_user(s, p) +#endif /* CONFIG_X86_X32 */ + struct snd_pcm_status32 { s32 state; struct compat_timespec trigger_tstamp; @@ -243,6 +251,71 @@ static int snd_pcm_status_user_compat(struct snd_pcm_substream *substream, return err; } +#ifdef CONFIG_X86_X32 +/* X32 ABI has 64bit timespec and 64bit alignment */ +struct snd_pcm_status_x32 { + s32 state; + u32 rsvd; /* alignment */ + struct timespec trigger_tstamp; + struct timespec tstamp; + u32 appl_ptr; + u32 hw_ptr; + s32 delay; + u32 avail; + u32 avail_max; + u32 overrange; + s32 suspended_state; + u32 audio_tstamp_data; + struct timespec audio_tstamp; + struct timespec driver_tstamp; + u32 audio_tstamp_accuracy; + unsigned char reserved[52-2*sizeof(struct timespec)]; +} __packed; + +#define put_timespec(src, dst) copy_to_user(dst, src, sizeof(*dst)) + +static int snd_pcm_status_user_x32(struct snd_pcm_substream *substream, + struct snd_pcm_status_x32 __user *src, + bool ext) +{ + struct snd_pcm_status status; + int err; + + memset(&status, 0, sizeof(status)); + /* + * with extension, parameters are read/write, + * get audio_tstamp_data from user, + * ignore rest of status structure + */ + if (ext && get_user(status.audio_tstamp_data, + (u32 __user *)(&src->audio_tstamp_data))) + return -EFAULT; + err = snd_pcm_status(substream, &status); + if (err < 0) + return err; + + if (clear_user(src, sizeof(*src))) + return -EFAULT; + if 
(put_user(status.state, &src->state) || + put_timespec(&status.trigger_tstamp, &src->trigger_tstamp) || + put_timespec(&status.tstamp, &src->tstamp) || + put_user(status.appl_ptr, &src->appl_ptr) || + put_user(status.hw_ptr, &src->hw_ptr) || + put_user(status.delay, &src->delay) || + put_user(status.avail, &src->avail) || + put_user(status.avail_max, &src->avail_max) || + put_user(status.overrange, &src->overrange) || + put_user(status.suspended_state, &src->suspended_state) || + put_user(status.audio_tstamp_data, &src->audio_tstamp_data) || + put_timespec(&status.audio_tstamp, &src->audio_tstamp) || + put_timespec(&status.driver_tstamp, &src->driver_tstamp) || + put_user(status.audio_tstamp_accuracy, &src->audio_tstamp_accuracy)) + return -EFAULT; + + return err; +} +#endif /* CONFIG_X86_X32 */ + /* both for HW_PARAMS and HW_REFINE */ static int snd_pcm_ioctl_hw_params_compat(struct snd_pcm_substream *substream, int refine, @@ -255,10 +328,15 @@ static int snd_pcm_ioctl_hw_params_compat(struct snd_pcm_substream *substream, if (! (runtime = substream->runtime)) return -ENOTTY; - /* only fifo_size is different, so just copy all */ - data = memdup_user(data32, sizeof(*data32)); - if (IS_ERR(data)) - return PTR_ERR(data); + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + /* only fifo_size (RO from userspace) is different, so just copy all */ + if (copy_from_user(data, data32, sizeof(*data32))) { + err = -EFAULT; + goto error; + } if (refine) err = snd_pcm_hw_refine(substream, data); @@ -464,6 +542,93 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream, return 0; } +#ifdef CONFIG_X86_X32 +/* X32 ABI has 64bit timespec and 64bit alignment */ +struct snd_pcm_mmap_status_x32 { + s32 state; + s32 pad1; + u32 hw_ptr; + u32 pad2; /* alignment */ + struct timespec tstamp; + s32 suspended_state; + struct timespec audio_tstamp; +} __packed; + +struct snd_pcm_mmap_control_x32 { + u32 appl_ptr; + u32 avail_min; +}; + +struct snd_pcm_sync_ptr_x32 { + u32 flags; + u32 rsvd; /* alignment */ + union { + struct snd_pcm_mmap_status_x32 status; + unsigned char reserved[64]; + } s; + union { + struct snd_pcm_mmap_control_x32 control; + unsigned char reserved[64]; + } c; +} __packed; + +static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream, + struct snd_pcm_sync_ptr_x32 __user *src) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + volatile struct snd_pcm_mmap_status *status; + volatile struct snd_pcm_mmap_control *control; + u32 sflags; + struct snd_pcm_mmap_control scontrol; + struct snd_pcm_mmap_status sstatus; + snd_pcm_uframes_t boundary; + int err; + + if (snd_BUG_ON(!runtime)) + return -EINVAL; + + if (get_user(sflags, &src->flags) || + get_user(scontrol.appl_ptr, &src->c.control.appl_ptr) || + get_user(scontrol.avail_min, &src->c.control.avail_min)) + return -EFAULT; + if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) { + err = snd_pcm_hwsync(substream); + if (err < 0) + return err; + } + status = runtime->status; + control = runtime->control; + boundary = recalculate_boundary(runtime); + if (!boundary) + boundary = 0x7fffffff; + snd_pcm_stream_lock_irq(substream); + /* FIXME: we should consider the boundary for the sync from app */ + if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) + control->appl_ptr = scontrol.appl_ptr; + else + scontrol.appl_ptr = control->appl_ptr % boundary; + if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN)) + control->avail_min = scontrol.avail_min; + else + scontrol.avail_min = control->avail_min; + sstatus.state 
= status->state; + sstatus.hw_ptr = status->hw_ptr % boundary; + sstatus.tstamp = status->tstamp; + sstatus.suspended_state = status->suspended_state; + sstatus.audio_tstamp = status->audio_tstamp; + snd_pcm_stream_unlock_irq(substream); + if (put_user(sstatus.state, &src->s.status.state) || + put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) || + put_timespec(&sstatus.tstamp, &src->s.status.tstamp) || + put_user(sstatus.suspended_state, &src->s.status.suspended_state) || + put_timespec(&sstatus.audio_tstamp, &src->s.status.audio_tstamp) || + put_user(scontrol.appl_ptr, &src->c.control.appl_ptr) || + put_user(scontrol.avail_min, &src->c.control.avail_min)) + return -EFAULT; + + return 0; +} +#endif /* CONFIG_X86_X32 */ /* */ @@ -482,7 +647,12 @@ enum { SNDRV_PCM_IOCTL_WRITEN_FRAMES32 = _IOW('A', 0x52, struct snd_xfern32), SNDRV_PCM_IOCTL_READN_FRAMES32 = _IOR('A', 0x53, struct snd_xfern32), SNDRV_PCM_IOCTL_SYNC_PTR32 = _IOWR('A', 0x23, struct snd_pcm_sync_ptr32), - +#ifdef CONFIG_X86_X32 + SNDRV_PCM_IOCTL_CHANNEL_INFO_X32 = _IOR('A', 0x32, struct snd_pcm_channel_info), + SNDRV_PCM_IOCTL_STATUS_X32 = _IOR('A', 0x20, struct snd_pcm_status_x32), + SNDRV_PCM_IOCTL_STATUS_EXT_X32 = _IOWR('A', 0x24, struct snd_pcm_status_x32), + SNDRV_PCM_IOCTL_SYNC_PTR_X32 = _IOWR('A', 0x23, struct snd_pcm_sync_ptr_x32), +#endif /* CONFIG_X86_X32 */ }; static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -554,6 +724,16 @@ static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned l return snd_pcm_ioctl_rewind_compat(substream, argp); case SNDRV_PCM_IOCTL_FORWARD32: return snd_pcm_ioctl_forward_compat(substream, argp); +#ifdef CONFIG_X86_X32 + case SNDRV_PCM_IOCTL_STATUS_X32: + return snd_pcm_status_user_x32(substream, argp, false); + case SNDRV_PCM_IOCTL_STATUS_EXT_X32: + return snd_pcm_status_user_x32(substream, argp, true); + case SNDRV_PCM_IOCTL_SYNC_PTR_X32: + return snd_pcm_ioctl_sync_ptr_x32(substream, argp); + case SNDRV_PCM_IOCTL_CHANNEL_INFO_X32: + return snd_pcm_ioctl_channel_info_x32(substream, argp); +#endif /* CONFIG_X86_X32 */ } return -ENOIOCTLCMD; diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index d126c03361aef..aa999e747c94b 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -74,6 +74,18 @@ static int snd_pcm_open(struct file *file, struct snd_pcm *pcm, int stream); static DEFINE_RWLOCK(snd_pcm_link_rwlock); static DECLARE_RWSEM(snd_pcm_link_rwsem); +/* Writer in rwsem may block readers even during its waiting in queue, + * and this may lead to a deadlock when the code path takes read sem + * twice (e.g. one in snd_pcm_action_nonatomic() and another in + * snd_pcm_stream_lock()). As a (suboptimal) workaround, let the + * writer spin until it gets the lock.
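+ *
+ * An illustrative interleaving of the deadlock being worked around
+ * (a hypothetical sketch for this comment, not code from the patch):
+ *
+ *	task A: down_read(&snd_pcm_link_rwsem);   // snd_pcm_action_nonatomic()
+ *	task B: down_write(&snd_pcm_link_rwsem);  // snd_pcm_link(): waits for A,
+ *	                                          // queued ahead of new readers
+ *	task A: down_read(&snd_pcm_link_rwsem);   // snd_pcm_stream_lock(): blocked
+ *	                                          // behind B, which waits for A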
+ */ +static inline void down_write_nonblock(struct rw_semaphore *lock) +{ + while (!down_write_trylock(lock)) + cond_resched(); +} + /** * snd_pcm_stream_lock - Lock the PCM stream * @substream: PCM substream @@ -85,7 +97,7 @@ static DECLARE_RWSEM(snd_pcm_link_rwsem); void snd_pcm_stream_lock(struct snd_pcm_substream *substream) { if (substream->pcm->nonatomic) { - down_read(&snd_pcm_link_rwsem); + down_read_nested(&snd_pcm_link_rwsem, SINGLE_DEPTH_NESTING); mutex_lock(&substream->self_group.mutex); } else { read_lock(&snd_pcm_link_rwlock); @@ -1816,7 +1828,7 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd) res = -ENOMEM; goto _nolock; } - down_write(&snd_pcm_link_rwsem); + down_write_nonblock(&snd_pcm_link_rwsem); write_lock_irq(&snd_pcm_link_rwlock); if (substream->runtime->status->state == SNDRV_PCM_STATE_OPEN || substream->runtime->status->state != substream1->runtime->status->state || @@ -1863,7 +1875,7 @@ static int snd_pcm_unlink(struct snd_pcm_substream *substream) struct snd_pcm_substream *s; int res = 0; - down_write(&snd_pcm_link_rwsem); + down_write_nonblock(&snd_pcm_link_rwsem); write_lock_irq(&snd_pcm_link_rwlock); if (!snd_pcm_stream_linked(substream)) { res = -EALREADY; diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index a7759846fbaad..795437b100820 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -942,31 +942,36 @@ static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream, unsigned long flags; long result = 0, count1; struct snd_rawmidi_runtime *runtime = substream->runtime; + unsigned long appl_ptr; + spin_lock_irqsave(&runtime->lock, flags); while (count > 0 && runtime->avail) { count1 = runtime->buffer_size - runtime->appl_ptr; if (count1 > count) count1 = count; - spin_lock_irqsave(&runtime->lock, flags); if (count1 > (int)runtime->avail) count1 = runtime->avail; + + /* update runtime->appl_ptr before unlocking for userbuf */ + appl_ptr = runtime->appl_ptr; + runtime->appl_ptr += count1; + runtime->appl_ptr %= runtime->buffer_size; + runtime->avail -= count1; + if (kernelbuf) - memcpy(kernelbuf + result, runtime->buffer + runtime->appl_ptr, count1); + memcpy(kernelbuf + result, runtime->buffer + appl_ptr, count1); if (userbuf) { spin_unlock_irqrestore(&runtime->lock, flags); if (copy_to_user(userbuf + result, - runtime->buffer + runtime->appl_ptr, count1)) { + runtime->buffer + appl_ptr, count1)) { return result > 0 ? result : -EFAULT; } spin_lock_irqsave(&runtime->lock, flags); } - runtime->appl_ptr += count1; - runtime->appl_ptr %= runtime->buffer_size; - runtime->avail -= count1; - spin_unlock_irqrestore(&runtime->lock, flags); result += count1; count -= count1; } + spin_unlock_irqrestore(&runtime->lock, flags); return result; } @@ -1055,23 +1060,16 @@ int snd_rawmidi_transmit_empty(struct snd_rawmidi_substream *substream) EXPORT_SYMBOL(snd_rawmidi_transmit_empty); /** - * snd_rawmidi_transmit_peek - copy data from the internal buffer + * __snd_rawmidi_transmit_peek - copy data from the internal buffer * @substream: the rawmidi substream * @buffer: the buffer pointer * @count: data size to transfer * - * Copies data from the internal output buffer to the given buffer. - * - * Call this in the interrupt handler when the midi output is ready, - * and call snd_rawmidi_transmit_ack() after the transmission is - * finished. - * - * Return: The size of copied data, or a negative error code on failure. + * This is a variant of snd_rawmidi_transmit_peek() without spinlock. 
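+ *
+ * The caller is expected to hold the substream's runtime->lock; a minimal
+ * usage sketch (assumed context, mirroring the locked wrapper below):
+ *
+ *	spin_lock_irqsave(&runtime->lock, flags);
+ *	count = __snd_rawmidi_transmit_peek(substream, buf, sizeof(buf));
+ *	spin_unlock_irqrestore(&runtime->lock, flags);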
*/ -int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, +int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count) { - unsigned long flags; int result, count1; struct snd_rawmidi_runtime *runtime = substream->runtime; @@ -1081,7 +1079,6 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, return -EINVAL; } result = 0; - spin_lock_irqsave(&runtime->lock, flags); if (runtime->avail >= runtime->buffer_size) { /* warning: lowlevel layer MUST trigger down the hardware */ goto __skip; @@ -1106,25 +1103,47 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, } } __skip: + return result; +} +EXPORT_SYMBOL(__snd_rawmidi_transmit_peek); + +/** + * snd_rawmidi_transmit_peek - copy data from the internal buffer + * @substream: the rawmidi substream + * @buffer: the buffer pointer + * @count: data size to transfer + * + * Copies data from the internal output buffer to the given buffer. + * + * Call this in the interrupt handler when the midi output is ready, + * and call snd_rawmidi_transmit_ack() after the transmission is + * finished. + * + * Return: The size of copied data, or a negative error code on failure. + */ +int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, + unsigned char *buffer, int count) +{ + struct snd_rawmidi_runtime *runtime = substream->runtime; + int result; + unsigned long flags; + + spin_lock_irqsave(&runtime->lock, flags); + result = __snd_rawmidi_transmit_peek(substream, buffer, count); spin_unlock_irqrestore(&runtime->lock, flags); return result; } EXPORT_SYMBOL(snd_rawmidi_transmit_peek); /** - * snd_rawmidi_transmit_ack - acknowledge the transmission + * __snd_rawmidi_transmit_ack - acknowledge the transmission * @substream: the rawmidi substream * @count: the transferred count * - * Advances the hardware pointer for the internal output buffer with - * the given size and updates the condition. - * Call after the transmission is finished. - * - * Return: The advanced size if successful, or a negative error code on failure. + * This is a variant of snd_rawmidi_transmit_ack() without spinlock. */ -int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) +int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) { - unsigned long flags; struct snd_rawmidi_runtime *runtime = substream->runtime; if (runtime->buffer == NULL) { @@ -1132,7 +1151,6 @@ int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) "snd_rawmidi_transmit_ack: output is not active!!!\n"); return -EINVAL; } - spin_lock_irqsave(&runtime->lock, flags); snd_BUG_ON(runtime->avail + count > runtime->buffer_size); runtime->hw_ptr += count; runtime->hw_ptr %= runtime->buffer_size; @@ -1142,9 +1160,32 @@ int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) if (runtime->drain || snd_rawmidi_ready(substream)) wake_up(&runtime->sleep); } - spin_unlock_irqrestore(&runtime->lock, flags); return count; } +EXPORT_SYMBOL(__snd_rawmidi_transmit_ack); + +/** + * snd_rawmidi_transmit_ack - acknowledge the transmission + * @substream: the rawmidi substream + * @count: the transferred count + * + * Advances the hardware pointer for the internal output buffer with + * the given size and updates the condition. + * Call after the transmission is finished. + * + * Return: The advanced size if successful, or a negative error code on failure.
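+ *
+ * A typical peek/ack cycle in an interrupt handler might look like this
+ * (hypothetical sketch; hw_write() stands in for the driver's own
+ * output routine):
+ *
+ *	count = snd_rawmidi_transmit_peek(substream, buf, sizeof(buf));
+ *	if (count > 0) {
+ *		hw_write(chip, buf, count);
+ *		snd_rawmidi_transmit_ack(substream, count);
+ *	}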
+ */ +int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) +{ + struct snd_rawmidi_runtime *runtime = substream->runtime; + int result; + unsigned long flags; + + spin_lock_irqsave(&runtime->lock, flags); + result = __snd_rawmidi_transmit_ack(substream, count); + spin_unlock_irqrestore(&runtime->lock, flags); + return result; +} EXPORT_SYMBOL(snd_rawmidi_transmit_ack); /** @@ -1160,12 +1201,22 @@ EXPORT_SYMBOL(snd_rawmidi_transmit_ack); int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count) { + struct snd_rawmidi_runtime *runtime = substream->runtime; + int result; + unsigned long flags; + + spin_lock_irqsave(&runtime->lock, flags); if (!substream->opened) - return -EBADFD; - count = snd_rawmidi_transmit_peek(substream, buffer, count); - if (count < 0) - return count; - return snd_rawmidi_transmit_ack(substream, count); + result = -EBADFD; + else { + count = __snd_rawmidi_transmit_peek(substream, buffer, count); + if (count <= 0) + result = count; + else + result = __snd_rawmidi_transmit_ack(substream, count); + } + spin_unlock_irqrestore(&runtime->lock, flags); + return result; } EXPORT_SYMBOL(snd_rawmidi_transmit); @@ -1177,8 +1228,9 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream, unsigned long flags; long count1, result; struct snd_rawmidi_runtime *runtime = substream->runtime; + unsigned long appl_ptr; - if (snd_BUG_ON(!kernelbuf && !userbuf)) + if (!kernelbuf && !userbuf) return -EINVAL; if (snd_BUG_ON(!runtime->buffer)) return -EINVAL; @@ -1197,12 +1249,19 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream, count1 = count; if (count1 > (long)runtime->avail) count1 = runtime->avail; + + /* update runtime->appl_ptr before unlocking for userbuf */ + appl_ptr = runtime->appl_ptr; + runtime->appl_ptr += count1; + runtime->appl_ptr %= runtime->buffer_size; + runtime->avail -= count1; + if (kernelbuf) - memcpy(runtime->buffer + runtime->appl_ptr, + memcpy(runtime->buffer + appl_ptr, kernelbuf + result, count1); else if (userbuf) { spin_unlock_irqrestore(&runtime->lock, flags); - if (copy_from_user(runtime->buffer + runtime->appl_ptr, + if (copy_from_user(runtime->buffer + appl_ptr, userbuf + result, count1)) { spin_lock_irqsave(&runtime->lock, flags); result = result > 0 ? 
result : -EFAULT; @@ -1210,9 +1269,6 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream, } spin_lock_irqsave(&runtime->lock, flags); } - runtime->appl_ptr += count1; - runtime->appl_ptr %= runtime->buffer_size; - runtime->avail -= count1; result += count1; count -= count1; } diff --git a/sound/core/rawmidi_compat.c b/sound/core/rawmidi_compat.c index 5268c1f58c25b..09a89094dcf72 100644 --- a/sound/core/rawmidi_compat.c +++ b/sound/core/rawmidi_compat.c @@ -94,9 +94,58 @@ static int snd_rawmidi_ioctl_status_compat(struct snd_rawmidi_file *rfile, return 0; } +#ifdef CONFIG_X86_X32 +/* X32 ABI has 64bit timespec and 64bit alignment */ +struct snd_rawmidi_status_x32 { + s32 stream; + u32 rsvd; /* alignment */ + struct timespec tstamp; + u32 avail; + u32 xruns; + unsigned char reserved[16]; +} __attribute__((packed)); + +#define put_timespec(src, dst) copy_to_user(dst, src, sizeof(*dst)) + +static int snd_rawmidi_ioctl_status_x32(struct snd_rawmidi_file *rfile, + struct snd_rawmidi_status_x32 __user *src) +{ + int err; + struct snd_rawmidi_status status; + + if (rfile->output == NULL) + return -EINVAL; + if (get_user(status.stream, &src->stream)) + return -EFAULT; + + switch (status.stream) { + case SNDRV_RAWMIDI_STREAM_OUTPUT: + err = snd_rawmidi_output_status(rfile->output, &status); + break; + case SNDRV_RAWMIDI_STREAM_INPUT: + err = snd_rawmidi_input_status(rfile->input, &status); + break; + default: + return -EINVAL; + } + if (err < 0) + return err; + + if (put_timespec(&status.tstamp, &src->tstamp) || + put_user(status.avail, &src->avail) || + put_user(status.xruns, &src->xruns)) + return -EFAULT; + + return 0; +} +#endif /* CONFIG_X86_X32 */ + enum { SNDRV_RAWMIDI_IOCTL_PARAMS32 = _IOWR('W', 0x10, struct snd_rawmidi_params32), SNDRV_RAWMIDI_IOCTL_STATUS32 = _IOWR('W', 0x20, struct snd_rawmidi_status32), +#ifdef CONFIG_X86_X32 + SNDRV_RAWMIDI_IOCTL_STATUS_X32 = _IOWR('W', 0x20, struct snd_rawmidi_status_x32), +#endif /* CONFIG_X86_X32 */ }; static long snd_rawmidi_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -115,6 +164,10 @@ static long snd_rawmidi_ioctl_compat(struct file *file, unsigned int cmd, unsign return snd_rawmidi_ioctl_params_compat(rfile, argp); case SNDRV_RAWMIDI_IOCTL_STATUS32: return snd_rawmidi_ioctl_status_compat(rfile, argp); +#ifdef CONFIG_X86_X32 + case SNDRV_RAWMIDI_IOCTL_STATUS_X32: + return snd_rawmidi_ioctl_status_x32(rfile, argp); +#endif /* CONFIG_X86_X32 */ } return -ENOIOCTLCMD; } diff --git a/sound/core/seq/oss/seq_oss.c b/sound/core/seq/oss/seq_oss.c index 72873a46afeb3..4b53b8f2330f8 100644 --- a/sound/core/seq/oss/seq_oss.c +++ b/sound/core/seq/oss/seq_oss.c @@ -148,8 +148,6 @@ odev_release(struct inode *inode, struct file *file) if ((dp = file->private_data) == NULL) return 0; - snd_seq_oss_drain_write(dp); - mutex_lock(&register_mutex); snd_seq_oss_release(dp); mutex_unlock(&register_mutex); diff --git a/sound/core/seq/oss/seq_oss_device.h b/sound/core/seq/oss/seq_oss_device.h index b43924325249c..d7b4d016b5475 100644 --- a/sound/core/seq/oss/seq_oss_device.h +++ b/sound/core/seq/oss/seq_oss_device.h @@ -127,7 +127,6 @@ int snd_seq_oss_write(struct seq_oss_devinfo *dp, const char __user *buf, int co unsigned int snd_seq_oss_poll(struct seq_oss_devinfo *dp, struct file *file, poll_table * wait); void snd_seq_oss_reset(struct seq_oss_devinfo *dp); -void snd_seq_oss_drain_write(struct seq_oss_devinfo *dp); /* */ void snd_seq_oss_process_queue(struct seq_oss_devinfo *dp, abstime_t time); diff --git
a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c index 2de3feff70d06..0b9c18b2e45f5 100644 --- a/sound/core/seq/oss/seq_oss_init.c +++ b/sound/core/seq/oss/seq_oss_init.c @@ -202,7 +202,7 @@ snd_seq_oss_open(struct file *file, int level) dp->index = i; if (i >= SNDRV_SEQ_OSS_MAX_CLIENTS) { - pr_err("ALSA: seq_oss: too many applications\n"); + pr_debug("ALSA: seq_oss: too many applications\n"); rc = -ENOMEM; goto _error; } @@ -435,22 +435,6 @@ snd_seq_oss_release(struct seq_oss_devinfo *dp) } -/* - * Wait until the queue is empty (if we don't have nonblock) - */ -void -snd_seq_oss_drain_write(struct seq_oss_devinfo *dp) -{ - if (! dp->timer->running) - return; - if (is_write_mode(dp->file_mode) && !is_nonblock_mode(dp->file_mode) && - dp->writeq) { - while (snd_seq_oss_writeq_sync(dp->writeq)) - ; - } -} - - /* * reset sequencer devices */ diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c index 48e4fe1b68abb..f38cf91b4faf3 100644 --- a/sound/core/seq/oss/seq_oss_synth.c +++ b/sound/core/seq/oss/seq_oss_synth.c @@ -308,7 +308,7 @@ snd_seq_oss_synth_cleanup(struct seq_oss_devinfo *dp) struct seq_oss_synth *rec; struct seq_oss_synthinfo *info; - if (snd_BUG_ON(dp->max_synthdev >= SNDRV_SEQ_OSS_MAX_SYNTH_DEVS)) + if (snd_BUG_ON(dp->max_synthdev > SNDRV_SEQ_OSS_MAX_SYNTH_DEVS)) return; for (i = 0; i < dp->max_synthdev; i++) { info = &dp->synths[i]; diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index edbdab85fc02f..ce6703ecfcefc 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -678,6 +678,9 @@ static int deliver_to_subscribers(struct snd_seq_client *client, else down_read(&grp->list_mutex); list_for_each_entry(subs, &grp->list_head, src_list) { + /* both ports ready? */ + if (atomic_read(&subs->ref_count) != 2) + continue; event->dest = subs->info.dest; if (subs->info.flags & SNDRV_SEQ_PORT_SUBS_TIMESTAMP) /* convert time according to flag with subscription */ @@ -1962,7 +1965,7 @@ static int snd_seq_ioctl_remove_events(struct snd_seq_client *client, * No restrictions so for a user client we can clear * the whole fifo */ - if (client->type == USER_CLIENT) + if (client->type == USER_CLIENT && client->data.user.fifo) snd_seq_fifo_clear(client->data.user.fifo); } diff --git a/sound/core/seq/seq_compat.c b/sound/core/seq/seq_compat.c index 81f7c109dc46e..65175902a68a8 100644 --- a/sound/core/seq/seq_compat.c +++ b/sound/core/seq/seq_compat.c @@ -49,11 +49,12 @@ static int snd_seq_call_port_info_ioctl(struct snd_seq_client *client, unsigned struct snd_seq_port_info *data; mm_segment_t fs; - data = memdup_user(data32, sizeof(*data32)); - if (IS_ERR(data)) - return PTR_ERR(data); + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; - if (get_user(data->flags, &data32->flags) || + if (copy_from_user(data, data32, sizeof(*data32)) || + get_user(data->flags, &data32->flags) || get_user(data->time_queue, &data32->time_queue)) goto error; data->kernel = NULL; diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index 801076687bb16..c850345c43b53 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -383,15 +383,20 @@ int snd_seq_pool_init(struct snd_seq_pool *pool) if (snd_BUG_ON(!pool)) return -EINVAL; - if (pool->ptr) /* should be atomic? 
*/ - return 0; - pool->ptr = vmalloc(sizeof(struct snd_seq_event_cell) * pool->size); - if (!pool->ptr) + cellptr = vmalloc(sizeof(struct snd_seq_event_cell) * pool->size); + if (!cellptr) return -ENOMEM; /* add new cells to the free cell list */ spin_lock_irqsave(&pool->lock, flags); + if (pool->ptr) { + spin_unlock_irqrestore(&pool->lock, flags); + vfree(cellptr); + return 0; + } + + pool->ptr = cellptr; pool->free = NULL; for (cell = 0; cell < pool->size; cell++) { diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c index 55170a20ae723..fe686ee41c6da 100644 --- a/sound/core/seq/seq_ports.c +++ b/sound/core/seq/seq_ports.c @@ -173,10 +173,6 @@ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client, } /* */ -enum group_type { - SRC_LIST, DEST_LIST -}; - static int subscribe_port(struct snd_seq_client *client, struct snd_seq_client_port *port, struct snd_seq_port_subs_info *grp, @@ -203,6 +199,20 @@ static struct snd_seq_client_port *get_client_port(struct snd_seq_addr *addr, return NULL; } +static void delete_and_unsubscribe_port(struct snd_seq_client *client, + struct snd_seq_client_port *port, + struct snd_seq_subscribers *subs, + bool is_src, bool ack); + +static inline struct snd_seq_subscribers * +get_subscriber(struct list_head *p, bool is_src) +{ + if (is_src) + return list_entry(p, struct snd_seq_subscribers, src_list); + else + return list_entry(p, struct snd_seq_subscribers, dest_list); +} + /* * remove all subscribers on the list * this is called from port_delete, for each src and dest list. @@ -210,7 +220,7 @@ static struct snd_seq_client_port *get_client_port(struct snd_seq_addr *addr, static void clear_subscriber_list(struct snd_seq_client *client, struct snd_seq_client_port *port, struct snd_seq_port_subs_info *grp, - int grptype) + int is_src) { struct list_head *p, *n; @@ -219,15 +229,13 @@ static void clear_subscriber_list(struct snd_seq_client *client, struct snd_seq_client *c; struct snd_seq_client_port *aport; - if (grptype == SRC_LIST) { - subs = list_entry(p, struct snd_seq_subscribers, src_list); + subs = get_subscriber(p, is_src); + if (is_src) aport = get_client_port(&subs->info.dest, &c); - } else { - subs = list_entry(p, struct snd_seq_subscribers, dest_list); + else aport = get_client_port(&subs->info.sender, &c); - } - list_del(p); - unsubscribe_port(client, port, grp, &subs->info, 0); + delete_and_unsubscribe_port(client, port, subs, is_src, false); + if (!aport) { /* looks like the connected port is being deleted. * we decrease the counter, and when both ports are deleted @@ -235,21 +243,14 @@ static void clear_subscriber_list(struct snd_seq_client *client, */ if (atomic_dec_and_test(&subs->ref_count)) kfree(subs); - } else { - /* ok we got the connected port */ - struct snd_seq_port_subs_info *agrp; - agrp = (grptype == SRC_LIST) ? 
&aport->c_dest : &aport->c_src; - down_write(&agrp->list_mutex); - if (grptype == SRC_LIST) - list_del(&subs->dest_list); - else - list_del(&subs->src_list); - up_write(&agrp->list_mutex); - unsubscribe_port(c, aport, agrp, &subs->info, 1); - kfree(subs); - snd_seq_port_unlock(aport); - snd_seq_client_unlock(c); + continue; } + + /* ok we got the connected port */ + delete_and_unsubscribe_port(c, aport, subs, !is_src, true); + kfree(subs); + snd_seq_port_unlock(aport); + snd_seq_client_unlock(c); } } @@ -262,8 +263,8 @@ static int port_delete(struct snd_seq_client *client, snd_use_lock_sync(&port->use_lock); /* clear subscribers info */ - clear_subscriber_list(client, port, &port->c_src, SRC_LIST); - clear_subscriber_list(client, port, &port->c_dest, DEST_LIST); + clear_subscriber_list(client, port, &port->c_src, true); + clear_subscriber_list(client, port, &port->c_dest, false); if (port->private_free) port->private_free(port->private_data); @@ -479,85 +480,123 @@ static int match_subs_info(struct snd_seq_port_subscribe *r, return 0; } - -/* connect two ports */ -int snd_seq_port_connect(struct snd_seq_client *connector, - struct snd_seq_client *src_client, - struct snd_seq_client_port *src_port, - struct snd_seq_client *dest_client, - struct snd_seq_client_port *dest_port, - struct snd_seq_port_subscribe *info) +static int check_and_subscribe_port(struct snd_seq_client *client, + struct snd_seq_client_port *port, + struct snd_seq_subscribers *subs, + bool is_src, bool exclusive, bool ack) { - struct snd_seq_port_subs_info *src = &src_port->c_src; - struct snd_seq_port_subs_info *dest = &dest_port->c_dest; - struct snd_seq_subscribers *subs, *s; - int err, src_called = 0; - unsigned long flags; - int exclusive; - - subs = kzalloc(sizeof(*subs), GFP_KERNEL); - if (! subs) - return -ENOMEM; - - subs->info = *info; - atomic_set(&subs->ref_count, 2); + struct snd_seq_port_subs_info *grp; + struct list_head *p; + struct snd_seq_subscribers *s; + int err; - down_write(&src->list_mutex); - down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING); - - exclusive = info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE ? 1 : 0; + grp = is_src ? &port->c_src : &port->c_dest; err = -EBUSY; + down_write(&grp->list_mutex); if (exclusive) { - if (! list_empty(&src->list_head) || ! 
list_empty(&dest->list_head)) + if (!list_empty(&grp->list_head)) goto __error; } else { - if (src->exclusive || dest->exclusive) + if (grp->exclusive) goto __error; /* check whether already exists */ - list_for_each_entry(s, &src->list_head, src_list) { - if (match_subs_info(info, &s->info)) - goto __error; - } - list_for_each_entry(s, &dest->list_head, dest_list) { - if (match_subs_info(info, &s->info)) + list_for_each(p, &grp->list_head) { + s = get_subscriber(p, is_src); + if (match_subs_info(&subs->info, &s->info)) goto __error; } } - if ((err = subscribe_port(src_client, src_port, src, info, - connector->number != src_client->number)) < 0) - goto __error; - src_called = 1; - - if ((err = subscribe_port(dest_client, dest_port, dest, info, - connector->number != dest_client->number)) < 0) + err = subscribe_port(client, port, grp, &subs->info, ack); + if (err < 0) { + grp->exclusive = 0; goto __error; + } /* add to list */ - write_lock_irqsave(&src->list_lock, flags); - // write_lock(&dest->list_lock); // no other lock yet - list_add_tail(&subs->src_list, &src->list_head); - list_add_tail(&subs->dest_list, &dest->list_head); - // write_unlock(&dest->list_lock); // no other lock yet - write_unlock_irqrestore(&src->list_lock, flags); + write_lock_irq(&grp->list_lock); + if (is_src) + list_add_tail(&subs->src_list, &grp->list_head); + else + list_add_tail(&subs->dest_list, &grp->list_head); + grp->exclusive = exclusive; + atomic_inc(&subs->ref_count); + write_unlock_irq(&grp->list_lock); + err = 0; - src->exclusive = dest->exclusive = exclusive; + __error: + up_write(&grp->list_mutex); + return err; +} + +static void delete_and_unsubscribe_port(struct snd_seq_client *client, + struct snd_seq_client_port *port, + struct snd_seq_subscribers *subs, + bool is_src, bool ack) +{ + struct snd_seq_port_subs_info *grp; + struct list_head *list; + bool empty; + + grp = is_src ? &port->c_src : &port->c_dest; + list = is_src ? 
&subs->src_list : &subs->dest_list; + down_write(&grp->list_mutex); + write_lock_irq(&grp->list_lock); + empty = list_empty(list); + if (!empty) + list_del_init(list); + grp->exclusive = 0; + write_unlock_irq(&grp->list_lock); + up_write(&grp->list_mutex); + + if (!empty) + unsubscribe_port(client, port, grp, &subs->info, ack); +} + +/* connect two ports */ +int snd_seq_port_connect(struct snd_seq_client *connector, + struct snd_seq_client *src_client, + struct snd_seq_client_port *src_port, + struct snd_seq_client *dest_client, + struct snd_seq_client_port *dest_port, + struct snd_seq_port_subscribe *info) +{ + struct snd_seq_subscribers *subs; + bool exclusive; + int err; + + subs = kzalloc(sizeof(*subs), GFP_KERNEL); + if (!subs) + return -ENOMEM; + + subs->info = *info; + atomic_set(&subs->ref_count, 0); + INIT_LIST_HEAD(&subs->src_list); + INIT_LIST_HEAD(&subs->dest_list); + + exclusive = !!(info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE); + + err = check_and_subscribe_port(src_client, src_port, subs, true, + exclusive, + connector->number != src_client->number); + if (err < 0) + goto error; + err = check_and_subscribe_port(dest_client, dest_port, subs, false, + exclusive, + connector->number != dest_client->number); + if (err < 0) + goto error_dest; - up_write(&dest->list_mutex); - up_write(&src->list_mutex); return 0; - __error: - if (src_called) - unsubscribe_port(src_client, src_port, src, info, - connector->number != src_client->number); + error_dest: + delete_and_unsubscribe_port(src_client, src_port, subs, true, + connector->number != src_client->number); + error: kfree(subs); - up_write(&dest->list_mutex); - up_write(&src->list_mutex); return err; } - /* remove the connection */ int snd_seq_port_disconnect(struct snd_seq_client *connector, struct snd_seq_client *src_client, @@ -567,37 +606,28 @@ int snd_seq_port_disconnect(struct snd_seq_client *connector, struct snd_seq_port_subscribe *info) { struct snd_seq_port_subs_info *src = &src_port->c_src; - struct snd_seq_port_subs_info *dest = &dest_port->c_dest; struct snd_seq_subscribers *subs; int err = -ENOENT; - unsigned long flags; down_write(&src->list_mutex); - down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING); - /* look for the connection */ list_for_each_entry(subs, &src->list_head, src_list) { if (match_subs_info(info, &subs->info)) { - write_lock_irqsave(&src->list_lock, flags); - // write_lock(&dest->list_lock); // no lock yet - list_del(&subs->src_list); - list_del(&subs->dest_list); - // write_unlock(&dest->list_lock); - write_unlock_irqrestore(&src->list_lock, flags); - src->exclusive = dest->exclusive = 0; - unsubscribe_port(src_client, src_port, src, info, - connector->number != src_client->number); - unsubscribe_port(dest_client, dest_port, dest, info, - connector->number != dest_client->number); - kfree(subs); + atomic_dec(&subs->ref_count); /* mark as not ready */ err = 0; break; } } - - up_write(&dest->list_mutex); up_write(&src->list_mutex); - return err; + if (err < 0) + return err; + + delete_and_unsubscribe_port(src_client, src_port, subs, true, + connector->number != src_client->number); + delete_and_unsubscribe_port(dest_client, dest_port, subs, false, + connector->number != dest_client->number); + kfree(subs); + return 0; } diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index a0cda38205b97..77ec214203558 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -142,8 +142,10 @@ static struct snd_seq_queue *queue_new(int owner, int locked) static void 
queue_delete(struct snd_seq_queue *q) { /* stop and release the timer */ + mutex_lock(&q->timer_mutex); snd_seq_timer_stop(q->timer); snd_seq_timer_close(q); + mutex_unlock(&q->timer_mutex); /* wait until access free */ snd_use_lock_sync(&q->use_lock); /* release resources... */ diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c index 186f1611103c5..a2468f1101d16 100644 --- a/sound/core/seq/seq_timer.c +++ b/sound/core/seq/seq_timer.c @@ -90,6 +90,9 @@ void snd_seq_timer_delete(struct snd_seq_timer **tmr) void snd_seq_timer_defaults(struct snd_seq_timer * tmr) { + unsigned long flags; + + spin_lock_irqsave(&tmr->lock, flags); /* setup defaults */ tmr->ppq = 96; /* 96 PPQ */ tmr->tempo = 500000; /* 120 BPM */ @@ -105,21 +108,25 @@ void snd_seq_timer_defaults(struct snd_seq_timer * tmr) tmr->preferred_resolution = seq_default_timer_resolution; tmr->skew = tmr->skew_base = SKEW_BASE; + spin_unlock_irqrestore(&tmr->lock, flags); } -void snd_seq_timer_reset(struct snd_seq_timer * tmr) +static void seq_timer_reset(struct snd_seq_timer *tmr) { - unsigned long flags; - - spin_lock_irqsave(&tmr->lock, flags); - /* reset time & songposition */ tmr->cur_time.tv_sec = 0; tmr->cur_time.tv_nsec = 0; tmr->tick.cur_tick = 0; tmr->tick.fraction = 0; +} + +void snd_seq_timer_reset(struct snd_seq_timer *tmr) +{ + unsigned long flags; + spin_lock_irqsave(&tmr->lock, flags); + seq_timer_reset(tmr); spin_unlock_irqrestore(&tmr->lock, flags); } @@ -138,8 +145,11 @@ static void snd_seq_timer_interrupt(struct snd_timer_instance *timeri, tmr = q->timer; if (tmr == NULL) return; - if (!tmr->running) + spin_lock_irqsave(&tmr->lock, flags); + if (!tmr->running) { + spin_unlock_irqrestore(&tmr->lock, flags); return; + } resolution *= ticks; if (tmr->skew != tmr->skew_base) { @@ -148,8 +158,6 @@ static void snd_seq_timer_interrupt(struct snd_timer_instance *timeri, (((resolution & 0xffff) * tmr->skew) >> 16); } - spin_lock_irqsave(&tmr->lock, flags); - /* update timer */ snd_seq_inc_time_nsec(&tmr->cur_time, resolution); @@ -296,26 +304,30 @@ int snd_seq_timer_open(struct snd_seq_queue *q) t->callback = snd_seq_timer_interrupt; t->callback_data = q; t->flags |= SNDRV_TIMER_IFLG_AUTO; + spin_lock_irq(&tmr->lock); tmr->timeri = t; + spin_unlock_irq(&tmr->lock); return 0; } int snd_seq_timer_close(struct snd_seq_queue *q) { struct snd_seq_timer *tmr; + struct snd_timer_instance *t; tmr = q->timer; if (snd_BUG_ON(!tmr)) return -EINVAL; - if (tmr->timeri) { - snd_timer_stop(tmr->timeri); - snd_timer_close(tmr->timeri); - tmr->timeri = NULL; - } + spin_lock_irq(&tmr->lock); + t = tmr->timeri; + tmr->timeri = NULL; + spin_unlock_irq(&tmr->lock); + if (t) + snd_timer_close(t); return 0; } -int snd_seq_timer_stop(struct snd_seq_timer * tmr) +static int seq_timer_stop(struct snd_seq_timer *tmr) { if (! tmr->timeri) return -EINVAL; @@ -326,6 +338,17 @@ int snd_seq_timer_stop(struct snd_seq_timer * tmr) return 0; } +int snd_seq_timer_stop(struct snd_seq_timer *tmr) +{ + unsigned long flags; + int err; + + spin_lock_irqsave(&tmr->lock, flags); + err = seq_timer_stop(tmr); + spin_unlock_irqrestore(&tmr->lock, flags); + return err; +} + static int initialize_timer(struct snd_seq_timer *tmr) { struct snd_timer *t; @@ -358,13 +381,13 @@ static int initialize_timer(struct snd_seq_timer *tmr) return 0; } -int snd_seq_timer_start(struct snd_seq_timer * tmr) +static int seq_timer_start(struct snd_seq_timer *tmr) { if (! 
tmr->timeri) return -EINVAL; if (tmr->running) - snd_seq_timer_stop(tmr); - snd_seq_timer_reset(tmr); + seq_timer_stop(tmr); + seq_timer_reset(tmr); if (initialize_timer(tmr) < 0) return -EINVAL; snd_timer_start(tmr->timeri, tmr->ticks); @@ -373,14 +396,25 @@ int snd_seq_timer_start(struct snd_seq_timer * tmr) return 0; } -int snd_seq_timer_continue(struct snd_seq_timer * tmr) +int snd_seq_timer_start(struct snd_seq_timer *tmr) +{ + unsigned long flags; + int err; + + spin_lock_irqsave(&tmr->lock, flags); + err = seq_timer_start(tmr); + spin_unlock_irqrestore(&tmr->lock, flags); + return err; +} + +static int seq_timer_continue(struct snd_seq_timer *tmr) { if (! tmr->timeri) return -EINVAL; if (tmr->running) return -EBUSY; if (! tmr->initialized) { - snd_seq_timer_reset(tmr); + seq_timer_reset(tmr); if (initialize_timer(tmr) < 0) return -EINVAL; } @@ -390,11 +424,24 @@ int snd_seq_timer_continue(struct snd_seq_timer * tmr) return 0; } +int snd_seq_timer_continue(struct snd_seq_timer *tmr) +{ + unsigned long flags; + int err; + + spin_lock_irqsave(&tmr->lock, flags); + err = seq_timer_continue(tmr); + spin_unlock_irqrestore(&tmr->lock, flags); + return err; +} + /* return current 'real' time. use timeofday() to get better granularity. */ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) { snd_seq_real_time_t cur_time; + unsigned long flags; + spin_lock_irqsave(&tmr->lock, flags); cur_time = tmr->cur_time; if (tmr->running) { struct timeval tm; @@ -410,7 +457,7 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) } snd_seq_sanity_real_time(&cur_time); } - + spin_unlock_irqrestore(&tmr->lock, flags); return cur_time; } diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index 56e0f4cd3f829..81134e067184f 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -155,21 +155,26 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream, struct snd_virmidi *vmidi = substream->runtime->private_data; int count, res; unsigned char buf[32], *pbuf; + unsigned long flags; if (up) { vmidi->trigger = 1; if (vmidi->seq_mode == SNDRV_VIRMIDI_SEQ_DISPATCH && !(vmidi->rdev->flags & SNDRV_VIRMIDI_SUBSCRIBE)) { - snd_rawmidi_transmit_ack(substream, substream->runtime->buffer_size - substream->runtime->avail); - return; /* ignored */ + while (snd_rawmidi_transmit(substream, buf, + sizeof(buf)) > 0) { + /* ignored */ + } + return; } if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) { if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0) return; vmidi->event.type = SNDRV_SEQ_EVENT_NONE; } + spin_lock_irqsave(&substream->runtime->lock, flags); while (1) { - count = snd_rawmidi_transmit_peek(substream, buf, sizeof(buf)); + count = __snd_rawmidi_transmit_peek(substream, buf, sizeof(buf)); if (count <= 0) break; pbuf = buf; @@ -179,16 +184,18 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream, snd_midi_event_reset_encode(vmidi->parser); continue; } - snd_rawmidi_transmit_ack(substream, res); + __snd_rawmidi_transmit_ack(substream, res); pbuf += res; count -= res; if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) { if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0) - return; + goto out; vmidi->event.type = SNDRV_SEQ_EVENT_NONE; } } } + out: + spin_unlock_irqrestore(&substream->runtime->lock, flags); } else { vmidi->trigger = 0; } @@ -254,9 +261,13 @@ static int snd_virmidi_output_open(struct snd_rawmidi_substream 
*substream) */ static int snd_virmidi_input_close(struct snd_rawmidi_substream *substream) { + struct snd_virmidi_dev *rdev = substream->rmidi->private_data; struct snd_virmidi *vmidi = substream->runtime->private_data; - snd_midi_event_free(vmidi->parser); + + write_lock_irq(&rdev->filelist_lock); list_del(&vmidi->list); + write_unlock_irq(&rdev->filelist_lock); + snd_midi_event_free(vmidi->parser); substream->runtime->private_data = NULL; kfree(vmidi); return 0; } diff --git a/sound/core/timer.c b/sound/core/timer.c index a9a1a047c521f..bf48e71f73cde 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -65,6 +65,7 @@ struct snd_timer_user { int qtail; int qused; int queue_size; + bool disconnected; struct snd_timer_read *queue; struct snd_timer_tread *tqueue; spinlock_t qlock; @@ -73,7 +74,7 @@ struct snd_timer_user { struct timespec tstamp; /* trigger tstamp */ wait_queue_head_t qchange_sleep; struct fasync_struct *fasync; - struct mutex tread_sem; + struct mutex ioctl_lock; }; /* list of timers */ @@ -215,11 +216,13 @@ static void snd_timer_check_master(struct snd_timer_instance *master) slave->slave_id == master->slave_id) { list_move_tail(&slave->open_list, &master->slave_list_head); spin_lock_irq(&slave_active_lock); + spin_lock(&master->timer->lock); slave->master = master; slave->timer = master->timer; if (slave->flags & SNDRV_TIMER_IFLG_RUNNING) list_add_tail(&slave->active_list, &master->slave_active_head); + spin_unlock(&master->timer->lock); spin_unlock_irq(&slave_active_lock); } } @@ -288,6 +291,9 @@ int snd_timer_open(struct snd_timer_instance **ti, mutex_unlock(&register_mutex); return -ENOMEM; } + /* take a card refcount for safe disconnection */ + if (timer->card) + get_device(&timer->card->card_dev); timeri->slave_class = tid->dev_sclass; timeri->slave_id = slave_id; if (list_empty(&timer->open_list_head) && timer->hw.open) @@ -299,8 +305,7 @@ int snd_timer_open(struct snd_timer_instance **ti, return 0; } -static int _snd_timer_stop(struct snd_timer_instance *timeri, - int keep_flag, int event); +static int _snd_timer_stop(struct snd_timer_instance *timeri, int event); /* * close a timer instance @@ -342,19 +347,25 @@ int snd_timer_close(struct snd_timer_instance *timeri) spin_unlock_irq(&timer->lock); mutex_lock(&register_mutex); list_del(&timeri->open_list); - if (timer && list_empty(&timer->open_list_head) && + if (list_empty(&timer->open_list_head) && timer->hw.close) timer->hw.close(timer); /* remove slave links */ + spin_lock_irq(&slave_active_lock); + spin_lock(&timer->lock); list_for_each_entry_safe(slave, tmp, &timeri->slave_list_head, open_list) { - spin_lock_irq(&slave_active_lock); - _snd_timer_stop(slave, 1, SNDRV_TIMER_EVENT_RESOLUTION); list_move_tail(&slave->open_list, &snd_timer_slave_list); slave->master = NULL; slave->timer = NULL; - spin_unlock_irq(&slave_active_lock); + list_del_init(&slave->ack_list); + list_del_init(&slave->active_list); } + spin_unlock(&timer->lock); + spin_unlock_irq(&slave_active_lock); + /* release a card refcount for safe disconnection */ + if (timer->card) + put_device(&timer->card->card_dev); mutex_unlock(&register_mutex); } out: @@ -411,7 +422,7 @@ static void snd_timer_notify1(struct snd_timer_instance *ti, int event) spin_lock_irqsave(&timer->lock, flags); list_for_each_entry(ts, &ti->slave_active_head, active_list) if (ts->ccallback) - ts->ccallback(ti, event + 100, &tstamp, resolution); + ts->ccallback(ts, event + 100, &tstamp, resolution); spin_unlock_irqrestore(&timer->lock, flags); } @@ -440,10 +451,17 @@ static int
snd_timer_start_slave(struct snd_timer_instance *timeri) unsigned long flags; spin_lock_irqsave(&slave_active_lock, flags); + if (timeri->flags & SNDRV_TIMER_IFLG_RUNNING) { + spin_unlock_irqrestore(&slave_active_lock, flags); + return -EBUSY; + } timeri->flags |= SNDRV_TIMER_IFLG_RUNNING; - if (timeri->master) + if (timeri->master && timeri->timer) { + spin_lock(&timeri->timer->lock); list_add_tail(&timeri->active_list, &timeri->master->slave_active_head); + spin_unlock(&timeri->timer->lock); + } spin_unlock_irqrestore(&slave_active_lock, flags); return 1; /* delayed start */ } @@ -461,23 +479,32 @@ int snd_timer_start(struct snd_timer_instance *timeri, unsigned int ticks) return -EINVAL; if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) { result = snd_timer_start_slave(timeri); - snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START); + if (result >= 0) + snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START); return result; } timer = timeri->timer; if (timer == NULL) return -EINVAL; + if (timer->card && timer->card->shutdown) + return -ENODEV; spin_lock_irqsave(&timer->lock, flags); + if (timeri->flags & (SNDRV_TIMER_IFLG_RUNNING | + SNDRV_TIMER_IFLG_START)) { + result = -EBUSY; + goto unlock; + } timeri->ticks = timeri->cticks = ticks; timeri->pticks = 0; result = snd_timer_start1(timer, timeri, ticks); + unlock: spin_unlock_irqrestore(&timer->lock, flags); - snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START); + if (result >= 0) + snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START); return result; } -static int _snd_timer_stop(struct snd_timer_instance * timeri, - int keep_flag, int event) +static int _snd_timer_stop(struct snd_timer_instance *timeri, int event) { struct snd_timer *timer; unsigned long flags; @@ -486,19 +513,36 @@ static int _snd_timer_stop(struct snd_timer_instance * timeri, return -ENXIO; if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) { - if (!keep_flag) { - spin_lock_irqsave(&slave_active_lock, flags); - timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING; + spin_lock_irqsave(&slave_active_lock, flags); + if (!(timeri->flags & SNDRV_TIMER_IFLG_RUNNING)) { spin_unlock_irqrestore(&slave_active_lock, flags); + return -EBUSY; } + if (timeri->timer) + spin_lock(&timeri->timer->lock); + timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING; + list_del_init(&timeri->ack_list); + list_del_init(&timeri->active_list); + if (timeri->timer) + spin_unlock(&timeri->timer->lock); + spin_unlock_irqrestore(&slave_active_lock, flags); goto __end; } timer = timeri->timer; if (!timer) return -EINVAL; spin_lock_irqsave(&timer->lock, flags); + if (!(timeri->flags & (SNDRV_TIMER_IFLG_RUNNING | + SNDRV_TIMER_IFLG_START))) { + spin_unlock_irqrestore(&timer->lock, flags); + return -EBUSY; + } list_del_init(&timeri->ack_list); list_del_init(&timeri->active_list); + if (timer->card && timer->card->shutdown) { + spin_unlock_irqrestore(&timer->lock, flags); + return 0; + } if ((timeri->flags & SNDRV_TIMER_IFLG_RUNNING) && !(--timer->running)) { timer->hw.stop(timer); @@ -511,9 +555,7 @@ static int _snd_timer_stop(struct snd_timer_instance * timeri, } } } - if (!keep_flag) - timeri->flags &= - ~(SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START); + timeri->flags &= ~(SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START); spin_unlock_irqrestore(&timer->lock, flags); __end: if (event != SNDRV_TIMER_EVENT_RESOLUTION) @@ -532,7 +574,7 @@ int snd_timer_stop(struct snd_timer_instance *timeri) unsigned long flags; int err; - err = _snd_timer_stop(timeri, 0, SNDRV_TIMER_EVENT_STOP); + err = _snd_timer_stop(timeri, SNDRV_TIMER_EVENT_STOP); if 
(err < 0) return err; timer = timeri->timer; @@ -561,11 +603,18 @@ int snd_timer_continue(struct snd_timer_instance *timeri) timer = timeri->timer; if (! timer) return -EINVAL; + if (timer->card && timer->card->shutdown) + return -ENODEV; spin_lock_irqsave(&timer->lock, flags); + if (timeri->flags & SNDRV_TIMER_IFLG_RUNNING) { + result = -EBUSY; + goto unlock; + } if (!timeri->cticks) timeri->cticks = 1; timeri->pticks = 0; result = snd_timer_start1(timer, timeri, timer->sticks); + unlock: spin_unlock_irqrestore(&timer->lock, flags); snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_CONTINUE); return result; @@ -576,7 +625,7 @@ int snd_timer_continue(struct snd_timer_instance *timeri) */ int snd_timer_pause(struct snd_timer_instance * timeri) { - return _snd_timer_stop(timeri, 0, SNDRV_TIMER_EVENT_PAUSE); + return _snd_timer_stop(timeri, SNDRV_TIMER_EVENT_PAUSE); } /* @@ -624,6 +673,9 @@ static void snd_timer_tasklet(unsigned long arg) unsigned long resolution, ticks; unsigned long flags; + if (timer->card && timer->card->shutdown) + return; + spin_lock_irqsave(&timer->lock, flags); /* now process all callbacks */ while (!list_empty(&timer->sack_list_head)) { @@ -664,6 +716,9 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left) if (timer == NULL) return; + if (timer->card && timer->card->shutdown) + return; + spin_lock_irqsave(&timer->lock, flags); /* remember the current resolution */ @@ -693,8 +748,8 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left) ti->cticks = ti->ticks; } else { ti->flags &= ~SNDRV_TIMER_IFLG_RUNNING; - if (--timer->running) - list_del(&ti->active_list); + --timer->running; + list_del_init(&ti->active_list); } if ((timer->hw.flags & SNDRV_TIMER_HW_TASKLET) || (ti->flags & SNDRV_TIMER_IFLG_FAST)) @@ -874,11 +929,28 @@ static int snd_timer_dev_register(struct snd_device *dev) return 0; } +/* just for reference in snd_timer_dev_disconnect() below */ +static void snd_timer_user_ccallback(struct snd_timer_instance *timeri, + int event, struct timespec *tstamp, + unsigned long resolution); + static int snd_timer_dev_disconnect(struct snd_device *device) { struct snd_timer *timer = device->device_data; + struct snd_timer_instance *ti; + mutex_lock(&register_mutex); list_del_init(&timer->device_list); + /* wake up pending sleepers */ + list_for_each_entry(ti, &timer->open_list_head, open_list) { + /* FIXME: better to have a ti.disconnect() op */ + if (ti->ccallback == snd_timer_user_ccallback) { + struct snd_timer_user *tu = ti->callback_data; + + tu->disconnected = true; + wake_up(&tu->qchange_sleep); + } + } mutex_unlock(&register_mutex); return 0; } @@ -889,6 +961,8 @@ void snd_timer_notify(struct snd_timer *timer, int event, struct timespec *tstam unsigned long resolution = 0; struct snd_timer_instance *ti, *ts; + if (timer->card && timer->card->shutdown) + return; if (!
(timer->hw.flags & SNDRV_TIMER_HW_SLAVE)) return; if (snd_BUG_ON(event < SNDRV_TIMER_EVENT_MSTART || @@ -1047,6 +1121,8 @@ static void snd_timer_proc_read(struct snd_info_entry *entry, mutex_lock(&register_mutex); list_for_each_entry(timer, &snd_timer_list, device_list) { + if (timer->card && timer->card->shutdown) + continue; switch (timer->tmr_class) { case SNDRV_TIMER_CLASS_GLOBAL: snd_iprintf(buffer, "G%i: ", timer->tmr_device); @@ -1253,7 +1329,7 @@ static int snd_timer_user_open(struct inode *inode, struct file *file) return -ENOMEM; spin_lock_init(&tu->qlock); init_waitqueue_head(&tu->qchange_sleep); - mutex_init(&tu->tread_sem); + mutex_init(&tu->ioctl_lock); tu->ticks = 1; tu->queue_size = 128; tu->queue = kmalloc(tu->queue_size * sizeof(struct snd_timer_read), @@ -1273,8 +1349,10 @@ static int snd_timer_user_release(struct inode *inode, struct file *file) if (file->private_data) { tu = file->private_data; file->private_data = NULL; + mutex_lock(&tu->ioctl_lock); if (tu->timeri) snd_timer_close(tu->timeri); + mutex_unlock(&tu->ioctl_lock); kfree(tu->queue); kfree(tu->tqueue); kfree(tu); @@ -1512,7 +1590,6 @@ static int snd_timer_user_tselect(struct file *file, int err = 0; tu = file->private_data; - mutex_lock(&tu->tread_sem); if (tu->timeri) { snd_timer_close(tu->timeri); tu->timeri = NULL; @@ -1556,7 +1633,6 @@ static int snd_timer_user_tselect(struct file *file, } __err: - mutex_unlock(&tu->tread_sem); return err; } @@ -1769,7 +1845,7 @@ enum { SNDRV_TIMER_IOCTL_PAUSE_OLD = _IO('T', 0x23), }; -static long snd_timer_user_ioctl(struct file *file, unsigned int cmd, +static long __snd_timer_user_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_timer_user *tu; @@ -1786,17 +1862,11 @@ static long snd_timer_user_ioctl(struct file *file, unsigned int cmd, { int xarg; - mutex_lock(&tu->tread_sem); - if (tu->timeri) { /* too late */ - mutex_unlock(&tu->tread_sem); + if (tu->timeri) /* too late */ return -EBUSY; - } - if (get_user(xarg, p)) { - mutex_unlock(&tu->tread_sem); + if (get_user(xarg, p)) return -EFAULT; - } tu->tread = xarg ?
1 : 0; - mutex_unlock(&tu->tread_sem); return 0; } case SNDRV_TIMER_IOCTL_GINFO: @@ -1829,6 +1899,18 @@ static long snd_timer_user_ioctl(struct file *file, unsigned int cmd, return -ENOTTY; } +static long snd_timer_user_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct snd_timer_user *tu = file->private_data; + long ret; + + mutex_lock(&tu->ioctl_lock); + ret = __snd_timer_user_ioctl(file, cmd, arg); + mutex_unlock(&tu->ioctl_lock); + return ret; +} + static int snd_timer_user_fasync(int fd, struct file * file, int on) { struct snd_timer_user *tu; @@ -1842,6 +1924,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, { struct snd_timer_user *tu; long result = 0, unit; + int qhead; int err = 0; tu = file->private_data; @@ -1853,7 +1936,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) { err = -EAGAIN; - break; + goto _error; } set_current_state(TASK_INTERRUPTIBLE); @@ -1866,40 +1949,39 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, remove_wait_queue(&tu->qchange_sleep, &wait); + if (tu->disconnected) { + err = -ENODEV; + goto _error; + } if (signal_pending(current)) { err = -ERESTARTSYS; - break; + goto _error; } } + qhead = tu->qhead++; + tu->qhead %= tu->queue_size; spin_unlock_irq(&tu->qlock); - if (err < 0) - goto _error; if (tu->tread) { - if (copy_to_user(buffer, &tu->tqueue[tu->qhead++], - sizeof(struct snd_timer_tread))) { + if (copy_to_user(buffer, &tu->tqueue[qhead], + sizeof(struct snd_timer_tread))) err = -EFAULT; - goto _error; - } } else { - if (copy_to_user(buffer, &tu->queue[tu->qhead++], - sizeof(struct snd_timer_read))) { + if (copy_to_user(buffer, &tu->queue[qhead], + sizeof(struct snd_timer_read))) err = -EFAULT; - goto _error; - } } - tu->qhead %= tu->queue_size; - - result += unit; - buffer += unit; - spin_lock_irq(&tu->qlock); tu->qused--; + if (err < 0) + goto _error; + result += unit; + buffer += unit; } - spin_unlock_irq(&tu->qlock); _error: + spin_unlock_irq(&tu->qlock); return result > 0 ? 
result : err; } @@ -1915,6 +1997,8 @@ static unsigned int snd_timer_user_poll(struct file *file, poll_table * wait) mask = 0; if (tu->qused) mask |= POLLIN | POLLRDNORM; + if (tu->disconnected) + mask |= POLLERR; return mask; } diff --git a/sound/core/timer_compat.c b/sound/core/timer_compat.c index e05802ae6e1b1..2e908225d754c 100644 --- a/sound/core/timer_compat.c +++ b/sound/core/timer_compat.c @@ -70,13 +70,14 @@ static int snd_timer_user_status_compat(struct file *file, struct snd_timer_status32 __user *_status) { struct snd_timer_user *tu; - struct snd_timer_status status; + struct snd_timer_status32 status; tu = file->private_data; if (snd_BUG_ON(!tu->timeri)) return -ENXIO; memset(&status, 0, sizeof(status)); - status.tstamp = tu->tstamp; + status.tstamp.tv_sec = tu->tstamp.tv_sec; + status.tstamp.tv_nsec = tu->tstamp.tv_nsec; status.resolution = snd_timer_resolution(tu->timeri); status.lost = tu->timeri->lost; status.overrun = tu->overrun; @@ -88,12 +89,21 @@ static int snd_timer_user_status_compat(struct file *file, return 0; } +#ifdef CONFIG_X86_X32 +/* X32 ABI has the same struct as x86-64 */ +#define snd_timer_user_status_x32(file, s) \ + snd_timer_user_status(file, s) +#endif /* CONFIG_X86_X32 */ + /* */ enum { SNDRV_TIMER_IOCTL_INFO32 = _IOR('T', 0x11, struct snd_timer_info32), SNDRV_TIMER_IOCTL_STATUS32 = _IOW('T', 0x14, struct snd_timer_status32), +#ifdef CONFIG_X86_X32 + SNDRV_TIMER_IOCTL_STATUS_X32 = _IOW('T', 0x14, struct snd_timer_status), +#endif /* CONFIG_X86_X32 */ }; static long snd_timer_user_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -122,6 +132,10 @@ static long snd_timer_user_ioctl_compat(struct file *file, unsigned int cmd, uns return snd_timer_user_info_compat(file, argp); case SNDRV_TIMER_IOCTL_STATUS32: return snd_timer_user_status_compat(file, argp); +#ifdef CONFIG_X86_X32 + case SNDRV_TIMER_IOCTL_STATUS_X32: + return snd_timer_user_status_x32(file, argp); +#endif /* CONFIG_X86_X32 */ } return -ENOIOCTLCMD; } diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index d11baaf0f0b4a..c5d5217a41804 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -109,6 +109,9 @@ struct dummy_timer_ops { snd_pcm_uframes_t (*pointer)(struct snd_pcm_substream *); }; +#define get_dummy_ops(substream) \ + (*(const struct dummy_timer_ops **)(substream)->runtime->private_data) + struct dummy_model { const char *name; int (*playback_constraints)(struct snd_pcm_runtime *runtime); @@ -137,7 +140,6 @@ struct snd_dummy { int iobox; struct snd_kcontrol *cd_volume_ctl; struct snd_kcontrol *cd_switch_ctl; - const struct dummy_timer_ops *timer_ops; }; /* @@ -231,6 +233,8 @@ struct dummy_model *dummy_models[] = { */ struct dummy_systimer_pcm { + /* ops must be the first item */ + const struct dummy_timer_ops *timer_ops; spinlock_t lock; struct timer_list timer; unsigned long base_time; @@ -366,6 +370,8 @@ static struct dummy_timer_ops dummy_systimer_ops = { */ struct dummy_hrtimer_pcm { + /* ops must be the first item */ + const struct dummy_timer_ops *timer_ops; ktime_t base_time; ktime_t period_time; atomic_t running; @@ -492,31 +498,25 @@ static struct dummy_timer_ops dummy_hrtimer_ops = { static int dummy_pcm_trigger(struct snd_pcm_substream *substream, int cmd) { - struct snd_dummy *dummy = snd_pcm_substream_chip(substream); - switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: - return dummy->timer_ops->start(substream); + return get_dummy_ops(substream)->start(substream); case SNDRV_PCM_TRIGGER_STOP: case 
SNDRV_PCM_TRIGGER_SUSPEND: - return dummy->timer_ops->stop(substream); + return get_dummy_ops(substream)->stop(substream); } return -EINVAL; } static int dummy_pcm_prepare(struct snd_pcm_substream *substream) { - struct snd_dummy *dummy = snd_pcm_substream_chip(substream); - - return dummy->timer_ops->prepare(substream); + return get_dummy_ops(substream)->prepare(substream); } static snd_pcm_uframes_t dummy_pcm_pointer(struct snd_pcm_substream *substream) { - struct snd_dummy *dummy = snd_pcm_substream_chip(substream); - - return dummy->timer_ops->pointer(substream); + return get_dummy_ops(substream)->pointer(substream); } static struct snd_pcm_hardware dummy_pcm_hardware = { @@ -562,17 +562,19 @@ static int dummy_pcm_open(struct snd_pcm_substream *substream) struct snd_dummy *dummy = snd_pcm_substream_chip(substream); struct dummy_model *model = dummy->model; struct snd_pcm_runtime *runtime = substream->runtime; + const struct dummy_timer_ops *ops; int err; - dummy->timer_ops = &dummy_systimer_ops; + ops = &dummy_systimer_ops; #ifdef CONFIG_HIGH_RES_TIMERS if (hrtimer) - dummy->timer_ops = &dummy_hrtimer_ops; + ops = &dummy_hrtimer_ops; #endif - err = dummy->timer_ops->create(substream); + err = ops->create(substream); if (err < 0) return err; + get_dummy_ops(substream) = ops; runtime->hw = dummy->pcm_hw; if (substream->pcm->device & 1) { @@ -594,7 +596,7 @@ static int dummy_pcm_open(struct snd_pcm_substream *substream) err = model->capture_constraints(substream->runtime); } if (err < 0) { - dummy->timer_ops->free(substream); + get_dummy_ops(substream)->free(substream); return err; } return 0; @@ -602,8 +604,7 @@ static int dummy_pcm_open(struct snd_pcm_substream *substream) static int dummy_pcm_close(struct snd_pcm_substream *substream) { - struct snd_dummy *dummy = snd_pcm_substream_chip(substream); - dummy->timer_ops->free(substream); + get_dummy_ops(substream)->free(substream); return 0; } diff --git a/sound/firewire/amdtp.c b/sound/firewire/amdtp.c index e061355f535f0..bf20593d30852 100644 --- a/sound/firewire/amdtp.c +++ b/sound/firewire/amdtp.c @@ -730,8 +730,9 @@ static void handle_in_packet(struct amdtp_stream *s, s->data_block_counter != UINT_MAX) data_block_counter = s->data_block_counter; - if (((s->flags & CIP_SKIP_DBC_ZERO_CHECK) && data_block_counter == 0) || - (s->data_block_counter == UINT_MAX)) { + if (((s->flags & CIP_SKIP_DBC_ZERO_CHECK) && + data_block_counter == s->tx_first_dbc) || + s->data_block_counter == UINT_MAX) { lost = false; } else if (!(s->flags & CIP_DBC_IS_END_EVENT)) { lost = data_block_counter != s->data_block_counter; diff --git a/sound/firewire/amdtp.h b/sound/firewire/amdtp.h index 8a03a91e728b0..25c9055376580 100644 --- a/sound/firewire/amdtp.h +++ b/sound/firewire/amdtp.h @@ -153,6 +153,8 @@ struct amdtp_stream { /* quirk: fixed interval of dbc between previos/current packets. */ unsigned int tx_dbc_interval; + /* quirk: indicate the value of dbc field in a first packet. 
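+ *
+ * For example, fireworks_stream.c later in this series sets
+ * efw->tx_stream.tx_first_dbc = 0x02; for Fireworks3 firmware, so the
+ * CIP_SKIP_DBC_ZERO_CHECK path in handle_in_packet() above compares the
+ * first packet's dbc field against 0x02 rather than zero.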
*/ + unsigned int tx_first_dbc; bool callbacked; wait_queue_head_t callback_wait; diff --git a/sound/firewire/bebob/Makefile b/sound/firewire/bebob/Makefile index 6cf470c80d1fd..af7ed66432661 100644 --- a/sound/firewire/bebob/Makefile +++ b/sound/firewire/bebob/Makefile @@ -1,4 +1,4 @@ snd-bebob-objs := bebob_command.o bebob_stream.o bebob_proc.o bebob_midi.o \ bebob_pcm.o bebob_hwdep.o bebob_terratec.o bebob_yamaha.o \ bebob_focusrite.o bebob_maudio.o bebob.o -obj-m += snd-bebob.o +obj-$(CONFIG_SND_BEBOB) += snd-bebob.o diff --git a/sound/firewire/bebob/bebob_stream.c b/sound/firewire/bebob/bebob_stream.c index 98e4fc8121a1f..5e547cb199f0e 100644 --- a/sound/firewire/bebob/bebob_stream.c +++ b/sound/firewire/bebob/bebob_stream.c @@ -47,14 +47,16 @@ static const unsigned int bridgeco_freq_table[] = { [6] = 0x07, }; -static unsigned int -get_formation_index(unsigned int rate) +static int +get_formation_index(unsigned int rate, unsigned int *index) { unsigned int i; for (i = 0; i < ARRAY_SIZE(snd_bebob_rate_table); i++) { - if (snd_bebob_rate_table[i] == rate) - return i; + if (snd_bebob_rate_table[i] == rate) { + *index = i; + return 0; + } } return -EINVAL; } @@ -367,7 +369,9 @@ make_both_connections(struct snd_bebob *bebob, unsigned int rate) goto end; /* confirm params for both streams */ - index = get_formation_index(rate); + err = get_formation_index(rate, &index); + if (err < 0) + goto end; pcm_channels = bebob->tx_stream_formations[index].pcm; midi_channels = bebob->tx_stream_formations[index].midi; amdtp_stream_set_parameters(&bebob->tx_stream, diff --git a/sound/firewire/dice/Makefile b/sound/firewire/dice/Makefile index 9ef228ef7baf2..55b4be9b00340 100644 --- a/sound/firewire/dice/Makefile +++ b/sound/firewire/dice/Makefile @@ -1,3 +1,3 @@ snd-dice-objs := dice-transaction.o dice-stream.o dice-proc.o dice-midi.o \ dice-pcm.o dice-hwdep.o dice.o -obj-m += snd-dice.o +obj-$(CONFIG_SND_DICE) += snd-dice.o diff --git a/sound/firewire/fireworks/Makefile b/sound/firewire/fireworks/Makefile index 0c7440826db8e..15ef7f75a8ef1 100644 --- a/sound/firewire/fireworks/Makefile +++ b/sound/firewire/fireworks/Makefile @@ -1,4 +1,4 @@ snd-fireworks-objs := fireworks_transaction.o fireworks_command.o \ fireworks_stream.o fireworks_proc.o fireworks_midi.o \ fireworks_pcm.o fireworks_hwdep.o fireworks.o -obj-m += snd-fireworks.o +obj-$(CONFIG_SND_FIREWORKS) += snd-fireworks.o diff --git a/sound/firewire/fireworks/fireworks.c b/sound/firewire/fireworks/fireworks.c index 2682e7e3e5c98..c94a432f7cc65 100644 --- a/sound/firewire/fireworks/fireworks.c +++ b/sound/firewire/fireworks/fireworks.c @@ -248,8 +248,16 @@ efw_probe(struct fw_unit *unit, err = get_hardware_info(efw); if (err < 0) goto error; + /* AudioFire8 (since 2009) and AudioFirePre8 */ if (entry->model_id == MODEL_ECHO_AUDIOFIRE_9) efw->is_af9 = true; + /* These models use the same firmware. */ + if (entry->model_id == MODEL_ECHO_AUDIOFIRE_2 || + entry->model_id == MODEL_ECHO_AUDIOFIRE_4 || + entry->model_id == MODEL_ECHO_AUDIOFIRE_9 || + entry->model_id == MODEL_GIBSON_RIP || + entry->model_id == MODEL_GIBSON_GOLDTOP) + efw->is_fireworks3 = true; snd_efw_proc_init(efw); diff --git a/sound/firewire/fireworks/fireworks.h b/sound/firewire/fireworks/fireworks.h index 4f0201a95222a..084d414b228cf 100644 --- a/sound/firewire/fireworks/fireworks.h +++ b/sound/firewire/fireworks/fireworks.h @@ -71,6 +71,7 @@ struct snd_efw { /* for quirks */ bool is_af9; + bool is_fireworks3; u32 firmware_version; unsigned int midi_in_ports; diff --git a/sound/firewire/fireworks/fireworks_stream.c b/sound/firewire/fireworks/fireworks_stream.c index c55db1bddc80a..7e353f1f7bff3 100644 --- a/sound/firewire/fireworks/fireworks_stream.c +++ b/sound/firewire/fireworks/fireworks_stream.c @@ -172,6 +172,15 @@ int snd_efw_stream_init_duplex(struct snd_efw *efw) efw->tx_stream.flags |= CIP_DBC_IS_END_EVENT; /* Fireworks reset dbc at bus reset. */ efw->tx_stream.flags |= CIP_SKIP_DBC_ZERO_CHECK; + /* + * But recent firmware starts packets with non-zero dbc. + * Driver version 5.7.6 installs firmware version 5.7.3. + */ + if (efw->is_fireworks3 && + (efw->firmware_version == 0x5070000 || + efw->firmware_version == 0x5070300 || + efw->firmware_version == 0x5080000)) + efw->tx_stream.tx_first_dbc = 0x02; /* AudioFire9 always reports wrong dbs. */ if (efw->is_af9) efw->tx_stream.flags |= CIP_WRONG_DBS; diff --git a/sound/firewire/oxfw/Makefile b/sound/firewire/oxfw/Makefile index a926850864f67..06ff50f4e6c0b 100644 --- a/sound/firewire/oxfw/Makefile +++ b/sound/firewire/oxfw/Makefile @@ -1,3 +1,3 @@ snd-oxfw-objs := oxfw-command.o oxfw-stream.o oxfw-control.o oxfw-pcm.o \ oxfw-proc.o oxfw-midi.o oxfw-hwdep.o oxfw.o -obj-m += snd-oxfw.o +obj-$(CONFIG_SND_OXFW) += snd-oxfw.o diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 5645481af3d95..57197bef5f5b9 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -3259,7 +3259,7 @@ static int add_std_chmaps(struct hda_codec *codec) struct snd_pcm_chmap *chmap; const struct snd_pcm_chmap_elem *elem; - if (!pcm || pcm->own_chmap || + if (!pcm || !pcm->pcm || pcm->own_chmap || !hinfo->substreams) continue; elem = hinfo->chmap ?
hinfo->chmap : snd_pcm_std_chmaps; @@ -3833,10 +3833,8 @@ int snd_hda_codec_build_pcms(struct hda_codec *codec) return -EINVAL; err = snd_hda_codec_parse_pcms(codec); - if (err < 0) { - snd_hda_codec_reset(codec); + if (err < 0) return err; - } /* attach a new PCM streams */ list_for_each_entry(cpcm, &codec->pcm_list_head, list) { diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index ac0db1679f098..194627c6c42b3 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -671,7 +671,8 @@ static bool is_active_nid(struct hda_codec *codec, hda_nid_t nid, } for (i = 0; i < path->depth; i++) { if (path->path[i] == nid) { - if (dir == HDA_OUTPUT || path->idx[i] == idx) + if (dir == HDA_OUTPUT || idx == -1 || + path->idx[i] == idx) return true; break; } @@ -682,7 +683,7 @@ static bool is_active_nid(struct hda_codec *codec, hda_nid_t nid, /* check whether the NID is referred by any active paths */ #define is_active_nid_for_any(codec, nid) \ - is_active_nid(codec, nid, HDA_OUTPUT, 0) + is_active_nid(codec, nid, HDA_OUTPUT, -1) /* get the default amp value for the target state */ static int get_amp_val_to_activate(struct hda_codec *codec, hda_nid_t nid, @@ -883,8 +884,7 @@ void snd_hda_activate_path(struct hda_codec *codec, struct nid_path *path, struct hda_gen_spec *spec = codec->spec; int i; - if (!enable) - path->active = false; + path->active = enable; /* make sure the widget is powered up */ if (enable && (spec->power_down_unused || codec->power_save_node)) @@ -902,9 +902,6 @@ void snd_hda_activate_path(struct hda_codec *codec, struct nid_path *path, if (has_amp_out(codec, path, i)) activate_amp_out(codec, path, i, enable); } - - if (enable) - path->active = true; } EXPORT_SYMBOL_GPL(snd_hda_activate_path); @@ -4001,9 +3998,9 @@ static void pin_power_callback(struct hda_codec *codec, struct hda_jack_callback *jack, bool on) { - if (jack && jack->tbl->nid) + if (jack && jack->nid) sync_power_state_change(codec, - set_pin_power_jack(codec, jack->tbl->nid, on)); + set_pin_power_jack(codec, jack->nid, on)); } /* callback only doing power up -- called at first */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b6db25b23dd31..69093ce34231f 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -329,6 +329,7 @@ enum { #define AZX_DCAPS_PRESET_CTHDA \ (AZX_DCAPS_NO_MSI | AZX_DCAPS_POSFIX_LPIB |\ + AZX_DCAPS_NO_64BIT |\ AZX_DCAPS_4K_BDLE_BOUNDARY | AZX_DCAPS_SNOOP_OFF) /* @@ -839,6 +840,36 @@ static int azx_resume(struct device *dev) } #endif /* CONFIG_PM_SLEEP || SUPPORT_VGA_SWITCHEROO */ +#ifdef CONFIG_PM_SLEEP +/* put codec down to D3 at hibernation for Intel SKL+; + * otherwise BIOS may still access the codec and screw up the driver + */ +#define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170) +#define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70) +#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) +#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) + +static int azx_freeze_noirq(struct device *dev) +{ + struct pci_dev *pci = to_pci_dev(dev); + + if (IS_SKL_PLUS(pci)) + pci_set_power_state(pci, PCI_D3hot); + + return 0; +} + +static int azx_thaw_noirq(struct device *dev) +{ + struct pci_dev *pci = to_pci_dev(dev); + + if (IS_SKL_PLUS(pci)) + pci_set_power_state(pci, PCI_D0); + + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + #ifdef CONFIG_PM static int azx_runtime_suspend(struct device *dev) { @@ -939,6 +970,10 @@ static int 
azx_runtime_idle(struct device *dev) static const struct dev_pm_ops azx_pm = { SET_SYSTEM_SLEEP_PM_OPS(azx_suspend, azx_resume) +#ifdef CONFIG_PM_SLEEP + .freeze_noirq = azx_freeze_noirq, + .thaw_noirq = azx_thaw_noirq, +#endif SET_RUNTIME_PM_OPS(azx_runtime_suspend, azx_runtime_resume, azx_runtime_idle) }; @@ -1937,9 +1972,17 @@ static int azx_probe_continue(struct azx *chip) static void azx_remove(struct pci_dev *pci) { struct snd_card *card = pci_get_drvdata(pci); + struct azx *chip; + struct hda_intel *hda; + + if (card) { + /* cancel the pending probing work */ + chip = card->private_data; + hda = container_of(chip, struct hda_intel, chip); + cancel_work_sync(&hda->probe_work); - if (card) snd_card_free(card); + } } static void azx_shutdown(struct pci_dev *pci) @@ -1976,6 +2019,11 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, { PCI_DEVICE(0x8086, 0x8d21), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, + /* Lewisburg */ + { PCI_DEVICE(0x8086, 0xa1f0), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, + { PCI_DEVICE(0x8086, 0xa270), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, /* Lynx Point-LP */ { PCI_DEVICE(0x8086, 0x9c20), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, @@ -2054,6 +2102,10 @@ static const struct pci_device_id azx_ids[] = { { PCI_DEVICE(0x1022, 0x780d), .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB }, /* ATI HDMI */ + { PCI_DEVICE(0x1002, 0x1308), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + { PCI_DEVICE(0x1002, 0x157a), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0x793b), .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, { PCI_DEVICE(0x1002, 0x7919), @@ -2062,6 +2114,8 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, { PCI_DEVICE(0x1002, 0x970f), .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, + { PCI_DEVICE(0x1002, 0x9840), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0xaa00), .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, { PCI_DEVICE(0x1002, 0xaa08), @@ -2106,8 +2160,14 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0xaab0), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + { PCI_DEVICE(0x1002, 0xaac0), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0xaac8), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + { PCI_DEVICE(0x1002, 0xaad8), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + { PCI_DEVICE(0x1002, 0xaae8), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, /* VIA VT8251/VT8237A */ { PCI_DEVICE(0x1106, 0x3288), .driver_data = AZX_DRIVER_VIA | AZX_DCAPS_POSFIX_VIA }, @@ -2144,11 +2204,13 @@ static const struct pci_device_id azx_ids[] = { .class = PCI_CLASS_MULTIMEDIA_HD_AUDIO << 8, .class_mask = 0xffffff, .driver_data = AZX_DRIVER_CTX | AZX_DCAPS_CTX_WORKAROUND | + AZX_DCAPS_NO_64BIT | AZX_DCAPS_RIRB_PRE_DELAY | AZX_DCAPS_POSFIX_LPIB }, #else /* this entry seems still valid -- i.e. 
without emu20kx chip */ { PCI_DEVICE(0x1102, 0x0009), .driver_data = AZX_DRIVER_CTX | AZX_DCAPS_CTX_WORKAROUND | + AZX_DCAPS_NO_64BIT | AZX_DCAPS_RIRB_PRE_DELAY | AZX_DCAPS_POSFIX_LPIB }, #endif /* CM8888 */ diff --git a/sound/pci/hda/hda_jack.c b/sound/pci/hda/hda_jack.c index d7cfe7b8c32b1..52cc36758dd42 100644 --- a/sound/pci/hda/hda_jack.c +++ b/sound/pci/hda/hda_jack.c @@ -259,7 +259,7 @@ snd_hda_jack_detect_enable_callback(struct hda_codec *codec, hda_nid_t nid, if (!callback) return ERR_PTR(-ENOMEM); callback->func = func; - callback->tbl = jack; + callback->nid = jack->nid; callback->next = jack->callback; jack->callback = callback; } diff --git a/sound/pci/hda/hda_jack.h b/sound/pci/hda/hda_jack.h index b279e327a23b3..a13c11c3ddbb6 100644 --- a/sound/pci/hda/hda_jack.h +++ b/sound/pci/hda/hda_jack.h @@ -21,7 +21,7 @@ struct hda_jack_callback; typedef void (*hda_jack_callback_fn) (struct hda_codec *, struct hda_jack_callback *); struct hda_jack_callback { - struct hda_jack_tbl *tbl; + hda_nid_t nid; hda_jack_callback_fn func; unsigned int private_data; /* arbitrary data */ struct hda_jack_callback *next; diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 4a4e7b282e4f8..0374bd5b61c82 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -4401,13 +4401,16 @@ static void ca0132_process_dsp_response(struct hda_codec *codec, static void hp_callback(struct hda_codec *codec, struct hda_jack_callback *cb) { struct ca0132_spec *spec = codec->spec; + struct hda_jack_tbl *tbl; /* Delay enabling the HP amp, to let the mic-detection * state machine run. */ cancel_delayed_work_sync(&spec->unsol_hp_work); schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500)); - cb->tbl->block_report = 1; + tbl = snd_hda_jack_tbl_get(codec, cb->nid); + if (tbl) + tbl->block_report = 1; } static void amic_callback(struct hda_codec *codec, struct hda_jack_callback *cb) diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 50e9dd6755797..8f50a257a80d6 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -614,6 +614,7 @@ enum { CS4208_MAC_AUTO, CS4208_MBA6, CS4208_MBP11, + CS4208_MACMINI, CS4208_GPIO0, }; @@ -621,6 +622,7 @@ static const struct hda_model_fixup cs4208_models[] = { { .id = CS4208_GPIO0, .name = "gpio0" }, { .id = CS4208_MBA6, .name = "mba6" }, { .id = CS4208_MBP11, .name = "mbp11" }, + { .id = CS4208_MACMINI, .name = "macmini" }, {} }; @@ -632,8 +634,10 @@ static const struct snd_pci_quirk cs4208_fixup_tbl[] = { /* codec SSID matching */ static const struct snd_pci_quirk cs4208_mac_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x5e00, "MacBookPro 11,2", CS4208_MBP11), + SND_PCI_QUIRK(0x106b, 0x6c00, "MacMini 7,1", CS4208_MACMINI), SND_PCI_QUIRK(0x106b, 0x7100, "MacBookAir 6,1", CS4208_MBA6), SND_PCI_QUIRK(0x106b, 0x7200, "MacBookAir 6,2", CS4208_MBA6), + SND_PCI_QUIRK(0x106b, 0x7b00, "MacBookPro 12,1", CS4208_MBP11), {} /* terminator */ }; @@ -665,6 +669,24 @@ static void cs4208_fixup_mac(struct hda_codec *codec, snd_hda_apply_fixup(codec, action); } +/* MacMini 7,1 has the inverted jack detection */ +static void cs4208_fixup_macmini(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static const struct hda_pintbl pincfgs[] = { + { 0x18, 0x00ab9150 }, /* mic (audio-in) jack: disable detect */ + { 0x21, 0x004be140 }, /* SPDIF: disable detect */ + { } + }; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + /* HP pin (0x10) has an inverted detection */ + codec->inv_jack_detect = 
1; + /* disable the bogus Mic and SPDIF jack detections */ + snd_hda_apply_pincfgs(codec, pincfgs); + } +} + static int cs4208_spdif_sw_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { @@ -708,6 +730,12 @@ static const struct hda_fixup cs4208_fixups[] = { .chained = true, .chain_id = CS4208_GPIO0, }, + [CS4208_MACMINI] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs4208_fixup_macmini, + .chained = true, + .chain_id = CS4208_GPIO0, + }, [CS4208_GPIO0] = { .type = HDA_FIXUP_FUNC, .v.func = cs4208_fixup_gpio0, @@ -1001,9 +1029,7 @@ static void cs4210_spdif_automute(struct hda_codec *codec, spec->spdif_present = spdif_present; /* SPDIF TX on/off */ - if (spdif_present) - snd_hda_set_pin_ctl(codec, spdif_pin, - spdif_present ? PIN_OUT : 0); + snd_hda_set_pin_ctl(codec, spdif_pin, spdif_present ? PIN_OUT : 0); cs_automute(codec); } diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 78b719b5b34dd..488f4c7be33eb 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -200,12 +200,33 @@ static int cx_auto_init(struct hda_codec *codec) return 0; } -#define cx_auto_free snd_hda_gen_free +static void cx_auto_reboot_notify(struct hda_codec *codec) +{ + struct conexant_spec *spec = codec->spec; + + if (codec->core.vendor_id != 0x14f150f2) + return; + + /* Turn the CX20722 codec into D3 to avoid spurious noises + from the internal speaker during (and after) reboot */ + cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false); + + snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3); + snd_hda_codec_write(codec, codec->core.afg, 0, + AC_VERB_SET_POWER_STATE, AC_PWRST_D3); +} + +static void cx_auto_free(struct hda_codec *codec) +{ + cx_auto_reboot_notify(codec); + snd_hda_gen_free(codec); +} static const struct hda_codec_ops cx_auto_patch_ops = { .build_controls = cx_auto_build_controls, .build_pcms = snd_hda_gen_build_pcms, .init = cx_auto_init, + .reboot_notify = cx_auto_reboot_notify, .free = cx_auto_free, .unsol_event = snd_hda_jack_unsol_event, #ifdef CONFIG_PM @@ -798,6 +819,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x21da, "Lenovo X220", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21db, "Lenovo X220-tablet", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo IdeaPad Z560", CXT_FIXUP_MUTE_LED_EAPD), + SND_PCI_QUIRK(0x17aa, 0x390b, "Lenovo G50-80", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x397b, "Lenovo S205", CXT_FIXUP_STEREO_DMIC), diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 5f44f60a63897..51d519554744e 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -48,8 +48,9 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info"); #define is_haswell(codec) ((codec)->core.vendor_id == 0x80862807) #define is_broadwell(codec) ((codec)->core.vendor_id == 0x80862808) #define is_skylake(codec) ((codec)->core.vendor_id == 0x80862809) +#define is_broxton(codec) ((codec)->core.vendor_id == 0x8086280a) #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \ - || is_skylake(codec)) + || is_skylake(codec) || is_broxton(codec)) #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882) #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883) @@ -432,7 +433,8 @@ static int 
hdmi_eld_ctl_get(struct snd_kcontrol *kcontrol, eld = &per_pin->sink_eld; mutex_lock(&per_pin->lock); - if (eld->eld_size > ARRAY_SIZE(ucontrol->value.bytes.data)) { + if (eld->eld_size > ARRAY_SIZE(ucontrol->value.bytes.data) || + eld->eld_size > ELD_MAX_SIZE) { mutex_unlock(&per_pin->lock); snd_BUG(); return -EINVAL; @@ -1177,7 +1179,7 @@ static void check_presence_and_report(struct hda_codec *codec, hda_nid_t nid) static void jack_callback(struct hda_codec *codec, struct hda_jack_callback *jack) { - check_presence_and_report(codec, jack->tbl->nid); + check_presence_and_report(codec, jack->nid); } static void hdmi_intrinsic_event(struct hda_codec *codec, unsigned int res) @@ -3333,6 +3335,7 @@ static const struct hda_codec_preset snd_hda_preset_hdmi[] = { { .id = 0x10de0070, .name = "GPU 70 HDMI/DP", .patch = patch_nvhdmi }, { .id = 0x10de0071, .name = "GPU 71 HDMI/DP", .patch = patch_nvhdmi }, { .id = 0x10de0072, .name = "GPU 72 HDMI/DP", .patch = patch_nvhdmi }, +{ .id = 0x10de007d, .name = "GPU 7d HDMI/DP", .patch = patch_nvhdmi }, { .id = 0x10de8001, .name = "MCP73 HDMI", .patch = patch_nvhdmi_2ch }, { .id = 0x11069f80, .name = "VX900 HDMI/DP", .patch = patch_via_hdmi }, { .id = 0x11069f81, .name = "VX900 HDMI/DP", .patch = patch_via_hdmi }, @@ -3396,6 +3399,7 @@ MODULE_ALIAS("snd-hda-codec-id:10de0067"); MODULE_ALIAS("snd-hda-codec-id:10de0070"); MODULE_ALIAS("snd-hda-codec-id:10de0071"); MODULE_ALIAS("snd-hda-codec-id:10de0072"); +MODULE_ALIAS("snd-hda-codec-id:10de007d"); MODULE_ALIAS("snd-hda-codec-id:10de8001"); MODULE_ALIAS("snd-hda-codec-id:11069f80"); MODULE_ALIAS("snd-hda-codec-id:11069f81"); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6d010452c1f5c..91cc6897d595e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -111,6 +111,7 @@ struct alc_spec { void (*power_hook)(struct hda_codec *codec); #endif void (*shutup)(struct hda_codec *codec); + void (*reboot_notify)(struct hda_codec *codec); int init_amp; int codec_variant; /* flag for other variants */ @@ -276,7 +277,7 @@ static void alc_update_knob_master(struct hda_codec *codec, uctl = kzalloc(sizeof(*uctl), GFP_KERNEL); if (!uctl) return; - val = snd_hda_codec_read(codec, jack->tbl->nid, 0, + val = snd_hda_codec_read(codec, jack->nid, 0, AC_VERB_GET_VOLUME_KNOB_CONTROL, 0); val &= HDA_AMP_VOLMASK; uctl->value.integer.value[0] = val; @@ -773,6 +774,25 @@ static inline void alc_shutup(struct hda_codec *codec) snd_hda_shutup_pins(codec); } +static void alc_reboot_notify(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + if (spec && spec->reboot_notify) + spec->reboot_notify(codec); + else + alc_shutup(codec); +} + +/* power down codec to D3 at reboot/shutdown; set as reboot_notify ops */ +static void alc_d3_at_reboot(struct hda_codec *codec) +{ + snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3); + snd_hda_codec_write(codec, codec->core.afg, 0, + AC_VERB_SET_POWER_STATE, AC_PWRST_D3); + msleep(10); +} + #define alc_free snd_hda_gen_free #ifdef CONFIG_PM @@ -818,7 +838,7 @@ static const struct hda_codec_ops alc_patch_ops = { .suspend = alc_suspend, .check_power_status = snd_hda_gen_check_power_status, #endif - .reboot_notify = alc_shutup, + .reboot_notify = alc_reboot_notify, }; @@ -1134,7 +1154,7 @@ static const struct hda_fixup alc880_fixups[] = { /* override all pins as BIOS on old Amilo is broken */ .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { - { 0x14, 0x0121411f }, /* HP */ + { 0x14, 0x0121401f }, 
/* HP */ { 0x15, 0x99030120 }, /* speaker */ { 0x16, 0x99030130 }, /* bass speaker */ { 0x17, 0x411111f0 }, /* N/A */ @@ -1154,7 +1174,7 @@ static const struct hda_fixup alc880_fixups[] = { /* almost compatible with FUJITSU, but no bass and SPDIF */ .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { - { 0x14, 0x0121411f }, /* HP */ + { 0x14, 0x0121401f }, /* HP */ { 0x15, 0x99030120 }, /* speaker */ { 0x16, 0x411111f0 }, /* N/A */ { 0x17, 0x411111f0 }, /* N/A */ @@ -1363,7 +1383,7 @@ static const struct snd_pci_quirk alc880_fixup_tbl[] = { SND_PCI_QUIRK(0x161f, 0x203d, "W810", ALC880_FIXUP_W810), SND_PCI_QUIRK(0x161f, 0x205d, "Medion Rim 2150", ALC880_FIXUP_MEDION_RIM), SND_PCI_QUIRK(0x1631, 0xe011, "PB 13201056", ALC880_FIXUP_6ST_AUTOMUTE), - SND_PCI_QUIRK(0x1734, 0x107c, "FSC F1734", ALC880_FIXUP_F1734), + SND_PCI_QUIRK(0x1734, 0x107c, "FSC Amilo M1437", ALC880_FIXUP_FUJITSU), SND_PCI_QUIRK(0x1734, 0x1094, "FSC Amilo M1451G", ALC880_FIXUP_FUJITSU), SND_PCI_QUIRK(0x1734, 0x10ac, "FSC AMILO Xi 1526", ALC880_FIXUP_F1734), SND_PCI_QUIRK(0x1734, 0x10b0, "FSC Amilo Pi1556", ALC880_FIXUP_FUJITSU), @@ -1767,6 +1787,7 @@ enum { ALC889_FIXUP_MBA11_VREF, ALC889_FIXUP_MBA21_VREF, ALC889_FIXUP_MP11_VREF, + ALC889_FIXUP_MP41_VREF, ALC882_FIXUP_INV_DMIC, ALC882_FIXUP_NO_PRIMARY_HP, ALC887_FIXUP_ASUS_BASS, @@ -1854,7 +1875,7 @@ static void alc889_fixup_mbp_vref(struct hda_codec *codec, const struct hda_fixup *fix, int action) { struct alc_spec *spec = codec->spec; - static hda_nid_t nids[2] = { 0x14, 0x15 }; + static hda_nid_t nids[3] = { 0x14, 0x15, 0x19 }; int i; if (action != HDA_FIXUP_ACT_INIT) @@ -2142,6 +2163,12 @@ static const struct hda_fixup alc882_fixups[] = { .chained = true, .chain_id = ALC885_FIXUP_MACPRO_GPIO, }, + [ALC889_FIXUP_MP41_VREF] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc889_fixup_mbp_vref, + .chained = true, + .chain_id = ALC885_FIXUP_MACPRO_GPIO, + }, [ALC882_FIXUP_INV_DMIC] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_inv_dmic, @@ -2201,6 +2228,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC889_FIXUP_VAIO_TT), SND_PCI_QUIRK(0x104d, 0x905a, "Sony Vaio Z", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9043, "Sony Vaio VGC-LN51JGB", ALC882_FIXUP_NO_PRIMARY_HP), + SND_PCI_QUIRK(0x104d, 0x9044, "Sony VAIO AiO", ALC882_FIXUP_NO_PRIMARY_HP), /* All Apple entries are in codec SSIDs */ SND_PCI_QUIRK(0x106b, 0x00a0, "MacBookPro 3,1", ALC889_FIXUP_MBP_VREF), @@ -2220,11 +2248,11 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x3f00, "Macbook 5,1", ALC889_FIXUP_IMAC91_VREF), SND_PCI_QUIRK(0x106b, 0x4000, "MacbookPro 5,1", ALC889_FIXUP_IMAC91_VREF), SND_PCI_QUIRK(0x106b, 0x4100, "Macmini 3,1", ALC889_FIXUP_IMAC91_VREF), - SND_PCI_QUIRK(0x106b, 0x4200, "Mac Pro 5,1", ALC885_FIXUP_MACPRO_GPIO), + SND_PCI_QUIRK(0x106b, 0x4200, "Mac Pro 4,1/5,1", ALC889_FIXUP_MP41_VREF), SND_PCI_QUIRK(0x106b, 0x4300, "iMac 9,1", ALC889_FIXUP_IMAC91_VREF), SND_PCI_QUIRK(0x106b, 0x4600, "MacbookPro 5,2", ALC889_FIXUP_IMAC91_VREF), SND_PCI_QUIRK(0x106b, 0x4900, "iMac 9,1 Aluminum", ALC889_FIXUP_IMAC91_VREF), - SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_IMAC91_VREF), + SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_MBA11_VREF), SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), @@ -3449,6 +3477,29 @@ static void gpio2_mic_hotkey_event(struct hda_codec *codec, input_sync(spec->kb_dev); } 
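/*
 * A quick way to see what the helper below provides: once the
 * "Microphone Mute Button" input device is registered, the hotkey
 * arrives as an ordinary EV_KEY/KEY_MICMUTE event. A minimal
 * user-space sketch follows; the /dev/input/eventX node path is an
 * assumption and differs per machine.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/input.h>

int main(void)
{
	struct input_event ev;
	int fd = open("/dev/input/eventX", O_RDONLY);	/* hypothetical node */

	if (fd < 0)
		return 1;
	while (read(fd, &ev, sizeof(ev)) == sizeof(ev))
		if (ev.type == EV_KEY && ev.code == KEY_MICMUTE && ev.value == 1)
			printf("mic-mute hotkey pressed\n");
	return 0;
}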
+static int alc_register_micmute_input_device(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + spec->kb_dev = input_allocate_device(); + if (!spec->kb_dev) { + codec_err(codec, "Out of memory (input_allocate_device)\n"); + return -ENOMEM; + } + spec->kb_dev->name = "Microphone Mute Button"; + spec->kb_dev->evbit[0] = BIT_MASK(EV_KEY); + spec->kb_dev->keybit[BIT_WORD(KEY_MICMUTE)] = BIT_MASK(KEY_MICMUTE); + + if (input_register_device(spec->kb_dev)) { + codec_err(codec, "input_register_device failed\n"); + input_free_device(spec->kb_dev); + spec->kb_dev = NULL; + return -ENOMEM; + } + + return 0; +} + static void alc280_fixup_hp_gpio2_mic_hotkey(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -3466,20 +3517,8 @@ static void alc280_fixup_hp_gpio2_mic_hotkey(struct hda_codec *codec, struct alc_spec *spec = codec->spec; if (action == HDA_FIXUP_ACT_PRE_PROBE) { - spec->kb_dev = input_allocate_device(); - if (!spec->kb_dev) { - codec_err(codec, "Out of memory (input_allocate_device)\n"); + if (alc_register_micmute_input_device(codec) != 0) return; - } - spec->kb_dev->name = "Microphone Mute Button"; - spec->kb_dev->evbit[0] = BIT_MASK(EV_KEY); - spec->kb_dev->keybit[BIT_WORD(KEY_MICMUTE)] = BIT_MASK(KEY_MICMUTE); - if (input_register_device(spec->kb_dev)) { - codec_err(codec, "input_register_device failed\n"); - input_free_device(spec->kb_dev); - spec->kb_dev = NULL; - return; - } snd_hda_add_verbs(codec, gpio_init); snd_hda_codec_write_cache(codec, codec->core.afg, 0, @@ -3509,6 +3548,47 @@ static void alc280_fixup_hp_gpio2_mic_hotkey(struct hda_codec *codec, } } +static void alc233_fixup_lenovo_line2_mic_hotkey(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + /* Line2 = mic mute hotkey + GPIO2 = mic mute LED */ + static const struct hda_verb gpio_init[] = { + { 0x01, AC_VERB_SET_GPIO_MASK, 0x04 }, + { 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x04 }, + {} + }; + + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + if (alc_register_micmute_input_device(codec) != 0) + return; + + snd_hda_add_verbs(codec, gpio_init); + snd_hda_jack_detect_enable_callback(codec, 0x1b, + gpio2_mic_hotkey_event); + + spec->gen.cap_sync_hook = alc_fixup_gpio_mic_mute_hook; + spec->gpio_led = 0; + spec->mute_led_polarity = 0; + spec->gpio_mic_led_mask = 0x04; + return; + } + + if (!spec->kb_dev) + return; + + switch (action) { + case HDA_FIXUP_ACT_PROBE: + spec->init_amp = ALC_INIT_DEFAULT; + break; + case HDA_FIXUP_ACT_FREE: + input_unregister_device(spec->kb_dev); + spec->kb_dev = NULL; + } +} + static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -3692,6 +3772,10 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, static void alc_headset_mode_default(struct hda_codec *codec) { + static struct coef_fw coef0225[] = { + UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10), + {} + }; static struct coef_fw coef0255[] = { WRITE_COEF(0x45, 0xc089), WRITE_COEF(0x45, 0xc489), @@ -3733,6 +3817,9 @@ static void alc_headset_mode_default(struct hda_codec *codec) }; switch (codec->core.vendor_id) { + case 0x10ec0225: + alc_process_coef_fw(codec, coef0225); + break; case 0x10ec0255: case 0x10ec0256: alc_process_coef_fw(codec, coef0255); @@ -4182,6 +4269,26 @@ static void alc_fixup_disable_aamix(struct hda_codec *codec, } } +/* fixup for Thinkpad docks: add dock pins, avoid HP parser fixup */ +static void alc_fixup_tpt440_dock(struct hda_codec *codec, + const 
struct hda_fixup *fix, int action) +{ + static const struct hda_pintbl pincfgs[] = { + { 0x16, 0x21211010 }, /* dock headphone */ + { 0x19, 0x21a11010 }, /* dock mic */ + { } + }; + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->shutup = alc_no_shutup; /* reduce click noise */ + spec->reboot_notify = alc_d3_at_reboot; /* reduce noise */ + spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; + codec->power_save_node = 0; /* avoid click noises */ + snd_hda_apply_pincfgs(codec, pincfgs); + } +} + static void alc_shutup_dell_xps13(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -4458,6 +4565,7 @@ enum { ALC269_FIXUP_LIFEBOOK, ALC269_FIXUP_LIFEBOOK_EXTMIC, ALC269_FIXUP_LIFEBOOK_HP_PIN, + ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT, ALC269_FIXUP_AMIC, ALC269_FIXUP_DMIC, ALC269VB_FIXUP_AMIC, @@ -4478,6 +4586,7 @@ enum { ALC269_FIXUP_DELL3_MIC_NO_PRESENCE, ALC269_FIXUP_HEADSET_MODE, ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, + ALC269_FIXUP_ASPIRE_HEADSET_MIC, ALC269_FIXUP_ASUS_X101_FUNC, ALC269_FIXUP_ASUS_X101_VERB, ALC269_FIXUP_ASUS_X101, @@ -4505,6 +4614,7 @@ enum { ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC, ALC293_FIXUP_DELL1_MIC_NO_PRESENCE, ALC292_FIXUP_TPT440_DOCK, + ALC292_FIXUP_TPT440, ALC283_FIXUP_BXBT2807_MIC, ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED, ALC282_FIXUP_ASPIRE_V5_PINS, @@ -4515,8 +4625,18 @@ enum { ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, ALC288_FIXUP_DELL_XPS_13_GPIO6, + ALC288_FIXUP_DELL_XPS_13, + ALC288_FIXUP_DISABLE_AAMIX, ALC292_FIXUP_DELL_E7X, ALC292_FIXUP_DISABLE_AAMIX, + ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC275_FIXUP_DELL_XPS, + ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE, + ALC293_FIXUP_LENOVO_SPK_NOISE, + ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, + ALC255_FIXUP_DELL_SPK_NOISE, + ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC280_FIXUP_HP_HEADSET_MIC, }; static const struct hda_fixup alc269_fixups[] = { @@ -4623,6 +4743,10 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_pincfg_no_hp_to_lineout, + }, [ALC269_FIXUP_AMIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -4751,6 +4875,15 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode_no_hp_mic, }, + [ALC269_FIXUP_ASPIRE_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* headset mic w/o jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE, + }, [ALC286_FIXUP_SONY_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -4953,15 +5086,17 @@ static const struct hda_fixup alc269_fixups[] = { .chain_id = ALC269_FIXUP_HEADSET_MODE }, [ALC292_FIXUP_TPT440_DOCK] = { - .type = HDA_FIXUP_PINS, - .v.pins = (const struct hda_pintbl[]) { - { 0x16, 0x21211010 }, /* dock headphone */ - { 0x19, 0x21a11010 }, /* dock mic */ - { } - }, + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_tpt440_dock, .chained = true, .chain_id = ALC269_FIXUP_LIMIT_INT_MIC_BOOST }, + [ALC292_FIXUP_TPT440] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC292_FIXUP_TPT440_DOCK, + }, [ALC283_FIXUP_BXBT2807_MIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -4980,7 +5115,7 @@ static const struct hda_fixup alc269_fixups[] = { { 0x14, 0x90170110 }, { 0x17, 0x40000008 }, { 0x18, 0x411111f0 }, - { 0x19, 0x411111f0 }, + { 0x19, 
0x01a1913c }, { 0x1a, 0x411111f0 }, { 0x1b, 0x411111f0 }, { 0x1d, 0x40f89b2d }, @@ -5039,9 +5174,23 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC288_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC288_FIXUP_DISABLE_AAMIX] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC288_FIXUP_DELL_XPS_13_GPIO6 + }, + [ALC288_FIXUP_DELL_XPS_13] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_dell_xps13, + .chained = true, + .chain_id = ALC288_FIXUP_DISABLE_AAMIX + }, [ALC292_FIXUP_DISABLE_AAMIX] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE }, [ALC292_FIXUP_DELL_E7X] = { .type = HDA_FIXUP_FUNC, @@ -5049,6 +5198,71 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC292_FIXUP_DISABLE_AAMIX }, + [ALC298_FIXUP_DELL1_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { 0x1a, 0x01a1913d }, /* use as headphone mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, + [ALC275_FIXUP_DELL_XPS] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* Enables internal speaker */ + {0x20, AC_VERB_SET_COEF_INDEX, 0x1f}, + {0x20, AC_VERB_SET_PROC_COEF, 0x00c0}, + {0x20, AC_VERB_SET_COEF_INDEX, 0x30}, + {0x20, AC_VERB_SET_PROC_COEF, 0x00b1}, + {} + } + }, + [ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* Disable pass-through path for FRONT 14h */ + {0x20, AC_VERB_SET_COEF_INDEX, 0x36}, + {0x20, AC_VERB_SET_PROC_COEF, 0x1737}, + {} + }, + .chained = true, + .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE + }, + [ALC293_FIXUP_LENOVO_SPK_NOISE] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC269_FIXUP_THINKPAD_ACPI + }, + [ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc233_fixup_lenovo_line2_mic_hotkey, + }, + [ALC255_FIXUP_DELL_SPK_NOISE] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE + }, + [ALC225_FIXUP_DELL1_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* Disable pass-through path for FRONT 14h */ + { 0x20, AC_VERB_SET_COEF_INDEX, 0x36 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x57d7 }, + {} + }, + .chained = true, + .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE + }, + [ALC280_FIXUP_HP_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MIC, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5056,11 +5270,16 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x029b, "Acer 1810TZ", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x0349, "Acer AOD260", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x047c, "Acer AC700", ALC269_FIXUP_ACER_AC700), + SND_PCI_QUIRK(0x1025, 0x072d, "Acer Aspire V5-571G", ALC269_FIXUP_ASPIRE_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x0740, "Acer AO725", ALC271_FIXUP_HP_GATE_MIC_JACK), SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK), + SND_PCI_QUIRK(0x1025, 0x0762, "Acer Aspire 
E1-472", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), + SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), + SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS), SND_PCI_QUIRK(0x1028, 0x05ca, "Dell Latitude E7240", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x05cb, "Dell Latitude E7440", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x05da, "Dell Vostro 5460", ALC290_FIXUP_SUBWOOFER), @@ -5069,13 +5288,23 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05f6, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), + SND_PCI_QUIRK(0x1028, 0x062c, "Dell Latitude E5550", ALC292_FIXUP_DELL_E7X), + SND_PCI_QUIRK(0x1028, 0x062e, "Dell Latitude E7450", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK), SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC292_FIXUP_DELL_E7X), + SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC288_FIXUP_DELL_XPS_13), + SND_PCI_QUIRK(0x1028, 0x069a, "Dell Vostro 5480", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x06db, "Dell", ALC292_FIXUP_DISABLE_AAMIX), + SND_PCI_QUIRK(0x1028, 0x06dd, "Dell", ALC292_FIXUP_DISABLE_AAMIX), + SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC292_FIXUP_DISABLE_AAMIX), + SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC292_FIXUP_DISABLE_AAMIX), + SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC292_FIXUP_DISABLE_AAMIX), + SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), + SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), @@ -5135,6 +5364,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x221c, "HP EliteBook 755 G2", ALC280_FIXUP_HP_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), @@ -5156,6 +5386,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ), SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX), SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK), + SND_PCI_QUIRK(0x10cf, 0x159f, "Lifebook E780", 
ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT), SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), @@ -5172,13 +5403,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x21fb, "Thinkpad T430s", ALC269_FIXUP_LENOVO_DOCK), SND_PCI_QUIRK(0x17aa, 0x2203, "Thinkpad X230 Tablet", ALC269_FIXUP_LENOVO_DOCK), SND_PCI_QUIRK(0x17aa, 0x2208, "Thinkpad T431s", ALC269_FIXUP_LENOVO_DOCK), - SND_PCI_QUIRK(0x17aa, 0x220c, "Thinkpad T440s", ALC292_FIXUP_TPT440_DOCK), + SND_PCI_QUIRK(0x17aa, 0x220c, "Thinkpad T440s", ALC292_FIXUP_TPT440), SND_PCI_QUIRK(0x17aa, 0x220e, "Thinkpad T440p", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2210, "Thinkpad T540p", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2212, "Thinkpad T440", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2214, "Thinkpad X240", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2215, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK), + SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK), + SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE), + SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x17aa, 0x5013, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), @@ -5188,6 +5423,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x5034, "Thinkpad T450", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x5036, "Thinkpad T450s", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x503c, "Thinkpad L450", ALC292_FIXUP_TPT440_DOCK), + SND_PCI_QUIRK(0x17aa, 0x504b, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), @@ -5266,8 +5502,12 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"}, {.id = ALC283_FIXUP_SENSE_COMBO_JACK, .name = "alc283-sense-combo"}, {.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"}, + {.id = ALC292_FIXUP_TPT440, .name = "tpt440"}, {} }; +#define ALC225_STANDARD_PINS \ + {0x12, 0xb7a60130}, \ + {0x21, 0x04211020} #define ALC255_STANDARD_PINS \ {0x18, 0x411111f0}, \ @@ -5317,7 +5557,20 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {0x1d, 0x40700001}, \ {0x1e, 0x411111f0} +#define ALC298_STANDARD_PINS \ + {0x18, 0x411111f0}, \ + {0x19, 0x411111f0}, \ + {0x1a, 0x411111f0}, \ + {0x1e, 0x411111f0}, \ + {0x1f, 0x411111f0} + static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { + SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC225_STANDARD_PINS, + {0x14, 0x901701a0}), + SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC225_STANDARD_PINS, + {0x14, 0x901701b0}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE, ALC255_STANDARD_PINS, {0x12, 0x40300000}, @@ -5339,6 +5592,17 @@ static const struct 
snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x17, 0x40000000}, {0x1d, 0x40700001}, {0x21, 0x02211030}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x40000000}, + {0x14, 0x90170130}, + {0x17, 0x411111f0}, + {0x18, 0x411111f0}, + {0x19, 0x411111f0}, + {0x1a, 0x411111f0}, + {0x1b, 0x01014020}, + {0x1d, 0x4054c029}, + {0x1e, 0x411111f0}, + {0x21, 0x0221103f}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60160}, {0x14, 0x90170120}, @@ -5380,6 +5644,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x21, 0x02211040}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ALC255_STANDARD_PINS, + {0x12, 0x90a60170}, + {0x14, 0x90171130}, + {0x21, 0x02211040}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60170}, {0x14, 0x90170140}, {0x17, 0x40000000}, @@ -5596,6 +5864,14 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x16, 0x411111f0}, {0x18, 0x411111f0}, {0x19, 0x411111f0}), + SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC298_STANDARD_PINS, + {0x12, 0x90a60130}, + {0x13, 0x40000000}, + {0x14, 0x411111f0}, + {0x17, 0x90170140}, + {0x1d, 0x4068a36d}, + {0x21, 0x03211020}), {} }; @@ -6389,6 +6665,7 @@ static const struct hda_fixup alc662_fixups[] = { static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1019, 0x9087, "ECS", ALC662_FIXUP_ASUS_MODE2), SND_PCI_QUIRK(0x1025, 0x022f, "Acer Aspire One", ALC662_FIXUP_INV_DMIC), + SND_PCI_QUIRK(0x1025, 0x0241, "Packard Bell DOTS", ALC662_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x0308, "Acer Aspire 8942G", ALC662_FIXUP_ASPIRE), SND_PCI_QUIRK(0x1025, 0x031c, "Gateway NV79", ALC662_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x1025, 0x0349, "eMachines eM250", ALC662_FIXUP_INV_DMIC), @@ -6398,6 +6675,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05fe, "Dell XPS 15", ALC668_FIXUP_DELL_XPS13), SND_PCI_QUIRK(0x1028, 0x060a, "Dell XPS 13", ALC668_FIXUP_DELL_XPS13), + SND_PCI_QUIRK(0x1028, 0x060d, "Dell M3800", ALC668_FIXUP_DELL_XPS13), SND_PCI_QUIRK(0x1028, 0x0625, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0626, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0696, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), @@ -6405,6 +6683,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A), + SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16), diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 6c66d7e164391..840178a26a6b4 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -493,9 +493,9 @@ static void jack_update_power(struct hda_codec *codec, if (!spec->num_pwrs) return; - if (jack && jack->tbl->nid) { - stac_toggle_power_map(codec, jack->tbl->nid, - snd_hda_jack_detect(codec, jack->tbl->nid), + if (jack && jack->nid) { + stac_toggle_power_map(codec, 
jack->nid, + snd_hda_jack_detect(codec, jack->nid), true); return; } @@ -702,6 +702,7 @@ static bool hp_bnb2011_with_dock(struct hda_codec *codec) static bool hp_blike_system(u32 subsystem_id) { switch (subsystem_id) { + case 0x103c1473: /* HP ProBook 6550b */ case 0x103c1520: case 0x103c1521: case 0x103c1523: @@ -2920,7 +2921,8 @@ static const struct snd_pci_quirk stac92hd83xxx_fixup_tbl[] = { SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x148a, "HP Mini", STAC_92HD83XXX_HP_LED), SND_PCI_QUIRK_VENDOR(PCI_VENDOR_ID_HP, "HP", STAC_92HD83XXX_HP), - SND_PCI_QUIRK(PCI_VENDOR_ID_TOSHIBA, 0xfa91, + /* match both for 0xfa91 and 0xfa93 */ + SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_TOSHIBA, 0xfffd, 0xfa91, "Toshiba Satellite S50D", STAC_92HD83XXX_GPIO10_EAPD), {} /* terminator */ }; @@ -3108,6 +3110,29 @@ static void stac92hd71bxx_fixup_hp_hdx(struct hda_codec *codec, spec->gpio_led = 0x08; } +static bool is_hp_output(struct hda_codec *codec, hda_nid_t pin) +{ + unsigned int pin_cfg = snd_hda_codec_get_pincfg(codec, pin); + + /* count line-out, too, as BIOS sets often so */ + return get_defcfg_connect(pin_cfg) != AC_JACK_PORT_NONE && + (get_defcfg_device(pin_cfg) == AC_JACK_LINE_OUT || + get_defcfg_device(pin_cfg) == AC_JACK_HP_OUT); +} + +static void fixup_hp_headphone(struct hda_codec *codec, hda_nid_t pin) +{ + unsigned int pin_cfg = snd_hda_codec_get_pincfg(codec, pin); + + /* It was changed in the BIOS to just satisfy MS DTM. + * Lets turn it back into slaved HP + */ + pin_cfg = (pin_cfg & (~AC_DEFCFG_DEVICE)) | + (AC_JACK_HP_OUT << AC_DEFCFG_DEVICE_SHIFT); + pin_cfg = (pin_cfg & (~(AC_DEFCFG_DEF_ASSOC | AC_DEFCFG_SEQUENCE))) | + 0x1f; + snd_hda_codec_set_pincfg(codec, pin, pin_cfg); +} static void stac92hd71bxx_fixup_hp(struct hda_codec *codec, const struct hda_fixup *fix, int action) @@ -3117,22 +3142,12 @@ static void stac92hd71bxx_fixup_hp(struct hda_codec *codec, if (action != HDA_FIXUP_ACT_PRE_PROBE) return; - if (hp_blike_system(codec->core.subsystem_id)) { - unsigned int pin_cfg = snd_hda_codec_get_pincfg(codec, 0x0f); - if (get_defcfg_device(pin_cfg) == AC_JACK_LINE_OUT || - get_defcfg_device(pin_cfg) == AC_JACK_SPEAKER || - get_defcfg_device(pin_cfg) == AC_JACK_HP_OUT) { - /* It was changed in the BIOS to just satisfy MS DTM. - * Lets turn it back into slaved HP - */ - pin_cfg = (pin_cfg & (~AC_DEFCFG_DEVICE)) - | (AC_JACK_HP_OUT << - AC_DEFCFG_DEVICE_SHIFT); - pin_cfg = (pin_cfg & (~(AC_DEFCFG_DEF_ASSOC - | AC_DEFCFG_SEQUENCE))) - | 0x1f; - snd_hda_codec_set_pincfg(codec, 0x0f, pin_cfg); - } + /* when both output A and F are assigned, these are supposedly + * dock and built-in headphones; fix both pin configs + */ + if (is_hp_output(codec, 0x0a) && is_hp_output(codec, 0x0f)) { + fixup_hp_headphone(codec, 0x0a); + fixup_hp_headphone(codec, 0x0f); } if (find_mute_led_cfg(codec, 1)) @@ -4521,7 +4536,11 @@ static int patch_stac92hd73xx(struct hda_codec *codec) return err; spec = codec->spec; - codec->power_save_node = 1; + /* enable power_save_node only for new 92HD89xx chips, as it causes + * click noises on old 92HD73xx chips. 
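+	 * The mask below treats vendor IDs 0x111d7670..0x111d767f as the
+	 * old 92HD73xx family (for instance, a 92HD73xx ID such as
+	 * 0x111d7675 satisfies (0x111d7675 & 0xfffffff0) == 0x111d7670),
+	 * so every other codec handled here still gets power_save_node.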
+ */ + if ((codec->core.vendor_id & 0xfffffff0) != 0x111d7670) + codec->power_save_node = 1; spec->linear_tone_beep = 0; spec->gen.mixer_nid = 0x1d; spec->have_spdif_mux = 1; diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index bab6c04932aa0..0baeecc2213c5 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -238,7 +238,9 @@ static int via_pin_power_ctl_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct hda_codec *codec = snd_kcontrol_chip(kcontrol); - ucontrol->value.enumerated.item[0] = codec->power_save_node; + struct via_spec *spec = codec->spec; + + ucontrol->value.enumerated.item[0] = spec->gen.power_down_unused; return 0; } @@ -249,9 +251,9 @@ static int via_pin_power_ctl_put(struct snd_kcontrol *kcontrol, struct via_spec *spec = codec->spec; bool val = !!ucontrol->value.enumerated.item[0]; - if (val == codec->power_save_node) + if (val == spec->gen.power_down_unused) return 0; - codec->power_save_node = val; + /* codec->power_save_node = val; */ /* widget PM seems yet broken */ spec->gen.power_down_unused = val; analog_low_current_mode(codec); return 1; diff --git a/sound/pci/rme96.c b/sound/pci/rme96.c index 2306ccf7281e2..77c963ced67a3 100644 --- a/sound/pci/rme96.c +++ b/sound/pci/rme96.c @@ -741,10 +741,11 @@ snd_rme96_playback_setrate(struct rme96 *rme96, { /* change to/from double-speed: reset the DAC (if available) */ snd_rme96_reset_dac(rme96); + return 1; /* need to restore volume */ } else { writel(rme96->wcreg, rme96->iobase + RME96_IO_CONTROL_REGISTER); + return 0; } - return 0; } static int @@ -980,6 +981,7 @@ snd_rme96_playback_hw_params(struct snd_pcm_substream *substream, struct rme96 *rme96 = snd_pcm_substream_chip(substream); struct snd_pcm_runtime *runtime = substream->runtime; int err, rate, dummy; + bool apply_dac_volume = false; runtime->dma_area = (void __force *)(rme96->iobase + RME96_IO_PLAY_BUFFER); @@ -993,24 +995,26 @@ snd_rme96_playback_hw_params(struct snd_pcm_substream *substream, { /* slave clock */ if ((int)params_rate(params) != rate) { - spin_unlock_irq(&rme96->lock); - return -EIO; - } - } else if ((err = snd_rme96_playback_setrate(rme96, params_rate(params))) < 0) { - spin_unlock_irq(&rme96->lock); - return err; - } - if ((err = snd_rme96_playback_setformat(rme96, params_format(params))) < 0) { - spin_unlock_irq(&rme96->lock); - return err; + err = -EIO; + goto error; + } + } else { + err = snd_rme96_playback_setrate(rme96, params_rate(params)); + if (err < 0) + goto error; + apply_dac_volume = err > 0; /* need to restore volume later? 
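+						* (only when setrate() returned
+						* 1, i.e. the rate change reset
+						* the DAC)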
*/ } + + err = snd_rme96_playback_setformat(rme96, params_format(params)); + if (err < 0) + goto error; snd_rme96_setframelog(rme96, params_channels(params), 1); if (rme96->capture_periodsize != 0) { if (params_period_size(params) << rme96->playback_frlog != rme96->capture_periodsize) { - spin_unlock_irq(&rme96->lock); - return -EBUSY; + err = -EBUSY; + goto error; } } rme96->playback_periodsize = @@ -1021,9 +1025,16 @@ snd_rme96_playback_hw_params(struct snd_pcm_substream *substream, rme96->wcreg &= ~(RME96_WCR_PRO | RME96_WCR_DOLBY | RME96_WCR_EMP); writel(rme96->wcreg |= rme96->wcreg_spdif_stream, rme96->iobase + RME96_IO_CONTROL_REGISTER); } + + err = 0; + error: spin_unlock_irq(&rme96->lock); - - return 0; + if (apply_dac_volume) { + usleep_range(3000, 10000); + snd_rme96_apply_dac_volume(rme96); + } + + return err; } static int diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index c19e021ccf668..11246280945dd 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -2878,7 +2878,7 @@ static int snd_hdsp_get_dds_offset(struct snd_kcontrol *kcontrol, struct snd_ctl { struct hdsp *hdsp = snd_kcontrol_chip(kcontrol); - ucontrol->value.enumerated.item[0] = hdsp_dds_offset(hdsp); + ucontrol->value.integer.value[0] = hdsp_dds_offset(hdsp); return 0; } @@ -2890,7 +2890,7 @@ static int snd_hdsp_put_dds_offset(struct snd_kcontrol *kcontrol, struct snd_ctl if (!snd_hdsp_use_is_exclusive(hdsp)) return -EBUSY; - val = ucontrol->value.enumerated.item[0]; + val = ucontrol->value.integer.value[0]; spin_lock_irq(&hdsp->lock); if (val != hdsp_dds_offset(hdsp)) change = (hdsp_set_dds_offset(hdsp, val) == 0) ? 1 : 0; diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index cb666c73712d1..7f6190606f5e7 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -1601,6 +1601,9 @@ static void hdspm_set_dds_value(struct hdspm *hdspm, int rate) { u64 n; + if (snd_BUG_ON(rate <= 0)) + return; + if (rate >= 112000) rate /= 4; else if (rate >= 56000) @@ -2215,6 +2218,8 @@ static int hdspm_get_system_sample_rate(struct hdspm *hdspm) } else { /* slave mode, return external sample rate */ rate = hdspm_external_sample_rate(hdspm); + if (!rate) + rate = hdspm->system_sample_rate; } } @@ -2260,8 +2265,11 @@ static int snd_hdspm_put_system_sample_rate(struct snd_kcontrol *kcontrol, ucontrol) { struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); + int rate = ucontrol->value.integer.value[0]; - hdspm_set_dds_value(hdspm, ucontrol->value.enumerated.item[0]); + if (rate < 27000 || rate > 207000) + return -EINVAL; + hdspm_set_dds_value(hdspm, ucontrol->value.integer.value[0]); return 0; } @@ -4449,7 +4457,7 @@ static int snd_hdspm_get_tco_word_term(struct snd_kcontrol *kcontrol, { struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); - ucontrol->value.enumerated.item[0] = hdspm->tco->term; + ucontrol->value.integer.value[0] = hdspm->tco->term; return 0; } @@ -4460,8 +4468,8 @@ static int snd_hdspm_put_tco_word_term(struct snd_kcontrol *kcontrol, { struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); - if (hdspm->tco->term != ucontrol->value.enumerated.item[0]) { - hdspm->tco->term = ucontrol->value.enumerated.item[0]; + if (hdspm->tco->term != ucontrol->value.integer.value[0]) { + hdspm->tco->term = ucontrol->value.integer.value[0]; hdspm_tco_write(hdspm); diff --git a/sound/soc/au1x/db1200.c b/sound/soc/au1x/db1200.c index c75995f2779cf..b914a08258ea0 100644 --- a/sound/soc/au1x/db1200.c +++ b/sound/soc/au1x/db1200.c @@ -129,6 +129,8 @@ static struct 
snd_soc_dai_link db1300_i2s_dai = { .cpu_dai_name = "au1xpsc_i2s.2", .platform_name = "au1xpsc-pcm.2", .codec_name = "wm8731.0-001b", + .dai_fmt = SND_SOC_DAIFMT_LEFT_J | SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_CBM_CFM, .ops = &db1200_i2s_wm8731_ops, }; @@ -146,6 +148,8 @@ static struct snd_soc_dai_link db1550_i2s_dai = { .cpu_dai_name = "au1xpsc_i2s.3", .platform_name = "au1xpsc-pcm.3", .codec_name = "wm8731.0-001b", + .dai_fmt = SND_SOC_DAIFMT_LEFT_J | SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_CBM_CFM, .ops = &db1200_i2s_wm8731_ops, }; diff --git a/sound/soc/codecs/adav80x.c b/sound/soc/codecs/adav80x.c index 4373ada95648e..3a91a00fb9736 100644 --- a/sound/soc/codecs/adav80x.c +++ b/sound/soc/codecs/adav80x.c @@ -864,7 +864,6 @@ const struct regmap_config adav80x_regmap_config = { .val_bits = 8, .pad_bits = 1, .reg_bits = 7, - .read_flag_mask = 0x01, .max_register = ADAV80X_PLL_OUTE, diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index eff4b4d512b7b..13191891fc4cc 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -1354,7 +1354,7 @@ static int arizona_hw_params(struct snd_pcm_substream *substream, bool reconfig; unsigned int aif_tx_state, aif_rx_state; - if (params_rate(params) % 8000) + if (params_rate(params) % 4000) rates = &arizona_44k1_bclk_rates[0]; else rates = &arizona_48k_bclk_rates[0]; @@ -1610,17 +1610,6 @@ int arizona_init_dai(struct arizona_priv *priv, int id) } EXPORT_SYMBOL_GPL(arizona_init_dai); -static irqreturn_t arizona_fll_clock_ok(int irq, void *data) -{ - struct arizona_fll *fll = data; - - arizona_fll_dbg(fll, "clock OK\n"); - - complete(&fll->ok); - - return IRQ_HANDLED; -} - static struct { unsigned int min; unsigned int max; @@ -1902,17 +1891,18 @@ static int arizona_is_enabled_fll(struct arizona_fll *fll) static int arizona_enable_fll(struct arizona_fll *fll) { struct arizona *arizona = fll->arizona; - unsigned long time_left; bool use_sync = false; int already_enabled = arizona_is_enabled_fll(fll); struct arizona_fll_cfg cfg; + int i; + unsigned int val; if (already_enabled < 0) return already_enabled; if (already_enabled) { /* Facilitate smooth refclk across the transition */ - regmap_update_bits_async(fll->arizona->regmap, fll->base + 0x7, + regmap_update_bits_async(fll->arizona->regmap, fll->base + 0x9, ARIZONA_FLL1_GAIN_MASK, 0); regmap_update_bits_async(fll->arizona->regmap, fll->base + 1, ARIZONA_FLL1_FREERUN, @@ -1964,9 +1954,6 @@ static int arizona_enable_fll(struct arizona_fll *fll) if (!already_enabled) pm_runtime_get(arizona->dev); - /* Clear any pending completions */ - try_wait_for_completion(&fll->ok); - regmap_update_bits_async(arizona->regmap, fll->base + 1, ARIZONA_FLL1_ENA, ARIZONA_FLL1_ENA); if (use_sync) @@ -1978,10 +1965,24 @@ static int arizona_enable_fll(struct arizona_fll *fll) regmap_update_bits_async(arizona->regmap, fll->base + 1, ARIZONA_FLL1_FREERUN, 0); - time_left = wait_for_completion_timeout(&fll->ok, - msecs_to_jiffies(250)); - if (time_left == 0) + arizona_fll_dbg(fll, "Waiting for FLL lock...\n"); + val = 0; + for (i = 0; i < 15; i++) { + if (i < 5) + usleep_range(200, 400); + else + msleep(20); + + regmap_read(arizona->regmap, + ARIZONA_INTERRUPT_RAW_STATUS_5, + &val); + if (val & (ARIZONA_FLL1_CLOCK_OK_STS << (fll->id - 1))) + break; + } + if (i == 15) arizona_fll_warn(fll, "Timed out waiting for lock\n"); + else + arizona_fll_dbg(fll, "FLL locked (%d polls)\n", i); return 0; } @@ -2066,11 +2067,8 @@ EXPORT_SYMBOL_GPL(arizona_set_fll); int arizona_init_fll(struct arizona 
*arizona, int id, int base, int lock_irq, int ok_irq, struct arizona_fll *fll) { - int ret; unsigned int val; - init_completion(&fll->ok); - fll->id = id; fll->base = base; fll->arizona = arizona; @@ -2092,13 +2090,6 @@ int arizona_init_fll(struct arizona *arizona, int id, int base, int lock_irq, snprintf(fll->clock_ok_name, sizeof(fll->clock_ok_name), "FLL%d clock OK", id); - ret = arizona_request_irq(arizona, ok_irq, fll->clock_ok_name, - arizona_fll_clock_ok, fll); - if (ret != 0) { - dev_err(arizona->dev, "Failed to get FLL%d clock OK IRQ: %d\n", - id, ret); - } - regmap_update_bits(arizona->regmap, fll->base + 1, ARIZONA_FLL1_FREERUN, 0); diff --git a/sound/soc/codecs/arizona.h b/sound/soc/codecs/arizona.h index 11ff899b02724..14e8485b55858 100644 --- a/sound/soc/codecs/arizona.h +++ b/sound/soc/codecs/arizona.h @@ -233,7 +233,6 @@ struct arizona_fll { int id; unsigned int base; unsigned int vco_mult; - struct completion ok; unsigned int fout; int sync_src; diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c index c5f35a07e8e48..3ad7f5be1cfa9 100644 --- a/sound/soc/codecs/es8328.c +++ b/sound/soc/codecs/es8328.c @@ -85,7 +85,15 @@ static const DECLARE_TLV_DB_SCALE(pga_tlv, 0, 300, 0); static const DECLARE_TLV_DB_SCALE(bypass_tlv, -1500, 300, 0); static const DECLARE_TLV_DB_SCALE(mic_tlv, 0, 300, 0); -static const int deemph_settings[] = { 0, 32000, 44100, 48000 }; +static const struct { + int rate; + unsigned int val; +} deemph_settings[] = { + { 0, ES8328_DACCONTROL6_DEEMPH_OFF }, + { 32000, ES8328_DACCONTROL6_DEEMPH_32k }, + { 44100, ES8328_DACCONTROL6_DEEMPH_44_1k }, + { 48000, ES8328_DACCONTROL6_DEEMPH_48k }, +}; static int es8328_set_deemph(struct snd_soc_codec *codec) { @@ -97,21 +105,22 @@ static int es8328_set_deemph(struct snd_soc_codec *codec) * rate. 
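	 * (a worked example: playback_fs = 46000 Hz picks the 44100 Hz entry,
	 * since |46000 - 44100| = 1900 beats |46000 - 48000| = 2000)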
*/ if (es8328->deemph) { - best = 1; - for (i = 2; i < ARRAY_SIZE(deemph_settings); i++) { - if (abs(deemph_settings[i] - es8328->playback_fs) < - abs(deemph_settings[best] - es8328->playback_fs)) + best = 0; + for (i = 1; i < ARRAY_SIZE(deemph_settings); i++) { + if (abs(deemph_settings[i].rate - es8328->playback_fs) < + abs(deemph_settings[best].rate - es8328->playback_fs)) best = i; } - val = best << 1; + val = deemph_settings[best].val; } else { - val = 0; + val = ES8328_DACCONTROL6_DEEMPH_OFF; } dev_dbg(codec->dev, "Set deemphasis %d\n", val); - return snd_soc_update_bits(codec, ES8328_DACCONTROL6, 0x6, val); + return snd_soc_update_bits(codec, ES8328_DACCONTROL6, + ES8328_DACCONTROL6_DEEMPH_MASK, val); } static int es8328_get_deemph(struct snd_kcontrol *kcontrol, diff --git a/sound/soc/codecs/es8328.h b/sound/soc/codecs/es8328.h index cb36afe10c0ec..156c748c89c7e 100644 --- a/sound/soc/codecs/es8328.h +++ b/sound/soc/codecs/es8328.h @@ -153,6 +153,7 @@ int es8328_probe(struct device *dev, struct regmap *regmap); #define ES8328_DACCONTROL6_CLICKFREE (1 << 3) #define ES8328_DACCONTROL6_DAC_INVR (1 << 4) #define ES8328_DACCONTROL6_DAC_INVL (1 << 5) +#define ES8328_DACCONTROL6_DEEMPH_MASK (3 << 6) #define ES8328_DACCONTROL6_DEEMPH_OFF (0 << 6) #define ES8328_DACCONTROL6_DEEMPH_32k (1 << 6) #define ES8328_DACCONTROL6_DEEMPH_44_1k (2 << 6) diff --git a/sound/soc/codecs/max98925.c b/sound/soc/codecs/max98925.c index 9b5a17de46909..aad664225dc3a 100644 --- a/sound/soc/codecs/max98925.c +++ b/sound/soc/codecs/max98925.c @@ -346,7 +346,7 @@ static int max98925_dai_set_fmt(struct snd_soc_dai *codec_dai, } regmap_update_bits(max98925->regmap, MAX98925_FORMAT, - M98925_DAI_BCI_MASK, invert); + M98925_DAI_BCI_MASK | M98925_DAI_WCI_MASK, invert); return 0; } diff --git a/sound/soc/codecs/pcm1681.c b/sound/soc/codecs/pcm1681.c index 477e13d309713..e7ba557979cb2 100644 --- a/sound/soc/codecs/pcm1681.c +++ b/sound/soc/codecs/pcm1681.c @@ -102,7 +102,7 @@ static int pcm1681_set_deemph(struct snd_soc_codec *codec) if (val != -1) { regmap_update_bits(priv->regmap, PCM1681_DEEMPH_CONTROL, - PCM1681_DEEMPH_RATE_MASK, val); + PCM1681_DEEMPH_RATE_MASK, val << 3); enable = 1; } else enable = 0; diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 178e55d4d4814..06317f7d945f0 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -985,6 +985,35 @@ static int rt5640_hp_event(struct snd_soc_dapm_widget *w, return 0; } +static int rt5640_lout_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + + switch (event) { + case SND_SOC_DAPM_POST_PMU: + hp_amp_power_on(codec); + snd_soc_update_bits(codec, RT5640_PWR_ANLG1, + RT5640_PWR_LM, RT5640_PWR_LM); + snd_soc_update_bits(codec, RT5640_OUTPUT, + RT5640_L_MUTE | RT5640_R_MUTE, 0); + break; + + case SND_SOC_DAPM_PRE_PMD: + snd_soc_update_bits(codec, RT5640_OUTPUT, + RT5640_L_MUTE | RT5640_R_MUTE, + RT5640_L_MUTE | RT5640_R_MUTE); + snd_soc_update_bits(codec, RT5640_PWR_ANLG1, + RT5640_PWR_LM, 0); + break; + + default: + return 0; + } + + return 0; +} + static int rt5640_hp_power_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { @@ -1180,13 +1209,16 @@ static const struct snd_soc_dapm_widget rt5640_dapm_widgets[] = { 0, rt5640_spo_l_mix, ARRAY_SIZE(rt5640_spo_l_mix)), SND_SOC_DAPM_MIXER("SPOR MIX", SND_SOC_NOPM, 0, 0, rt5640_spo_r_mix, ARRAY_SIZE(rt5640_spo_r_mix)), - SND_SOC_DAPM_MIXER("LOUT MIX", 
RT5640_PWR_ANLG1, RT5640_PWR_LM_BIT, 0, + SND_SOC_DAPM_MIXER("LOUT MIX", SND_SOC_NOPM, 0, 0, rt5640_lout_mix, ARRAY_SIZE(rt5640_lout_mix)), SND_SOC_DAPM_SUPPLY_S("Improve HP Amp Drv", 1, SND_SOC_NOPM, 0, 0, rt5640_hp_power_event, SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_PGA_S("HP Amp", 1, SND_SOC_NOPM, 0, 0, rt5640_hp_event, SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMU), + SND_SOC_DAPM_PGA_S("LOUT amp", 1, SND_SOC_NOPM, 0, 0, + rt5640_lout_event, + SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_SUPPLY("HP L Amp", RT5640_PWR_ANLG1, RT5640_PWR_HP_L_BIT, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("HP R Amp", RT5640_PWR_ANLG1, @@ -1501,8 +1533,10 @@ static const struct snd_soc_dapm_route rt5640_dapm_routes[] = { {"HP R Playback", "Switch", "HP Amp"}, {"HPOL", NULL, "HP L Playback"}, {"HPOR", NULL, "HP R Playback"}, - {"LOUTL", NULL, "LOUT MIX"}, - {"LOUTR", NULL, "LOUT MIX"}, + + {"LOUT amp", NULL, "LOUT MIX"}, + {"LOUTL", NULL, "LOUT amp"}, + {"LOUTR", NULL, "LOUT amp"}, }; static const struct snd_soc_dapm_route rt5640_specific_dapm_routes[] = { diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index be4d741c45baa..6cbd03a5e53db 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -487,7 +487,7 @@ static const struct snd_kcontrol_new rt5645_snd_controls[] = { /* IN1/IN2 Control */ SOC_SINGLE_TLV("IN1 Boost", RT5645_IN1_CTRL1, - RT5645_BST_SFT1, 8, 0, bst_tlv), + RT5645_BST_SFT1, 12, 0, bst_tlv), SOC_SINGLE_TLV("IN2 Boost", RT5645_IN2_CTRL, RT5645_BST_SFT2, 8, 0, bst_tlv), @@ -2837,6 +2837,8 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, } } + INIT_DELAYED_WORK(&rt5645->jack_detect_work, rt5645_jack_detect_work); + if (rt5645->i2c->irq) { ret = request_threaded_irq(rt5645->i2c->irq, NULL, rt5645_irq, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING @@ -2855,8 +2857,6 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, dev_err(&i2c->dev, "Fail gpio_direction hp_det_gpio\n"); } - INIT_DELAYED_WORK(&rt5645->jack_detect_work, rt5645_jack_detect_work); - return snd_soc_register_codec(&i2c->dev, &soc_codec_dev_rt5645, rt5645_dai, ARRAY_SIZE(rt5645_dai)); } diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 3593a1496056d..3a29c0ac5d8a2 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1339,8 +1339,8 @@ static int sgtl5000_probe(struct snd_soc_codec *codec) sgtl5000->micbias_resistor << SGTL5000_BIAS_R_SHIFT); snd_soc_update_bits(codec, SGTL5000_CHIP_MIC_CTRL, - SGTL5000_BIAS_R_MASK, - sgtl5000->micbias_voltage << SGTL5000_BIAS_R_SHIFT); + SGTL5000_BIAS_VOLT_MASK, + sgtl5000->micbias_voltage << SGTL5000_BIAS_VOLT_SHIFT); /* * disable DAP * TODO: diff --git a/sound/soc/codecs/ssm4567.c b/sound/soc/codecs/ssm4567.c index a984485108cd1..f7549cc7ea855 100644 --- a/sound/soc/codecs/ssm4567.c +++ b/sound/soc/codecs/ssm4567.c @@ -315,7 +315,13 @@ static int ssm4567_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) if (invert_fclk) ctrl1 |= SSM4567_SAI_CTRL_1_FSYNC; - return regmap_write(ssm4567->regmap, SSM4567_REG_SAI_CTRL_1, ctrl1); + return regmap_update_bits(ssm4567->regmap, SSM4567_REG_SAI_CTRL_1, + SSM4567_SAI_CTRL_1_BCLK | + SSM4567_SAI_CTRL_1_FSYNC | + SSM4567_SAI_CTRL_1_LJ | + SSM4567_SAI_CTRL_1_TDM | + SSM4567_SAI_CTRL_1_PDM, + ctrl1); } static int ssm4567_set_power(struct ssm4567 *ssm4567, bool enable) diff --git a/sound/soc/codecs/tas2552.c b/sound/soc/codecs/tas2552.c index dfb4ff5cc9ea1..18558595ba723 100644 --- a/sound/soc/codecs/tas2552.c +++ b/sound/soc/codecs/tas2552.c @@ -120,6 +120,9 @@ 
static void tas2552_sw_shutdown(struct tas2552_data *tas_data, int sw_shutdown) { u8 cfg1_reg; + if (!tas_data->codec) + return; + if (sw_shutdown) cfg1_reg = 0; else @@ -335,7 +338,6 @@ static DECLARE_TLV_DB_SCALE(dac_tlv, -7, 100, 24); static const struct snd_kcontrol_new tas2552_snd_controls[] = { SOC_SINGLE_TLV("Speaker Driver Playback Volume", TAS2552_PGA_GAIN, 0, 0x1f, 1, dac_tlv), - SOC_DAPM_SINGLE("Playback AMP", SND_SOC_NOPM, 0, 1, 0), }; static const struct reg_default tas2552_init_regs[] = { diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index 0c6d1bc0526ef..d476221dba51d 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -42,7 +42,7 @@ struct wm5102_priv { static DECLARE_TLV_DB_SCALE(ana_tlv, 0, 100, 0); static DECLARE_TLV_DB_SCALE(eq_tlv, -1200, 100, 0); static DECLARE_TLV_DB_SCALE(digital_tlv, -6400, 50, 0); -static DECLARE_TLV_DB_SCALE(noise_tlv, 0, 600, 0); +static DECLARE_TLV_DB_SCALE(noise_tlv, -13200, 600, 0); static DECLARE_TLV_DB_SCALE(ng_tlv, -10200, 600, 0); static const struct wm_adsp_region wm5102_dsp1_regions[] = { diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index fbaeddb3e9033..3ee6cfd0578be 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -167,7 +167,7 @@ static int wm5110_sysclk_ev(struct snd_soc_dapm_widget *w, static DECLARE_TLV_DB_SCALE(ana_tlv, 0, 100, 0); static DECLARE_TLV_DB_SCALE(eq_tlv, -1200, 100, 0); static DECLARE_TLV_DB_SCALE(digital_tlv, -6400, 50, 0); -static DECLARE_TLV_DB_SCALE(noise_tlv, 0, 600, 0); +static DECLARE_TLV_DB_SCALE(noise_tlv, -13200, 600, 0); static DECLARE_TLV_DB_SCALE(ng_tlv, -10200, 600, 0); #define WM5110_NG_SRC(name, base) \ diff --git a/sound/soc/codecs/wm8737.c b/sound/soc/codecs/wm8737.c index ada9ac1ba2c64..51171e457fa48 100644 --- a/sound/soc/codecs/wm8737.c +++ b/sound/soc/codecs/wm8737.c @@ -483,7 +483,8 @@ static int wm8737_set_bias_level(struct snd_soc_codec *codec, /* Fast VMID ramp at 2*2.5k */ snd_soc_update_bits(codec, WM8737_MISC_BIAS_CONTROL, - WM8737_VMIDSEL_MASK, 0x4); + WM8737_VMIDSEL_MASK, + 2 << WM8737_VMIDSEL_SHIFT); /* Bring VMID up */ snd_soc_update_bits(codec, WM8737_POWER_MANAGEMENT, @@ -497,7 +498,8 @@ static int wm8737_set_bias_level(struct snd_soc_codec *codec, /* VMID at 2*300k */ snd_soc_update_bits(codec, WM8737_MISC_BIAS_CONTROL, - WM8737_VMIDSEL_MASK, 2); + WM8737_VMIDSEL_MASK, + 1 << WM8737_VMIDSEL_SHIFT); break; diff --git a/sound/soc/codecs/wm8903.h b/sound/soc/codecs/wm8903.h index db949311c0f20..0bb4a647755d8 100644 --- a/sound/soc/codecs/wm8903.h +++ b/sound/soc/codecs/wm8903.h @@ -172,7 +172,7 @@ extern int wm8903_mic_detect(struct snd_soc_codec *codec, #define WM8903_VMID_BUF_ENA_WIDTH 1 /* VMID_BUF_ENA */ #define WM8903_VMID_RES_50K 2 -#define WM8903_VMID_RES_250K 3 +#define WM8903_VMID_RES_250K 4 #define WM8903_VMID_RES_5K 6 /* diff --git a/sound/soc/codecs/wm8955.c b/sound/soc/codecs/wm8955.c index 00bec915d6522..03e04bf6c5ba2 100644 --- a/sound/soc/codecs/wm8955.c +++ b/sound/soc/codecs/wm8955.c @@ -298,7 +298,7 @@ static int wm8955_configure_clocking(struct snd_soc_codec *codec) snd_soc_update_bits(codec, WM8955_PLL_CONTROL_2, WM8955_K_17_9_MASK, (pll.k >> 9) & WM8955_K_17_9_MASK); - snd_soc_update_bits(codec, WM8955_PLL_CONTROL_2, + snd_soc_update_bits(codec, WM8955_PLL_CONTROL_3, WM8955_K_8_0_MASK, pll.k & WM8955_K_8_0_MASK); if (pll.k) diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index e97a7615df850..8d7f632534406 100644 --- a/sound/soc/codecs/wm8960.c +++ 
b/sound/soc/codecs/wm8960.c @@ -245,7 +245,7 @@ SOC_SINGLE("PCM Playback -6dB Switch", WM8960_DACCTL1, 7, 1, 0), SOC_ENUM("ADC Polarity", wm8960_enum[0]), SOC_SINGLE("ADC High Pass Filter Switch", WM8960_DACCTL1, 0, 1, 0), -SOC_ENUM("DAC Polarity", wm8960_enum[2]), +SOC_ENUM("DAC Polarity", wm8960_enum[1]), SOC_SINGLE_BOOL_EXT("DAC Deemphasis Switch", 0, wm8960_get_deemph, wm8960_put_deemph), diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 118b0034ba235..154c1a24a303f 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -365,8 +365,8 @@ static struct reg_default wm8962_reg[] = { { 16924, 0x0059 }, /* R16924 - HDBASS_PG_1 */ { 16925, 0x999A }, /* R16925 - HDBASS_PG_0 */ - { 17048, 0x0083 }, /* R17408 - HPF_C_1 */ - { 17049, 0x98AD }, /* R17409 - HPF_C_0 */ + { 17408, 0x0083 }, /* R17408 - HPF_C_1 */ + { 17409, 0x98AD }, /* R17409 - HPF_C_0 */ { 17920, 0x007F }, /* R17920 - ADCL_RETUNE_C1_1 */ { 17921, 0xFFFF }, /* R17921 - ADCL_RETUNE_C1_0 */ diff --git a/sound/soc/codecs/wm8974.c b/sound/soc/codecs/wm8974.c index ff0e4646b934c..88317c1b7f96c 100644 --- a/sound/soc/codecs/wm8974.c +++ b/sound/soc/codecs/wm8974.c @@ -575,6 +575,7 @@ static const struct regmap_config wm8974_regmap = { .max_register = WM8974_MONOMIX, .reg_defaults = wm8974_reg_defaults, .num_reg_defaults = ARRAY_SIZE(wm8974_reg_defaults), + .cache_type = REGCACHE_FLAT, }; static int wm8974_probe(struct snd_soc_codec *codec) diff --git a/sound/soc/codecs/wm8997.c b/sound/soc/codecs/wm8997.c index a4d11770630cd..e7c81baefe662 100644 --- a/sound/soc/codecs/wm8997.c +++ b/sound/soc/codecs/wm8997.c @@ -40,7 +40,7 @@ struct wm8997_priv { static DECLARE_TLV_DB_SCALE(ana_tlv, 0, 100, 0); static DECLARE_TLV_DB_SCALE(eq_tlv, -1200, 100, 0); static DECLARE_TLV_DB_SCALE(digital_tlv, -6400, 50, 0); -static DECLARE_TLV_DB_SCALE(noise_tlv, 0, 600, 0); +static DECLARE_TLV_DB_SCALE(noise_tlv, -13200, 600, 0); static DECLARE_TLV_DB_SCALE(ng_tlv, -10200, 600, 0); static const struct reg_default wm8997_sysclk_reva_patch[] = { diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index 23c91fa65ab8f..76dd8c6aa4f0e 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -221,8 +221,8 @@ static void mcasp_start_tx(struct davinci_mcasp *mcasp) /* wait for XDATA to be cleared */ cnt = 0; - while (!(mcasp_get_reg(mcasp, DAVINCI_MCASP_TXSTAT_REG) & - ~XRDATA) && (cnt < 100000)) + while ((mcasp_get_reg(mcasp, DAVINCI_MCASP_TXSTAT_REG) & XRDATA) && + (cnt < 100000)) cnt++; /* Release TX state machine */ diff --git a/sound/soc/dwc/designware_i2s.c b/sound/soc/dwc/designware_i2s.c index a3e97b46b64e3..0d28e3b356f67 100644 --- a/sound/soc/dwc/designware_i2s.c +++ b/sound/soc/dwc/designware_i2s.c @@ -131,10 +131,10 @@ static inline void i2s_clear_irqs(struct dw_i2s_dev *dev, u32 stream) if (stream == SNDRV_PCM_STREAM_PLAYBACK) { for (i = 0; i < 4; i++) - i2s_write_reg(dev->i2s_base, TOR(i), 0); + i2s_read_reg(dev->i2s_base, TOR(i)); } else { for (i = 0; i < 4; i++) - i2s_write_reg(dev->i2s_base, ROR(i), 0); + i2s_read_reg(dev->i2s_base, ROR(i)); } } diff --git a/sound/soc/fsl/imx-wm8962.c b/sound/soc/fsl/imx-wm8962.c index cd146d4fa8054..b38b98cae855d 100644 --- a/sound/soc/fsl/imx-wm8962.c +++ b/sound/soc/fsl/imx-wm8962.c @@ -190,7 +190,7 @@ static int imx_wm8962_probe(struct platform_device *pdev) dev_err(&pdev->dev, "audmux internal port setup failed\n"); return ret; } - imx_audmux_v2_configure_port(ext_port, + ret = 
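+	      /* capture the result this time: with ret left over from the
+	       * internal-port call above (necessarily 0 here), the
+	       * if (ret) below could never catch an external-port failure */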
imx_audmux_v2_configure_port(ext_port, IMX_AUDMUX_V2_PTCR_SYN, IMX_AUDMUX_V2_PDCR_RXDSEL(int_port)); if (ret) { diff --git a/sound/soc/intel/atom/sst/sst_drv_interface.c b/sound/soc/intel/atom/sst/sst_drv_interface.c index 7b50a9d17ec1b..edc1869083587 100644 --- a/sound/soc/intel/atom/sst/sst_drv_interface.c +++ b/sound/soc/intel/atom/sst/sst_drv_interface.c @@ -42,6 +42,11 @@ #define MIN_FRAGMENT_SIZE (50 * 1024) #define MAX_FRAGMENT_SIZE (1024 * 1024) #define SST_GET_BYTES_PER_SAMPLE(pcm_wd_sz) (((pcm_wd_sz + 15) >> 4) << 1) +#ifdef CONFIG_PM +#define GET_USAGE_COUNT(dev) (atomic_read(&dev->power.usage_count)) +#else +#define GET_USAGE_COUNT(dev) 1 +#endif int free_stream_context(struct intel_sst_drv *ctx, unsigned int str_id) { @@ -141,15 +146,9 @@ static int sst_power_control(struct device *dev, bool state) int ret = 0; int usage_count = 0; -#ifdef CONFIG_PM - usage_count = atomic_read(&dev->power.usage_count); -#else - usage_count = 1; -#endif - if (state == true) { ret = pm_runtime_get_sync(dev); - + usage_count = GET_USAGE_COUNT(dev); dev_dbg(ctx->dev, "Enable: pm usage count: %d\n", usage_count); if (ret < 0) { dev_err(ctx->dev, "Runtime get failed with err: %d\n", ret); @@ -164,6 +163,7 @@ static int sst_power_control(struct device *dev, bool state) } } } else { + usage_count = GET_USAGE_COUNT(dev); dev_dbg(ctx->dev, "Disable: pm usage count: %d\n", usage_count); return sst_pm_runtime_put(ctx); } diff --git a/sound/soc/omap/Kconfig b/sound/soc/omap/Kconfig index 6768e4f7d7d0e..30d0109703a93 100644 --- a/sound/soc/omap/Kconfig +++ b/sound/soc/omap/Kconfig @@ -100,12 +100,13 @@ config SND_OMAP_SOC_OMAP_TWL4030 config SND_OMAP_SOC_OMAP_ABE_TWL6040 tristate "SoC Audio support for OMAP boards using ABE and twl6040 codec" - depends on TWL6040_CORE && SND_OMAP_SOC && (ARCH_OMAP4 || SOC_OMAP5 || COMPILE_TEST) + depends on TWL6040_CORE && SND_OMAP_SOC + depends on ARCH_OMAP4 || (SOC_OMAP5 && MFD_PALMAS) || COMPILE_TEST select SND_OMAP_SOC_DMIC select SND_OMAP_SOC_MCPDM select SND_SOC_TWL6040 select SND_SOC_DMIC - select COMMON_CLK_PALMAS if MFD_PALMAS + select COMMON_CLK_PALMAS if (SOC_OMAP5 && MFD_PALMAS) help Say Y if you want to add support for SoC audio on OMAP boards using ABE and twl6040 codec. 
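	  (Note: on OMAP5 this option now additionally requires the Palmas
	  PMIC support, MFD_PALMAS, which supplies the clock driver selected
	  above.)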
This driver currently supports: diff --git a/sound/soc/pxa/Kconfig b/sound/soc/pxa/Kconfig index 39cea80846c31..f2bf8661dd21f 100644 --- a/sound/soc/pxa/Kconfig +++ b/sound/soc/pxa/Kconfig @@ -1,7 +1,6 @@ config SND_PXA2XX_SOC tristate "SoC Audio for the Intel PXA2xx chip" depends on ARCH_PXA - select SND_ARM select SND_PXA2XX_LIB help Say Y or M if you want to add support for codecs attached to @@ -25,7 +24,6 @@ config SND_PXA2XX_AC97 config SND_PXA2XX_SOC_AC97 tristate select AC97_BUS - select SND_ARM select SND_PXA2XX_LIB_AC97 select SND_SOC_AC97_BUS diff --git a/sound/soc/pxa/pxa2xx-ac97.c b/sound/soc/pxa/pxa2xx-ac97.c index 1f6054650991d..9e4b04e0fbd12 100644 --- a/sound/soc/pxa/pxa2xx-ac97.c +++ b/sound/soc/pxa/pxa2xx-ac97.c @@ -49,7 +49,7 @@ static struct snd_ac97_bus_ops pxa2xx_ac97_ops = { .reset = pxa2xx_ac97_cold_reset, }; -static unsigned long pxa2xx_ac97_pcm_stereo_in_req = 12; +static unsigned long pxa2xx_ac97_pcm_stereo_in_req = 11; static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_stereo_in = { .addr = __PREG(PCDR), .addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, @@ -57,7 +57,7 @@ static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_stereo_in = { .filter_data = &pxa2xx_ac97_pcm_stereo_in_req, }; -static unsigned long pxa2xx_ac97_pcm_stereo_out_req = 11; +static unsigned long pxa2xx_ac97_pcm_stereo_out_req = 12; static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_stereo_out = { .addr = __PREG(PCDR), .addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, diff --git a/sound/soc/qcom/Kconfig b/sound/soc/qcom/Kconfig index 5f58e4f1bca98..b07f183fc47f0 100644 --- a/sound/soc/qcom/Kconfig +++ b/sound/soc/qcom/Kconfig @@ -6,12 +6,10 @@ config SND_SOC_QCOM config SND_SOC_LPASS_CPU tristate - depends on SND_SOC_QCOM select REGMAP_MMIO config SND_SOC_LPASS_PLATFORM tristate - depends on SND_SOC_QCOM select REGMAP_MMIO config SND_SOC_STORM diff --git a/sound/soc/samsung/arndale_rt5631.c b/sound/soc/samsung/arndale_rt5631.c index 8bf2e2c4bafb9..9e371eb3e4faf 100644 --- a/sound/soc/samsung/arndale_rt5631.c +++ b/sound/soc/samsung/arndale_rt5631.c @@ -116,15 +116,6 @@ static int arndale_audio_probe(struct platform_device *pdev) return ret; } -static int arndale_audio_remove(struct platform_device *pdev) -{ - struct snd_soc_card *card = platform_get_drvdata(pdev); - - snd_soc_unregister_card(card); - - return 0; -} - static const struct of_device_id samsung_arndale_rt5631_of_match[] __maybe_unused = { { .compatible = "samsung,arndale-rt5631", }, { .compatible = "samsung,arndale-alc5631", }, @@ -139,7 +130,6 @@ static struct platform_driver arndale_audio_driver = { .of_match_table = of_match_ptr(samsung_arndale_rt5631_of_match), }, .probe = arndale_audio_probe, - .remove = arndale_audio_remove, }; module_platform_driver(arndale_audio_driver); diff --git a/sound/soc/sh/rcar/gen.c b/sound/soc/sh/rcar/gen.c index 8c7dc51b1c4fd..f7a0cb786d5c3 100644 --- a/sound/soc/sh/rcar/gen.c +++ b/sound/soc/sh/rcar/gen.c @@ -214,7 +214,7 @@ static int rsnd_gen2_probe(struct platform_device *pdev, RSND_GEN_S_REG(SCU_SYS_STATUS0, 0x1c8), RSND_GEN_S_REG(SCU_SYS_INT_EN0, 0x1cc), RSND_GEN_S_REG(SCU_SYS_STATUS1, 0x1d0), - RSND_GEN_S_REG(SCU_SYS_INT_EN1, 0x1c4), + RSND_GEN_S_REG(SCU_SYS_INT_EN1, 0x1d4), RSND_GEN_M_REG(SRC_SWRSR, 0x200, 0x40), RSND_GEN_M_REG(SRC_SRCIR, 0x204, 0x40), RSND_GEN_M_REG(SRC_ADINR, 0x214, 0x40), diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index 025c38fbe3c03..1874cf0e6caba 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -623,6 +623,7 @@ int 
soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) struct snd_pcm *be_pcm; char new_name[64]; int ret = 0, direction = 0; + int playback = 0, capture = 0; if (rtd->num_codecs > 1) { dev_err(rtd->card->dev, "Multicodec not supported for compressed stream\n"); @@ -634,11 +635,27 @@ int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) rtd->dai_link->stream_name, codec_dai->name, num); if (codec_dai->driver->playback.channels_min) + playback = 1; + if (codec_dai->driver->capture.channels_min) + capture = 1; + + capture = capture && cpu_dai->driver->capture.channels_min; + playback = playback && cpu_dai->driver->playback.channels_min; + + /* + * Compress devices are unidirectional so only one of the directions + * should be set, check for that (xor) + */ + if (playback + capture != 1) { + dev_err(rtd->card->dev, "Invalid direction for compress P %d, C %d\n", + playback, capture); + return -EINVAL; + } + + if(playback) direction = SND_COMPRESS_PLAYBACK; - else if (codec_dai->driver->capture.channels_min) - direction = SND_COMPRESS_CAPTURE; else - return -EINVAL; + direction = SND_COMPRESS_CAPTURE; compr = kzalloc(sizeof(*compr), GFP_KERNEL); if (compr == NULL) { diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 158204d089249..b6c12dccb2591 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1811,6 +1811,7 @@ static ssize_t dapm_widget_power_read_file(struct file *file, size_t count, loff_t *ppos) { struct snd_soc_dapm_widget *w = file->private_data; + struct snd_soc_card *card = w->dapm->card; char *buf; int in, out; ssize_t ret; @@ -1820,6 +1821,8 @@ static ssize_t dapm_widget_power_read_file(struct file *file, if (!buf) return -ENOMEM; + mutex_lock(&card->dapm_mutex); + /* Supply widgets are not handled by is_connected_{input,output}_ep() */ if (w->is_supply) { in = 0; @@ -1866,6 +1869,8 @@ static ssize_t dapm_widget_power_read_file(struct file *file, p->sink->name); } + mutex_unlock(&card->dapm_mutex); + ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret); kfree(buf); @@ -2140,11 +2145,15 @@ static ssize_t dapm_widget_show(struct device *dev, struct snd_soc_pcm_runtime *rtd = dev_get_drvdata(dev); int i, count = 0; + mutex_lock(&rtd->card->dapm_mutex); + for (i = 0; i < rtd->num_codecs; i++) { struct snd_soc_codec *codec = rtd->codec_dais[i]->codec; count += dapm_widget_show_codec(codec, buf + count); } + mutex_unlock(&rtd->card->dapm_mutex); + return count; } @@ -3100,16 +3109,10 @@ snd_soc_dapm_new_control(struct snd_soc_dapm_context *dapm, } prefix = soc_dapm_prefix(dapm); - if (prefix) { + if (prefix) w->name = kasprintf(GFP_KERNEL, "%s %s", prefix, widget->name); - if (widget->sname) - w->sname = kasprintf(GFP_KERNEL, "%s %s", prefix, - widget->sname); - } else { + else w->name = kasprintf(GFP_KERNEL, "%s", widget->name); - if (widget->sname) - w->sname = kasprintf(GFP_KERNEL, "%s", widget->sname); - } if (w->name == NULL) { kfree(w); return NULL; @@ -3557,7 +3560,7 @@ int snd_soc_dapm_link_dai_widgets(struct snd_soc_card *card) break; } - if (!w->sname || !strstr(w->sname, dai_w->name)) + if (!w->sname || !strstr(w->sname, dai_w->sname)) continue; if (dai_w->id == snd_soc_dapm_dai_in) { diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 100d92b5b77ef..05977ae1ff2a3 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -206,6 +206,34 @@ int snd_soc_info_volsw(struct snd_kcontrol *kcontrol, } EXPORT_SYMBOL_GPL(snd_soc_info_volsw); +/** + * snd_soc_info_volsw_sx - Mixer info callback for SX TLV controls + * @kcontrol: 
mixer control + * @uinfo: control element information + * + * Callback to provide information about a single mixer control, or a double + * mixer control that spans 2 registers of the SX TLV type. SX TLV controls + * have a range that represents both positive and negative values either side + * of zero but without a sign bit. + * + * Returns 0 for success. + */ +int snd_soc_info_volsw_sx(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + struct soc_mixer_control *mc = + (struct soc_mixer_control *)kcontrol->private_value; + + snd_soc_info_volsw(kcontrol, uinfo); + /* Max represents the number of levels in an SX control not the + * maximum value, so add the minimum value back on + */ + uinfo->value.integer.max += mc->min; + + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_info_volsw_sx); + /** * snd_soc_get_volsw - single mixer get callback * @kcontrol: mixer control diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 35fe58f4fa862..52fe7eb2dea1f 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1661,7 +1661,8 @@ int dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream) (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PREPARE) && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_FREE) && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED) && - (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP)) + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_SUSPEND)) continue; dev_dbg(be->dev, "ASoC: hw_free BE %s\n", diff --git a/sound/synth/emux/emux_oss.c b/sound/synth/emux/emux_oss.c index 82e350e9501cc..ac75816ada7c3 100644 --- a/sound/synth/emux/emux_oss.c +++ b/sound/synth/emux/emux_oss.c @@ -69,7 +69,8 @@ snd_emux_init_seq_oss(struct snd_emux *emu) struct snd_seq_oss_reg *arg; struct snd_seq_device *dev; - if (snd_seq_device_new(emu->card, 0, SNDRV_SEQ_DEV_ID_OSS, + /* using device#1 here for avoiding conflicts with OPL3 */ + if (snd_seq_device_new(emu->card, 1, SNDRV_SEQ_DEV_ID_OSS, sizeof(struct snd_seq_oss_reg), &dev) < 0) return; diff --git a/sound/usb/card.c b/sound/usb/card.c index 1fab9778807a0..0450593980fd3 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -638,7 +638,7 @@ int snd_usb_autoresume(struct snd_usb_audio *chip) int err = -ENODEV; down_read(&chip->shutdown_rwsem); - if (chip->probing && chip->in_pm) + if (chip->probing || chip->in_pm) err = 0; else if (!chip->shutdown) err = usb_autopm_get_interface(chip->pm_intf); diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c index 8461d6bf992f8..204cc074adb96 100644 --- a/sound/usb/line6/pcm.c +++ b/sound/usb/line6/pcm.c @@ -186,12 +186,8 @@ static int line6_stream_start(struct snd_line6_pcm *line6pcm, int direction, int ret = 0; spin_lock_irqsave(&pstr->lock, flags); - if (!test_and_set_bit(type, &pstr->running)) { - if (pstr->active_urbs || pstr->unlink_urbs) { - ret = -EBUSY; - goto error; - } - + if (!test_and_set_bit(type, &pstr->running) && + !(pstr->active_urbs || pstr->unlink_urbs)) { pstr->count = 0; /* Submit all currently available URBs */ if (direction == SNDRV_PCM_STREAM_PLAYBACK) @@ -199,7 +195,6 @@ static int line6_stream_start(struct snd_line6_pcm *line6pcm, int direction, else ret = line6_submit_audio_in_all_urbs(line6pcm); } - error: if (ret < 0) clear_bit(type, &pstr->running); spin_unlock_irqrestore(&pstr->lock, flags); diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 417ebb11cf489..f059326a4914d 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -174,6 +174,8 @@ struct snd_usb_midi_in_endpoint 
{ u8 running_status_length; } ports[0x10]; u8 seen_f5; + bool in_sysex; + u8 last_cin; u8 error_resubmit; int current_port; }; @@ -467,6 +469,39 @@ static void snd_usbmidi_maudio_broken_running_status_input( } } +/* + * QinHeng CH345 is buggy: every second packet inside a SysEx has not CIN 4 + * but the previously seen CIN, but still with three data bytes. + */ +static void ch345_broken_sysex_input(struct snd_usb_midi_in_endpoint *ep, + uint8_t *buffer, int buffer_length) +{ + unsigned int i, cin, length; + + for (i = 0; i + 3 < buffer_length; i += 4) { + if (buffer[i] == 0 && i > 0) + break; + cin = buffer[i] & 0x0f; + if (ep->in_sysex && + cin == ep->last_cin && + (buffer[i + 1 + (cin == 0x6)] & 0x80) == 0) + cin = 0x4; +#if 0 + if (buffer[i + 1] == 0x90) { + /* + * Either a corrupted running status or a real note-on + * message; impossible to detect reliably. + */ + } +#endif + length = snd_usbmidi_cin_length[cin]; + snd_usbmidi_input_data(ep, 0, &buffer[i + 1], length); + ep->in_sysex = cin == 0x4; + if (!ep->in_sysex) + ep->last_cin = cin; + } +} + /* * CME protocol: like the standard protocol, but SysEx commands are sent as a * single USB packet preceded by a 0x0F byte. @@ -660,6 +695,12 @@ static struct usb_protocol_ops snd_usbmidi_cme_ops = { .output_packet = snd_usbmidi_output_standard_packet, }; +static struct usb_protocol_ops snd_usbmidi_ch345_broken_sysex_ops = { + .input = ch345_broken_sysex_input, + .output = snd_usbmidi_standard_output, + .output_packet = snd_usbmidi_output_standard_packet, +}; + /* * AKAI MPD16 protocol: * @@ -1341,6 +1382,7 @@ static int snd_usbmidi_out_endpoint_create(struct snd_usb_midi *umidi, * Various chips declare a packet size larger than 4 bytes, but * do not actually work with larger packets: */ + case USB_ID(0x0a67, 0x5011): /* Medeli DD305 */ case USB_ID(0x0a92, 0x1020): /* ESI M4U */ case USB_ID(0x1430, 0x474b): /* RedOctane GH MIDI INTERFACE */ case USB_ID(0x15ca, 0x0101): /* Textech USB Midi Cable */ @@ -2373,6 +2415,10 @@ int snd_usbmidi_create(struct snd_card *card, if (err < 0) break; + err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); + break; + case QUIRK_MIDI_CH345: + umidi->usb_protocol_ops = &snd_usbmidi_ch345_broken_sysex_ops; err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; default: @@ -2405,7 +2451,6 @@ int snd_usbmidi_create(struct snd_card *card, else err = snd_usbmidi_create_endpoints(umidi, endpoints); if (err < 0) { - snd_usbmidi_free(umidi); return err; } diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 8b7e391dd0b80..f9a9752d4dbc8 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1336,6 +1336,8 @@ static void build_feature_ctl(struct mixer_build *state, void *raw_desc, } } + snd_usb_mixer_fu_apply_quirk(state->mixer, cval, unitid, kctl); + range = (cval->max - cval->min) / cval->res; /* * Are there devices with volume range more than 255? 
I use a bit more @@ -2522,7 +2524,7 @@ static int restore_mixer_value(struct usb_mixer_elem_list *list) for (c = 0; c < MAX_CHANNELS; c++) { if (!(cval->cmask & (1 << c))) continue; - if (cval->cached & (1 << c)) { + if (cval->cached & (1 << (c + 1))) { err = snd_usb_set_cur_mix_value(cval, c + 1, idx, cval->cache_val[idx]); if (err < 0) diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index e5000da9e9d70..ddca6547399b0 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -341,6 +341,13 @@ static const struct usbmix_name_map scms_usb3318_map[] = { { 0 } }; +/* Bose companion 5, the dB conversion factor is 16 instead of 256 */ +static struct usbmix_dB_map bose_companion5_dB = {-5006, -6}; +static struct usbmix_name_map bose_companion5_map[] = { + { 3, NULL, .dB = &bose_companion5_dB }, + { 0 } /* terminator */ +}; + /* * Control map entries */ @@ -451,6 +458,11 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x25c4, 0x0003), .map = scms_usb3318_map, }, + { + /* Bose Companion 5 */ + .id = USB_ID(0x05a7, 0x1020), + .map = bose_companion5_map, + }, { 0 } /* terminator */ }; diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 337c317ead6fb..db9547d04f385 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "usbaudio.h" #include "mixer.h" @@ -802,7 +803,7 @@ static int snd_nativeinstruments_control_put(struct snd_kcontrol *kcontrol, return 0; kcontrol->private_value &= ~(0xff << 24); - kcontrol->private_value |= newval; + kcontrol->private_value |= (unsigned int)newval << 24; err = snd_ni_update_cur_val(list); return err < 0 ? err : 1; } @@ -1843,3 +1844,39 @@ void snd_usb_mixer_rc_memory_change(struct usb_mixer_interface *mixer, } } +static void snd_dragonfly_quirk_db_scale(struct usb_mixer_interface *mixer, + struct snd_kcontrol *kctl) +{ + /* Approximation using 10 ranges based on output measurement on hw v1.2. + * This seems close to the cubic mapping e.g. alsamixer uses. 
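+	 * (TLV steps are hundredths of a dB, so the 41..50 range below
+	 * runs from -4.41 dB up to 0 dB)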
*/ + static const DECLARE_TLV_DB_RANGE(scale, + 0, 1, TLV_DB_MINMAX_ITEM(-5300, -4970), + 2, 5, TLV_DB_MINMAX_ITEM(-4710, -4160), + 6, 7, TLV_DB_MINMAX_ITEM(-3884, -3710), + 8, 14, TLV_DB_MINMAX_ITEM(-3443, -2560), + 15, 16, TLV_DB_MINMAX_ITEM(-2475, -2324), + 17, 19, TLV_DB_MINMAX_ITEM(-2228, -2031), + 20, 26, TLV_DB_MINMAX_ITEM(-1910, -1393), + 27, 31, TLV_DB_MINMAX_ITEM(-1322, -1032), + 32, 40, TLV_DB_MINMAX_ITEM(-968, -490), + 41, 50, TLV_DB_MINMAX_ITEM(-441, 0), + ); + + usb_audio_info(mixer->chip, "applying DragonFly dB scale quirk\n"); + kctl->tlv.p = scale; + kctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ; + kctl->vd[0].access &= ~SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK; +} + +void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer, + struct usb_mixer_elem_info *cval, int unitid, + struct snd_kcontrol *kctl) +{ + switch (mixer->chip->usb_id) { + case USB_ID(0x21b4, 0x0081): /* AudioQuest DragonFly */ + if (unitid == 7 && cval->min == 0 && cval->max == 50) + snd_dragonfly_quirk_db_scale(mixer, kctl); + break; + } +} + diff --git a/sound/usb/mixer_quirks.h b/sound/usb/mixer_quirks.h index bdbfab093816a..177c329cd4ddb 100644 --- a/sound/usb/mixer_quirks.h +++ b/sound/usb/mixer_quirks.h @@ -9,5 +9,9 @@ void snd_emuusb_set_samplerate(struct snd_usb_audio *chip, void snd_usb_mixer_rc_memory_change(struct usb_mixer_interface *mixer, int unitid); +void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer, + struct usb_mixer_elem_info *cval, int unitid, + struct snd_kcontrol *kctl); + #endif /* SND_USB_MIXER_QUIRKS_H */ diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 2f6d3e9a1bcd0..ecc2a4ea014d2 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2512,6 +2512,74 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, +/* Steinberg devices */ +{ + /* Steinberg MI2 */ + USB_DEVICE_VENDOR_SPEC(0x0a4e, 0x2040), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = & (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 1, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 2, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 3, + .type = QUIRK_MIDI_FIXED_ENDPOINT, + .data = &(const struct snd_usb_midi_endpoint_info) { + .out_cables = 0x0001, + .in_cables = 0x0001 + } + }, + { + .ifnum = -1 + } + } + } +}, +{ + /* Steinberg MI4 */ + USB_DEVICE_VENDOR_SPEC(0x0a4e, 0x4040), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = & (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 1, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 2, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 3, + .type = QUIRK_MIDI_FIXED_ENDPOINT, + .data = &(const struct snd_usb_midi_endpoint_info) { + .out_cables = 0x0001, + .in_cables = 0x0001 + } + }, + { + .ifnum = -1 + } + } + } +}, + /* TerraTec devices */ { USB_DEVICE_VENDOR_SPEC(0x0ccd, 0x0012), @@ -2752,6 +2820,17 @@ YAMAHA_DEVICE(0x7010, "UB99"), .idProduct = 0x1020, }, +/* QinHeng devices */ +{ + USB_DEVICE(0x1a86, 0x752d), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "QinHeng", + .product_name = "CH345", + .ifnum = 1, + .type = QUIRK_MIDI_CH345 + } +}, + /* KeithMcMillen Stringport */ { USB_DEVICE(0x1f38, 0x0001), diff --git 
a/sound/usb/quirks.c b/sound/usb/quirks.c index 754e689596a21..a4d03e5da3e08 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -535,6 +535,7 @@ int snd_usb_create_quirk(struct snd_usb_audio *chip, [QUIRK_MIDI_CME] = create_any_midi_quirk, [QUIRK_MIDI_AKAI] = create_any_midi_quirk, [QUIRK_MIDI_FTDI] = create_any_midi_quirk, + [QUIRK_MIDI_CH345] = create_any_midi_quirk, [QUIRK_AUDIO_STANDARD_INTERFACE] = create_standard_audio_quirk, [QUIRK_AUDIO_FIXED_ENDPOINT] = create_fixed_stream_quirk, [QUIRK_AUDIO_EDIROL_UAXX] = create_uaxx_quirk, @@ -1117,10 +1118,13 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) switch (chip->usb_id) { case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema */ case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */ + case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */ case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */ case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */ + case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ + case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */ return true; } return false; @@ -1200,8 +1204,12 @@ void snd_usb_set_interface_quirk(struct usb_device *dev) * "Playback Design" products need a 50ms delay after setting the * USB interface. */ - if (le16_to_cpu(dev->descriptor.idVendor) == 0x23ba) + switch (le16_to_cpu(dev->descriptor.idVendor)) { + case 0x23ba: /* Playback Design */ + case 0x0644: /* TEAC Corp. */ mdelay(50); + break; + } } void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, @@ -1216,6 +1224,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) mdelay(20); + /* + * "TEAC Corp." 
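+	 * (USB vendor id 0x0644, matched below)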
products need a 20ms delay after each + * class compliant request + */ + if ((le16_to_cpu(dev->descriptor.idVendor) == 0x0644) && + (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) + mdelay(20); + /* Marantz/Denon devices with USB DAC functionality need a delay * after each class compliant request */ @@ -1264,12 +1280,16 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case USB_ID(0x20b1, 0x3008): /* iFi Audio micro/nano iDSD */ case USB_ID(0x20b1, 0x2008): /* Matrix Audio X-Sabre */ case USB_ID(0x20b1, 0x300a): /* Matrix Audio Mini-i Pro */ + case USB_ID(0x22d9, 0x0416): /* OPPO HA-1 */ if (fp->altsetting == 2) return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; + case USB_ID(0x20b1, 0x000a): /* Gustard DAC-X20U */ case USB_ID(0x20b1, 0x2009): /* DIYINHK DSD DXD 384kHz USB to I2S/DSD */ case USB_ID(0x20b1, 0x2023): /* JLsounds I2SoverUSB */ + case USB_ID(0x20b1, 0x3023): /* Aune X1S 32BIT/384 DSD DAC */ + case USB_ID(0x2616, 0x0106): /* PS Audio NuWave DAC */ if (fp->altsetting == 3) return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 91d0380431b4f..991aa84491cd0 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -94,6 +94,7 @@ enum quirk_type { QUIRK_MIDI_AKAI, QUIRK_MIDI_US122L, QUIRK_MIDI_FTDI, + QUIRK_MIDI_CH345, QUIRK_AUDIO_STANDARD_INTERFACE, QUIRK_AUDIO_FIXED_ENDPOINT, QUIRK_AUDIO_EDIROL_UAXX, diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 10df57237a66d..98cfc388ea330 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -94,12 +94,12 @@ obj-y := $(patsubst %/, %/$(obj)-in.o, $(obj-y)) subdir-obj-y := $(filter %/$(obj)-in.o, $(obj-y)) # '$(OUTPUT)/dir' prefix to all objects -prefix := $(subst ./,,$(OUTPUT)$(dir)/) -obj-y := $(addprefix $(prefix),$(obj-y)) -subdir-obj-y := $(addprefix $(prefix),$(subdir-obj-y)) +objprefix := $(subst ./,,$(OUTPUT)$(dir)/) +obj-y := $(addprefix $(objprefix),$(obj-y)) +subdir-obj-y := $(addprefix $(objprefix),$(subdir-obj-y)) # Final '$(obj)-in.o' object -in-target := $(prefix)$(obj)-in.o +in-target := $(objprefix)$(obj)-in.o PHONY += $(subdir-y) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 29f94f6f0d9e9..f64a2d54d4674 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3721,7 +3721,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, struct format_field *field; struct printk_map *printk; long long val, fval; - unsigned long addr; + unsigned long long addr; char *str; unsigned char *hex; int print; @@ -3754,13 +3754,30 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, */ if (!(field->flags & FIELD_IS_ARRAY) && field->size == pevent->long_size) { - addr = *(unsigned long *)(data + field->offset); + + /* Handle heterogeneous recording and processing + * architectures + * + * CASE I: + * Traces recorded on 32-bit devices (32-bit + * addressing) and processed on 64-bit devices: + * In this case, only 32 bits should be read. + * + * CASE II: + * Traces recorded on 64 bit devices and processed + * on 32-bit devices: + * In this case, 64 bits must be read. + */ + addr = (pevent->long_size == 8) ? 
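+			/* pick the load width from the recorded long_size:
+			 * an 8-byte read of a 4-byte field would pull in
+			 * neighbouring data, and the unsigned-int cast
+			 * zero-extends instead of sign-extending */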
+				*(unsigned long long *)(data + field->offset) :
+				(unsigned long long)*(unsigned int *)(data + field->offset);
+
 				/* Check if it matches a print format */
 				printk = find_printk(pevent, addr);
 				if (printk)
 					trace_seq_puts(s, printk->printk);
 				else
-					trace_seq_printf(s, "%lx", addr);
+					trace_seq_printf(s, "%llx", addr);
 				break;
 			}
 			str = malloc(len + 1);
@@ -4824,13 +4841,12 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 				    sizeof(long) != 8) {
 					char *p;
 
-					ls = 2;
 					/* make %l into %ll */
-					p = strchr(format, 'l');
-					if (p)
+					if (ls == 1 && (p = strchr(format, 'l')))
 						memmove(p+1, p, strlen(p)+1);
 					else if (strcmp(format, "%p") == 0)
 						strcpy(format, "0x%llx");
+					ls = 2;
 				}
 				switch (ls) {
 				case -2:
diff --git a/tools/net/Makefile b/tools/net/Makefile
index ee577ea03ba50..ddf8880106524 100644
--- a/tools/net/Makefile
+++ b/tools/net/Makefile
@@ -4,6 +4,9 @@ CC = gcc
 LEX = flex
 YACC = bison
 
+CFLAGS += -Wall -O2
+CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+
 %.yacc.c: %.y
 	$(YACC) -o $@ -d $<
 
@@ -12,15 +15,13 @@ YACC = bison
 
 all : bpf_jit_disasm bpf_dbg bpf_asm
 
-bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm'
+bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm'
 bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
 bpf_jit_disasm : bpf_jit_disasm.o
 
-bpf_dbg : CFLAGS = -Wall -O2
 bpf_dbg : LDLIBS = -lreadline
 bpf_dbg : bpf_dbg.o
 
-bpf_asm : CFLAGS = -Wall -O2 -I.
 bpf_asm : LDLIBS =
 bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
 bpf_exp.lex.o : bpf_exp.yacc.c
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 11ccbb22ea2b8..13d0458afc716 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -28,24 +28,20 @@ include/asm-generic/bitops/const_hweight.h
 include/asm-generic/bitops/fls64.h
 include/asm-generic/bitops/__fls.h
 include/asm-generic/bitops/fls.h
-include/linux/const.h
 include/linux/perf_event.h
 include/linux/rbtree.h
 include/linux/list.h
 include/linux/hash.h
 include/linux/stringify.h
-lib/find_next_bit.c
 lib/hweight.c
 lib/rbtree.c
 include/linux/swab.h
 arch/*/include/asm/unistd*.h
-arch/*/include/asm/perf_regs.h
 arch/*/include/uapi/asm/unistd*.h
 arch/*/include/uapi/asm/perf_regs.h
 arch/*/lib/memcpy*.S
 arch/*/lib/memset*.S
 include/linux/poison.h
-include/linux/magic.h
 include/linux/hw_breakpoint.h
 include/linux/rbtree_augmented.h
 include/uapi/linux/perf_event.h
diff --git a/tools/perf/arch/alpha/Build b/tools/perf/arch/alpha/Build
new file mode 100644
index 0000000000000..1bb8bf6d7fd4c
--- /dev/null
+++ b/tools/perf/arch/alpha/Build
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build
new file mode 100644
index 0000000000000..1bb8bf6d7fd4c
--- /dev/null
+++ b/tools/perf/arch/mips/Build
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/arch/parisc/Build b/tools/perf/arch/parisc/Build
new file mode 100644
index 0000000000000..1bb8bf6d7fd4c
--- /dev/null
+++ b/tools/perf/arch/parisc/Build
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f7b8218785f6f..a1f3ffc2786de 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1227,7 +1227,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 static void print_aggr(char *prefix)
 {
 	struct perf_evsel *counter;
-	int cpu, cpu2, s, s2, id, nr;
+	int cpu, s, s2, id, nr;
 	double uval;
 	u64 ena, run, val;
 
@@ -1240,8 +1240,7 @@ static void print_aggr(char *prefix)
 			val = ena = run = 0;
 			nr = 0;
 			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
-				cpu2 = perf_evsel__cpus(counter)->map[cpu];
-				s2 = aggr_get_id(evsel_list->cpus, cpu2);
+				s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
 				if (s2 != id)
 					continue;
 				val += counter->counts->cpu[cpu].val;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 995b7a8596b14..658b0a89796d2 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -45,7 +45,7 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd,
 
 static bool hist_browser__has_filter(struct hist_browser *hb)
 {
-	return hists__has_filter(hb->hists) || hb->min_pcnt;
+	return hists__has_filter(hb->hists) || hb->min_pcnt || symbol_conf.has_filter;
 }
 
 static int hist_browser__get_folding(struct hist_browser *browser)
diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c
index 85b523885f9d7..2babddaa24813 100644
--- a/tools/perf/util/cloexec.c
+++ b/tools/perf/util/cloexec.c
@@ -7,11 +7,15 @@
 
 static unsigned long flag = PERF_FLAG_FD_CLOEXEC;
 
+#ifdef __GLIBC_PREREQ
+#if !__GLIBC_PREREQ(2, 6)
 int __weak sched_getcpu(void)
 {
 	errno = ENOSYS;
 	return -1;
 }
+#endif
+#endif
 
 static int perf_flag_probe(void)
 {
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 918fd8ae2d80b..23eea5e7fa946 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1426,7 +1426,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused,
 	if (ph->needs_swap)
 		nr = bswap_32(nr);
 
-	ph->env.nr_cpus_online = nr;
+	ph->env.nr_cpus_avail = nr;
 
 	ret = readn(fd, &nr, sizeof(nr));
 	if (ret != sizeof(nr))
@@ -1435,7 +1435,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused,
 	if (ph->needs_swap)
 		nr = bswap_32(nr);
 
-	ph->env.nr_cpus_avail = nr;
+	ph->env.nr_cpus_online = nr;
 
 	return 0;
 }
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index cc22b9158b93c..c7966c0fa13e4 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -151,6 +151,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 	hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
 	hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
 
+	if (h->srcline)
+		hists__new_col_len(hists, HISTC_SRCLINE, strlen(h->srcline));
+
 	if (h->transaction)
 		hists__new_col_len(hists, HISTC_TRANSACTION,
 				   hist_entry__transaction_len());
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 0c74012575ac9..83054ef6c1a19 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -816,7 +816,7 @@ static struct machine *machines__find_for_cpumode(struct machines *machines,
 
 		machine = machines__find(machines, pid);
 		if (!machine)
-			machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID);
+			machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
 		return machine;
 	}
 
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index a7ab6063e0389..3ddfab315e197 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1253,8 +1253,6 @@ static int kcore__open(struct kcore *kcore, const char *filename)
 static int kcore__init(struct kcore *kcore, char *filename, int elfclass,
 		       bool temp)
 {
-	GElf_Ehdr *ehdr;
-
 	kcore->elfclass = elfclass;
 
 	if (temp)
@@ -1271,9 +1269,7 @@ static int kcore__init(struct kcore *kcore, char *filename, int elfclass,
 	if (!gelf_newehdr(kcore->elf, elfclass))
 		goto out_end;
 
-	ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr);
-	if (!ehdr)
-		goto out_end;
+	memset(&kcore->ehdr, 0, sizeof(GElf_Ehdr));
 
 	return 0;
 
@@ -1330,23 +1326,18 @@ static int kcore__copy_hdr(struct kcore *from, struct kcore *to, size_t count)
 static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset,
 			   u64 addr, u64 len)
 {
-	GElf_Phdr gphdr;
-	GElf_Phdr *phdr;
-
-	phdr = gelf_getphdr(kcore->elf, idx, &gphdr);
-	if (!phdr)
-		return -1;
-
-	phdr->p_type	= PT_LOAD;
-	phdr->p_flags	= PF_R | PF_W | PF_X;
-	phdr->p_offset	= offset;
-	phdr->p_vaddr	= addr;
-	phdr->p_paddr	= 0;
-	phdr->p_filesz	= len;
-	phdr->p_memsz	= len;
-	phdr->p_align	= page_size;
-
-	if (!gelf_update_phdr(kcore->elf, idx, phdr))
+	GElf_Phdr phdr = {
+		.p_type		= PT_LOAD,
+		.p_flags	= PF_R | PF_W | PF_X,
+		.p_offset	= offset,
+		.p_vaddr	= addr,
+		.p_paddr	= 0,
+		.p_filesz	= len,
+		.p_memsz	= len,
+		.p_align	= page_size,
+	};
+
+	if (!gelf_update_phdr(kcore->elf, idx, &phdr))
 		return -1;
 
 	return 0;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 201f6c4ca738d..99378a5c57a76 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1893,6 +1893,8 @@ int setup_intlist(struct intlist **list, const char *list_str,
 		pr_err("problems parsing %s list\n", list_name);
 		return -1;
 	}
+
+	symbol_conf.has_filter = true;
 	return 0;
 }
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 09561500164a0..be0217989bcce 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -105,7 +105,8 @@ struct symbol_conf {
 			demangle_kernel,
 			filter_relative,
 			show_hist_headers,
-			branch_callstack;
+			branch_callstack,
+			has_filter;
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 95abddcd78397..f76830643086e 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -27,7 +27,7 @@ TARGETS_HOTPLUG += memory-hotplug
 # Makefile to avoid test build failures when test
 # Makefile doesn't have explicit build rules.
 ifeq (1,$(MAKELEVEL))
-undefine LDFLAGS
+override LDFLAGS =
 override MAKEFLAGS =
 endif
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 78fb8201014f7..934d56f6803c3 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1561,7 +1561,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 		goto out;
 	}
 
-	if (irq_num >= kvm->arch.vgic.nr_irqs)
+	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
 		return -EINVAL;
 
 	vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
@@ -1602,8 +1602,8 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
 static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-
-	int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
+	int nr_longs = BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
+	int sz = nr_longs * sizeof(unsigned long);
 	vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
 	vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
 	vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
@@ -2161,10 +2161,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id,
 
 	BUG_ON(!vgic_initialized(kvm));
 
-	if (spi > kvm->arch.vgic.nr_irqs)
-		return -EINVAL;
 	return kvm_vgic_inject_irq(kvm, 0, spi, level);
-
 }
 
 /* MSI not implemented yet */
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 44660aee335f9..f84f5856520a4 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -169,7 +169,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 	 * do alloc nowait since if we are going to sleep anyway we
 	 * may as well sleep faulting in page
 	 */
-	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
+	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN);
 	if (!work)
 		return 0;
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 9ff4193dfa493..79db45336e3a2 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -771,40 +771,14 @@ static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
 	return KVM_MMIO_BUS;
 }
 
-static int
-kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
+				    enum kvm_bus bus_idx,
+				    struct kvm_ioeventfd *args)
 {
-	enum kvm_bus bus_idx;
-	struct _ioeventfd *p;
-	struct eventfd_ctx *eventfd;
-	int ret;
-
-	bus_idx = ioeventfd_bus_from_flags(args->flags);
-	/* must be natural-word sized, or 0 to ignore length */
-	switch (args->len) {
-	case 0:
-	case 1:
-	case 2:
-	case 4:
-	case 8:
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	/* check for range overflow */
-	if (args->addr + args->len < args->addr)
-		return -EINVAL;
-
-	/* check for extra flags that we don't understand */
-	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
-		return -EINVAL;
-
-	/* ioeventfd with no length can't be combined with DATAMATCH */
-	if (!args->len &&
-	    args->flags & (KVM_IOEVENTFD_FLAG_PIO |
-			   KVM_IOEVENTFD_FLAG_DATAMATCH))
-		return -EINVAL;
+	struct eventfd_ctx *eventfd;
+	struct _ioeventfd *p;
+	int ret;
 
 	eventfd = eventfd_ctx_fdget(args->fd);
 	if (IS_ERR(eventfd))
@@ -843,16 +817,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	if (ret < 0)
 		goto unlock_fail;
 
-	/* When length is ignored, MMIO is also put on a separate bus, for
-	 * faster lookups.
-	 */
-	if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) {
-		ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS,
-					      p->addr, 0, &p->dev);
-		if (ret < 0)
-			goto register_fail;
-	}
-
 	kvm->buses[bus_idx]->ioeventfd_count++;
 	list_add_tail(&p->list, &kvm->ioeventfds);
 
@@ -860,8 +824,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 	return 0;
 
-register_fail:
-	kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
 unlock_fail:
 	mutex_unlock(&kvm->slots_lock);
 
@@ -873,14 +835,13 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 }
 
 static int
-kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
+			   struct kvm_ioeventfd *args)
 {
-	enum kvm_bus bus_idx;
 	struct _ioeventfd *p, *tmp;
 	struct eventfd_ctx *eventfd;
 	int ret = -ENOENT;
 
-	bus_idx = ioeventfd_bus_from_flags(args->flags);
 	eventfd = eventfd_ctx_fdget(args->fd);
 	if (IS_ERR(eventfd))
 		return PTR_ERR(eventfd);
@@ -901,10 +862,6 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 			continue;
 
 		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
-		if (!p->length) {
-			kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS,
-						  &p->dev);
-		}
 		kvm->buses[bus_idx]->ioeventfd_count--;
 		ioeventfd_release(p);
 		ret = 0;
@@ -918,6 +875,71 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	return ret;
 }
 
+static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
+	int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
+
+	if (!args->len && bus_idx == KVM_MMIO_BUS)
+		kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
+
+	return ret;
+}
+
+static int
+kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	enum kvm_bus bus_idx;
+	int ret;
+
+	bus_idx = ioeventfd_bus_from_flags(args->flags);
+	/* must be natural-word sized, or 0 to ignore length */
+	switch (args->len) {
+	case 0:
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* check for range overflow */
+	if (args->addr + args->len < args->addr)
+		return -EINVAL;
+
+	/* check for extra flags that we don't understand */
+	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
+		return -EINVAL;
+
+	/* ioeventfd with no length can't be combined with DATAMATCH */
+	if (!args->len &&
+	    args->flags & (KVM_IOEVENTFD_FLAG_PIO |
+			   KVM_IOEVENTFD_FLAG_DATAMATCH))
+		return -EINVAL;
+
+	ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
+	if (ret)
+		goto fail;
+
+	/* When length is ignored, MMIO is also put on a separate bus, for
+	 * faster lookups.
+	 */
+	if (!args->len && bus_idx == KVM_MMIO_BUS) {
+		ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
+		if (ret < 0)
+			goto fast_fail;
+	}
+
+	return 0;
+
+fast_fail:
+	kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
+fail:
+	return ret;
+}
+
 int
 kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 90977418aeb6e..85422985235f2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2935,10 +2935,25 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1,
 				 const struct kvm_io_range *r2)
 {
-	if (r1->addr < r2->addr)
+	gpa_t addr1 = r1->addr;
+	gpa_t addr2 = r2->addr;
+
+	if (addr1 < addr2)
 		return -1;
-	if (r1->addr + r1->len > r2->addr + r2->len)
+
+	/* If r2->len == 0, match the exact address.  If r2->len != 0,
+	 * accept any overlapping write.  Any order is acceptable for
+	 * overlapping ranges, because kvm_io_bus_get_first_dev ensures
+	 * we process all of them.
+	 */
+	if (r2->len) {
+		addr1 += r1->len;
+		addr2 += r2->len;
+	}
+
+	if (addr1 > addr2)
 		return 1;
+
 	return 0;
 }