}
EXPORT_SYMBOL(dma_find_channel);
+ /*
+ * net_dma_find_channel - find a channel for net_dma
+ * net_dma has alignment requirements
+ */
+ struct dma_chan *net_dma_find_channel(void)
+ {
+ struct dma_chan *chan = dma_find_channel(DMA_MEMCPY);
+ if (chan && !is_dma_copy_aligned(chan->device, 1, 1, 1))
+ return NULL;
+
+ return chan;
+ }
+ EXPORT_SYMBOL(net_dma_find_channel);
+
/**
* dma_issue_pending_all - flush all pending operations across all channels
*/
dma_chan_name(chan));
list_del_rcu(&device->global_node);
} else if (err)
- pr_debug("dmaengine: failed to get %s: (%d)\n",
- dma_chan_name(chan), err);
+ pr_debug("%s: failed to get %s: (%d)\n",
+ __func__, dma_chan_name(chan), err);
else
break;
if (--device->privatecnt == 0)
list_del_rcu(&device->global_node);
break;
} else if (err)
- pr_err("dmaengine: failed to get %s: (%d)\n",
- dma_chan_name(chan), err);
+ pr_err("%s: failed to get %s: (%d)\n",
+ __func__, dma_chan_name(chan), err);
}
}
#include "registers.h"
#include "hw.h"
+#include "../dmaengine.h"
+
int ioat_pending_level = 4;
module_param(ioat_pending_level, int, 0644);
MODULE_PARM_DESC(ioat_pending_level,
chan->reg_base = device->reg_base + (0x80 * (idx + 1));
spin_lock_init(&chan->cleanup_lock);
chan->common.device = dma;
+ dma_cookie_init(&chan->common);
list_add_tail(&chan->common.device_node, &dma->channels);
device->idx[idx] = chan;
init_timer(&chan->timer);
spin_lock_bh(&ioat->desc_lock);
/* cookie incr and addition to used_list must be atomic */
- cookie = c->cookie;
- cookie++;
- if (cookie < 0)
- cookie = 1;
- c->cookie = cookie;
- tx->cookie = cookie;
+ cookie = dma_cookie_assign(tx);
dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
/* write address into NextDescriptor field of last desc in chain */
PCI_DMA_TODEVICE, flags, 0);
}
- unsigned long ioat_get_current_completion(struct ioat_chan_common *chan)
+ dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan)
{
- unsigned long phys_complete;
+ dma_addr_t phys_complete;
u64 completion;
completion = *chan->completion;
}
bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
- unsigned long *phys_complete)
+ dma_addr_t *phys_complete)
{
*phys_complete = ioat_get_current_completion(chan);
if (*phys_complete == chan->last_completion)
return true;
}
- static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete)
+ static void __cleanup(struct ioat_dma_chan *ioat, dma_addr_t phys_complete)
{
struct ioat_chan_common *chan = &ioat->base;
struct list_head *_desc, *n;
struct dma_async_tx_descriptor *tx;
- dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n",
- __func__, phys_complete);
+ dev_dbg(to_dev(chan), "%s: phys_complete: %llx\n",
+ __func__, (unsigned long long) phys_complete);
list_for_each_safe(_desc, n, &ioat->used_desc) {
struct ioat_desc_sw *desc;
*/
dump_desc_dbg(ioat, desc);
if (tx->cookie) {
- chan->completed_cookie = tx->cookie;
- tx->cookie = 0;
+ dma_cookie_complete(tx);
ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
ioat->active -= desc->hw->tx_cnt;
if (tx->callback) {
static void ioat1_cleanup(struct ioat_dma_chan *ioat)
{
struct ioat_chan_common *chan = &ioat->base;
- unsigned long phys_complete;
+ dma_addr_t phys_complete;
prefetch(chan->completion);
mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
spin_unlock_bh(&ioat->desc_lock);
} else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
- unsigned long phys_complete;
+ dma_addr_t phys_complete;
spin_lock_bh(&ioat->desc_lock);
/* if we haven't made progress and we have already
{
struct ioat_chan_common *chan = to_chan_common(c);
struct ioatdma_device *device = chan->device;
+ enum dma_status ret;
- if (ioat_tx_status(c, cookie, txstate) == DMA_SUCCESS)
- return DMA_SUCCESS;
+ ret = dma_cookie_status(c, cookie, txstate);
+ if (ret == DMA_SUCCESS)
+ return ret;
device->cleanup_fn((unsigned long) c);
- return ioat_tx_status(c, cookie, txstate);
+ return dma_cookie_status(c, cookie, txstate);
}
static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat)
struct ioat_chan_common {
struct dma_chan common;
void __iomem *reg_base;
- unsigned long last_completion;
+ dma_addr_t last_completion;
spinlock_t cleanup_lock;
- dma_cookie_t completed_cookie;
unsigned long state;
#define IOAT_COMPLETION_PENDING 0
#define IOAT_COMPLETION_ACK 1
return container_of(chan, struct ioat_dma_chan, base);
}
-/**
- * ioat_tx_status - poll the status of an ioat transaction
- * @c: channel handle
- * @cookie: transaction identifier
- * @txstate: if set, updated with the transaction state
- */
-static inline enum dma_status
-ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie,
- struct dma_tx_state *txstate)
-{
- struct ioat_chan_common *chan = to_chan_common(c);
- dma_cookie_t last_used;
- dma_cookie_t last_complete;
-
- last_used = c->cookie;
- last_complete = chan->completed_cookie;
-
- dma_set_tx_state(txstate, last_complete, last_used, 0);
-
- return dma_async_is_complete(cookie, last_complete, last_used);
-}
-
/* wrapper around hardware descriptor format + additional software fields */
/**
void __devexit ioat_dma_remove(struct ioatdma_device *device);
struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev,
void __iomem *iobase);
- unsigned long ioat_get_current_completion(struct ioat_chan_common *chan);
+ dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan);
void ioat_init_channel(struct ioatdma_device *device,
struct ioat_chan_common *chan, int idx);
enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
size_t len, struct ioat_dma_descriptor *hw);
bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
- unsigned long *phys_complete);
+ dma_addr_t *phys_complete);
void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
void ioat_kobject_del(struct ioatdma_device *device);
extern const struct sysfs_ops ioat_sysfs_ops;
#include "registers.h"
#include "hw.h"
+#include "../dmaengine.h"
+
int ioat_ring_alloc_order = 8;
module_param(ioat_ring_alloc_order, int, 0644);
MODULE_PARM_DESC(ioat_ring_alloc_order,
spin_unlock_bh(&ioat->prep_lock);
}
- static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
+ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
{
struct ioat_chan_common *chan = &ioat->base;
struct dma_async_tx_descriptor *tx;
dump_desc_dbg(ioat, desc);
if (tx->cookie) {
ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
- chan->completed_cookie = tx->cookie;
- tx->cookie = 0;
+ dma_cookie_complete(tx);
if (tx->callback) {
tx->callback(tx->callback_param);
tx->callback = NULL;
static void ioat2_cleanup(struct ioat2_dma_chan *ioat)
{
struct ioat_chan_common *chan = &ioat->base;
- unsigned long phys_complete;
+ dma_addr_t phys_complete;
spin_lock_bh(&chan->cleanup_lock);
if (ioat_cleanup_preamble(chan, &phys_complete))
static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
{
struct ioat_chan_common *chan = &ioat->base;
- unsigned long phys_complete;
+ dma_addr_t phys_complete;
ioat2_quiesce(chan, 0);
if (ioat_cleanup_preamble(chan, &phys_complete))
struct ioat_chan_common *chan = &ioat->base;
if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
- unsigned long phys_complete;
+ dma_addr_t phys_complete;
u64 status;
status = ioat_chansts(chan);
struct dma_chan *c = tx->chan;
struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
struct ioat_chan_common *chan = &ioat->base;
- dma_cookie_t cookie = c->cookie;
+ dma_cookie_t cookie;
- cookie++;
- if (cookie < 0)
- cookie = 1;
- tx->cookie = cookie;
- c->cookie = cookie;
+ cookie = dma_cookie_assign(tx);
dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
*/
struct ioat_chan_common *chan = &ioat->base;
struct dma_chan *c = &chan->common;
- const u16 curr_size = ioat2_ring_size(ioat);
+ const u32 curr_size = ioat2_ring_size(ioat);
const u16 active = ioat2_ring_active(ioat);
- const u16 new_size = 1 << order;
+ const u32 new_size = 1 << order;
struct ioat_ring_ent **ring;
u16 i;
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/prefetch.h>
+#include "../dmaengine.h"
#include "registers.h"
#include "hw.h"
#include "dma.h"
* The difference from the dma_v2.c __cleanup() is that this routine
* handles extended descriptors and dma-unmapping raid operations.
*/
- static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
+ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
{
struct ioat_chan_common *chan = &ioat->base;
struct ioat_ring_ent *desc;
dump_desc_dbg(ioat, desc);
tx = &desc->txd;
if (tx->cookie) {
- chan->completed_cookie = tx->cookie;
+ dma_cookie_complete(tx);
ioat3_dma_unmap(ioat, desc, idx + i);
- tx->cookie = 0;
if (tx->callback) {
tx->callback(tx->callback_param);
tx->callback = NULL;
static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
{
struct ioat_chan_common *chan = &ioat->base;
- unsigned long phys_complete;
+ dma_addr_t phys_complete;
spin_lock_bh(&chan->cleanup_lock);
if (ioat_cleanup_preamble(chan, &phys_complete))
static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
{
struct ioat_chan_common *chan = &ioat->base;
- unsigned long phys_complete;
+ dma_addr_t phys_complete;
ioat2_quiesce(chan, 0);
if (ioat_cleanup_preamble(chan, &phys_complete))
struct ioat_chan_common *chan = &ioat->base;
if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
- unsigned long phys_complete;
+ dma_addr_t phys_complete;
u64 status;
status = ioat_chansts(chan);
struct dma_tx_state *txstate)
{
struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+ enum dma_status ret;
- if (ioat_tx_status(c, cookie, txstate) == DMA_SUCCESS)
- return DMA_SUCCESS;
+ ret = dma_cookie_status(c, cookie, txstate);
+ if (ret == DMA_SUCCESS)
+ return ret;
ioat3_cleanup(ioat);
- return ioat_tx_status(c, cookie, txstate);
+ return dma_cookie_status(c, cookie, txstate);
}
static struct dma_async_tx_descriptor *
return ioat2_reset_sync(chan, msecs_to_jiffies(200));
}
+ static bool is_jf_ioat(struct pci_dev *pdev)
+ {
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF0:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF1:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF2:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF3:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF4:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF5:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF6:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF7:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF8:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF9:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ static bool is_snb_ioat(struct pci_dev *pdev)
+ {
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB0:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB1:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB2:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB3:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB4:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB5:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB6:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB7:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB8:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB9:
+ return true;
+ default:
+ return false;
+ }
+ }
+
int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
{
struct pci_dev *pdev = device->pdev;
dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
dma->device_free_chan_resources = ioat2_free_chan_resources;
+ if (is_jf_ioat(pdev) || is_snb_ioat(pdev))
+ dma->copy_align = 6;
+
dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
#include <mach/adma.h>
+#include "dmaengine.h"
+
#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
#define to_iop_adma_device(dev) \
container_of(dev, struct iop_adma_device, common)
}
if (cookie > 0) {
- iop_chan->completed_cookie = cookie;
+ iop_chan->common.completed_cookie = cookie;
pr_debug("\tcompleted cookie %d\n", cookie);
}
}
return NULL;
}
-static dma_cookie_t
-iop_desc_assign_cookie(struct iop_adma_chan *iop_chan,
- struct iop_adma_desc_slot *desc)
-{
- dma_cookie_t cookie = iop_chan->common.cookie;
- cookie++;
- if (cookie < 0)
- cookie = 1;
- iop_chan->common.cookie = desc->async_tx.cookie = cookie;
- return cookie;
-}
-
static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan)
{
dev_dbg(iop_chan->device->common.dev, "pending: %d\n",
slots_per_op = grp_start->slots_per_op;
spin_lock_bh(&iop_chan->lock);
- cookie = iop_desc_assign_cookie(iop_chan, sw_desc);
+ cookie = dma_cookie_assign(tx);
old_chain_tail = list_entry(iop_chan->chain.prev,
struct iop_adma_desc_slot, chain_node);
struct dma_tx_state *txstate)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
- dma_cookie_t last_used;
- dma_cookie_t last_complete;
- enum dma_status ret;
-
- last_used = chan->cookie;
- last_complete = iop_chan->completed_cookie;
- dma_set_tx_state(txstate, last_complete, last_used, 0);
- ret = dma_async_is_complete(cookie, last_complete, last_used);
+ int ret;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
if (ret == DMA_SUCCESS)
return ret;
iop_adma_slot_cleanup(iop_chan);
- last_used = chan->cookie;
- last_complete = iop_chan->completed_cookie;
- dma_set_tx_state(txstate, last_complete, last_used, 0);
-
- return dma_async_is_complete(cookie, last_complete, last_used);
+ return dma_cookie_status(chan, cookie, txstate);
}
static irqreturn_t iop_adma_eot_handler(int irq, void *data)
struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
/* address conversion buffers (dma_map / page_address) */
void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
- dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST];
- dma_addr_t pq_dest[2];
+ dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST+2];
+ dma_addr_t *pq_dest = &pq_src[IOP_ADMA_NUM_SRC_TEST];
int i;
struct dma_async_tx_descriptor *tx;
goto err_free_adev;
}
- dev_dbg(&pdev->dev, "%s: allocted descriptor pool virt %p phys %p\n",
+ dev_dbg(&pdev->dev, "%s: allocated descriptor pool virt %p phys %p\n",
__func__, adev->dma_desc_pool_virt,
(void *) adev->dma_desc_pool);
INIT_LIST_HEAD(&iop_chan->chain);
INIT_LIST_HEAD(&iop_chan->all_slots);
iop_chan->common.device = dma_dev;
+ dma_cookie_init(&iop_chan->common);
list_add_tail(&iop_chan->common.device_node, &dma_dev->channels);
if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
iop_desc_set_dest_addr(grp_start, iop_chan, 0);
iop_desc_set_memcpy_src_addr(grp_start, 0);
- cookie = iop_chan->common.cookie;
- cookie++;
- if (cookie <= 1)
- cookie = 2;
+ cookie = dma_cookie_assign(&sw_desc->async_tx);
/* initialize the completed cookie to be less than
* the most recently used cookie
*/
- iop_chan->completed_cookie = cookie - 1;
- iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+ iop_chan->common.completed_cookie = cookie - 1;
/* channel should not be busy */
BUG_ON(iop_chan_is_busy(iop_chan));
iop_desc_set_xor_src_addr(grp_start, 0, 0);
iop_desc_set_xor_src_addr(grp_start, 1, 0);
- cookie = iop_chan->common.cookie;
- cookie++;
- if (cookie <= 1)
- cookie = 2;
+ cookie = dma_cookie_assign(&sw_desc->async_tx);
/* initialize the completed cookie to be less than
* the most recently used cookie
*/
- iop_chan->completed_cookie = cookie - 1;
- iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+ iop_chan->common.completed_cookie = cookie - 1;
/* channel should not be busy */
BUG_ON(iop_chan_is_busy(iop_chan));
* The full GNU General Public License is included in this distribution in the
* file called COPYING.
*/
-#ifndef DMAENGINE_H
-#define DMAENGINE_H
+#ifndef LINUX_DMAENGINE_H
+#define LINUX_DMAENGINE_H
#include <linux/device.h>
#include <linux/uio.h>
+#include <linux/bug.h>
#include <linux/scatterlist.h>
#include <linux/bitmap.h>
+#include <linux/types.h>
#include <asm/page.h>
/**
* struct dma_chan - devices supply DMA channels, clients use them
* @device: ptr to the dma device who supplies this channel, always !%NULL
* @cookie: last cookie value returned to client
+ * @completed_cookie: last completed cookie for this channel
* @chan_id: channel ID for sysfs
* @dev: class device for sysfs
* @device_node: used to add this to the device chan list
struct dma_chan {
struct dma_device *device;
dma_cookie_t cookie;
+ dma_cookie_t completed_cookie;
/* sysfs */
int chan_id;
* may or may not be applicable on memory sources.
* @dst_maxburst: same as src_maxburst but for destination target
* mutatis mutandis.
+ * @device_fc: Flow Controller Settings. Only valid for slave channels. Fill
+ * with 'true' if peripheral should be flow controller. Direction will be
+ * selected at Runtime.
*
* This struct is passed in as configuration data to a DMA engine
* in order to set up a certain channel for DMA transport at runtime.
enum dma_slave_buswidth dst_addr_width;
u32 src_maxburst;
u32 dst_maxburst;
+ bool device_fc;
};
static inline const char *dma_chan_name(struct dma_chan *chan)
struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
struct dma_chan *chan, struct scatterlist *sgl,
unsigned int sg_len, enum dma_transfer_direction direction,
- unsigned long flags);
+ unsigned long flags, void *context);
struct dma_async_tx_descriptor *(*device_prep_dma_cyclic)(
struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
- size_t period_len, enum dma_transfer_direction direction);
+ size_t period_len, enum dma_transfer_direction direction,
+ void *context);
struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
struct dma_chan *chan, struct dma_interleaved_template *xt,
unsigned long flags);
struct scatterlist sg;
sg_init_one(&sg, buf, len);
- return chan->device->device_prep_slave_sg(chan, &sg, 1, dir, flags);
+ return chan->device->device_prep_slave_sg(chan, &sg, 1,
+ dir, flags, NULL);
+}
+
+static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+ enum dma_transfer_direction dir, unsigned long flags)
+{
+ return chan->device->device_prep_slave_sg(chan, sgl, sg_len,
+ dir, flags, NULL);
+}
+
+static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction dir)
+{
+ return chan->device->device_prep_dma_cyclic(chan, buf_addr, buf_len,
+ period_len, dir, NULL);
}
static inline int dmaengine_terminate_all(struct dma_chan *chan)
void dma_async_device_unregister(struct dma_device *device);
void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type);
+ struct dma_chan *net_dma_find_channel(void);
#define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y)
/* --- Helper iov-locking functions --- */
* TCP_CLOSE socket is finished
*/
+#define pr_fmt(fmt) "TCP: " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
}
out:
- if (copied)
+ if (copied && !(flags & MSG_SENDPAGE_NOTLAST))
tcp_push(sk, flags, mss_now, tp->nonagle);
return copied;
if ((available < target) &&
(len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
!sysctl_tcp_low_latency &&
- dma_find_channel(DMA_MEMCPY)) {
+ net_dma_find_channel()) {
preempt_enable_no_resched();
tp->ucopy.pinned_list =
dma_pin_iovec_pages(msg->msg_iov, len);
if (!(flags & MSG_TRUNC)) {
#ifdef CONFIG_NET_DMA
if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
- tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
+ tp->ucopy.dma_chan = net_dma_find_channel();
if (tp->ucopy.dma_chan) {
tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
if (tp->ucopy.dma_cookie < 0) {
- printk(KERN_ALERT "dma_cookie < 0\n");
+ pr_alert("%s: dma_cookie < 0\n",
+ __func__);
/* Exception. Bailout! */
if (!copied)
out_of_socket_memory = tcp_out_of_memory(sk);
if (too_many_orphans && net_ratelimit())
- pr_info("TCP: too many orphaned sockets\n");
+ pr_info("too many orphaned sockets\n");
if (out_of_socket_memory && net_ratelimit())
- pr_info("TCP: out of memory -- consider tuning tcp_mem\n");
+ pr_info("out of memory -- consider tuning tcp_mem\n");
return too_many_orphans || out_of_socket_memory;
}
sysctl_tcp_rmem[1] = 87380;
sysctl_tcp_rmem[2] = max(87380, max_share);
- printk(KERN_INFO "TCP: Hash tables configured "
- "(established %u bind %u)\n",
- tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
+ pr_info("Hash tables configured (established %u bind %u)\n",
+ tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
tcp_register_congestion_control(&tcp_reno);
* Pasi Sarolahti: F-RTO for dealing with spurious RTOs
*/
+#define pr_fmt(fmt) "TCP: " fmt
+
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
opt_rx->wscale_ok = 1;
if (snd_wscale > 14) {
if (net_ratelimit())
- printk(KERN_INFO "tcp_parse_options: Illegal window "
- "scaling value %d >14 received.\n",
- snd_wscale);
+ pr_info("%s: Illegal window scaling value %d >14 received\n",
+ __func__,
+ snd_wscale);
snd_wscale = 14;
}
opt_rx->snd_wscale = snd_wscale;
/* Only TCP_LISTEN and TCP_CLOSE are left, in these
* cases we should never reach this piece of code.
*/
- printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
+ pr_err("%s: Impossible, sk->sk_state=%d\n",
__func__, sk->sk_state);
break;
}
return 0;
}
+static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct sk_buff *skb1;
+ u32 seq, end_seq;
+
+ TCP_ECN_check_ce(tp, skb);
+
+ if (tcp_try_rmem_schedule(sk, skb->truesize)) {
+ /* TODO: should increment a counter */
+ __kfree_skb(skb);
+ return;
+ }
+
+ /* Disable header prediction. */
+ tp->pred_flags = 0;
+ inet_csk_schedule_ack(sk);
+
+ SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
+ tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+
+ skb1 = skb_peek_tail(&tp->out_of_order_queue);
+ if (!skb1) {
+ /* Initial out of order segment, build 1 SACK. */
+ if (tcp_is_sack(tp)) {
+ tp->rx_opt.num_sacks = 1;
+ tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
+ tp->selective_acks[0].end_seq =
+ TCP_SKB_CB(skb)->end_seq;
+ }
+ __skb_queue_head(&tp->out_of_order_queue, skb);
+ goto end;
+ }
+
+ seq = TCP_SKB_CB(skb)->seq;
+ end_seq = TCP_SKB_CB(skb)->end_seq;
+
+ if (seq == TCP_SKB_CB(skb1)->end_seq) {
+ /* Packets in ofo can stay in queue a long time.
+ * Better try to coalesce them right now
+ * to avoid future tcp_collapse_ofo_queue(),
+ * probably the most expensive function in tcp stack.
+ */
+ if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) {
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPRCVCOALESCE);
+ BUG_ON(skb_copy_bits(skb, 0,
+ skb_put(skb1, skb->len),
+ skb->len));
+ TCP_SKB_CB(skb1)->end_seq = end_seq;
+ TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+ __kfree_skb(skb);
+ skb = NULL;
+ } else {
+ __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+ }
+
+ if (!tp->rx_opt.num_sacks ||
+ tp->selective_acks[0].end_seq != seq)
+ goto add_sack;
+
+ /* Common case: data arrive in order after hole. */
+ tp->selective_acks[0].end_seq = end_seq;
+ goto end;
+ }
+
+ /* Find place to insert this segment. */
+ while (1) {
+ if (!after(TCP_SKB_CB(skb1)->seq, seq))
+ break;
+ if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
+ skb1 = NULL;
+ break;
+ }
+ skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
+ }
+
+ /* Do skb overlap to previous one? */
+ if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+ if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+ /* All the bits are present. Drop. */
+ __kfree_skb(skb);
+ skb = NULL;
+ tcp_dsack_set(sk, seq, end_seq);
+ goto add_sack;
+ }
+ if (after(seq, TCP_SKB_CB(skb1)->seq)) {
+ /* Partial overlap. */
+ tcp_dsack_set(sk, seq,
+ TCP_SKB_CB(skb1)->end_seq);
+ } else {
+ if (skb_queue_is_first(&tp->out_of_order_queue,
+ skb1))
+ skb1 = NULL;
+ else
+ skb1 = skb_queue_prev(
+ &tp->out_of_order_queue,
+ skb1);
+ }
+ }
+ if (!skb1)
+ __skb_queue_head(&tp->out_of_order_queue, skb);
+ else
+ __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+
+ /* And clean segments covered by new one as whole. */
+ while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
+ skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
+
+ if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
+ break;
+ if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+ tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
+ end_seq);
+ break;
+ }
+ __skb_unlink(skb1, &tp->out_of_order_queue);
+ tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
+ TCP_SKB_CB(skb1)->end_seq);
+ __kfree_skb(skb1);
+ }
+
+add_sack:
+ if (tcp_is_sack(tp))
+ tcp_sack_new_ofo_skb(sk, seq, end_seq);
+end:
+ if (skb)
+ skb_set_owner_r(skb, sk);
+}
+
+
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
const struct tcphdr *th = tcp_hdr(skb);
goto queue_and_out;
}
- TCP_ECN_check_ce(tp, skb);
-
- if (tcp_try_rmem_schedule(sk, skb->truesize))
- goto drop;
-
- /* Disable header prediction. */
- tp->pred_flags = 0;
- inet_csk_schedule_ack(sk);
-
- SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
- tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
-
- skb_set_owner_r(skb, sk);
-
- if (!skb_peek(&tp->out_of_order_queue)) {
- /* Initial out of order segment, build 1 SACK. */
- if (tcp_is_sack(tp)) {
- tp->rx_opt.num_sacks = 1;
- tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
- tp->selective_acks[0].end_seq =
- TCP_SKB_CB(skb)->end_seq;
- }
- __skb_queue_head(&tp->out_of_order_queue, skb);
- } else {
- struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue);
- u32 seq = TCP_SKB_CB(skb)->seq;
- u32 end_seq = TCP_SKB_CB(skb)->end_seq;
-
- if (seq == TCP_SKB_CB(skb1)->end_seq) {
- __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-
- if (!tp->rx_opt.num_sacks ||
- tp->selective_acks[0].end_seq != seq)
- goto add_sack;
-
- /* Common case: data arrive in order after hole. */
- tp->selective_acks[0].end_seq = end_seq;
- return;
- }
-
- /* Find place to insert this segment. */
- while (1) {
- if (!after(TCP_SKB_CB(skb1)->seq, seq))
- break;
- if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
- skb1 = NULL;
- break;
- }
- skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
- }
-
- /* Do skb overlap to previous one? */
- if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
- if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
- /* All the bits are present. Drop. */
- __kfree_skb(skb);
- tcp_dsack_set(sk, seq, end_seq);
- goto add_sack;
- }
- if (after(seq, TCP_SKB_CB(skb1)->seq)) {
- /* Partial overlap. */
- tcp_dsack_set(sk, seq,
- TCP_SKB_CB(skb1)->end_seq);
- } else {
- if (skb_queue_is_first(&tp->out_of_order_queue,
- skb1))
- skb1 = NULL;
- else
- skb1 = skb_queue_prev(
- &tp->out_of_order_queue,
- skb1);
- }
- }
- if (!skb1)
- __skb_queue_head(&tp->out_of_order_queue, skb);
- else
- __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-
- /* And clean segments covered by new one as whole. */
- while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
- skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
-
- if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
- break;
- if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
- tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
- end_seq);
- break;
- }
- __skb_unlink(skb1, &tp->out_of_order_queue);
- tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
- TCP_SKB_CB(skb1)->end_seq);
- __kfree_skb(skb1);
- }
-
-add_sack:
- if (tcp_is_sack(tp))
- tcp_sack_new_ofo_skb(sk, seq, end_seq);
- }
+ tcp_data_queue_ofo(sk, skb);
}
static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
return 0;
if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
- tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
+ tp->ucopy.dma_chan = net_dma_find_channel();
if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
* a single port at the same time.
*/
+#define pr_fmt(fmt) "TCP: " fmt
#include <linux/bottom_half.h>
#include <linux/types.h>
#ifdef CONFIG_TCP_MD5SIG
-static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
- __be32 addr);
-static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
__be32 daddr, __be32 saddr, const struct tcphdr *th);
-#else
-static inline
-struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
-{
- return NULL;
-}
#endif
struct inet_hashinfo tcp_hashinfo;
struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
+ const __u8 *hash_location = NULL;
+ unsigned char newhash[16];
+ int genhash;
+ struct sock *sk1 = NULL;
#endif
struct net *net;
arg.iov[0].iov_len = sizeof(rep.th);
#ifdef CONFIG_TCP_MD5SIG
- key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
+ hash_location = tcp_parse_md5sig_option(th);
+ if (!sk && hash_location) {
+ /*
+ * active side is lost. Try to find listening socket through
+ * source port, and then find md5 key through listening socket.
+ * we are not loose security here:
+ * Incoming packet is checked with md5 hash with finding key,
+ * no RST generated if md5 hash doesn't match.
+ */
+ sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
+ &tcp_hashinfo, ip_hdr(skb)->daddr,
+ ntohs(th->source), inet_iif(skb));
+ /* don't send rst if it can't find key */
+ if (!sk1)
+ return;
+ rcu_read_lock();
+ key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
+ &ip_hdr(skb)->saddr, AF_INET);
+ if (!key)
+ goto release_sk1;
+
+ genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
+ if (genhash || memcmp(hash_location, newhash, 16) != 0)
+ goto release_sk1;
+ } else {
+ key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
+ &ip_hdr(skb)->saddr,
+ AF_INET) : NULL;
+ }
+
if (key) {
rep.opt[0] = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
+
+#ifdef CONFIG_TCP_MD5SIG
+release_sk1:
+ if (sk1) {
+ rcu_read_unlock();
+ sock_put(sk1);
+ }
+#endif
}
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
req->ts_recent,
0,
- tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
+ tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
+ AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos);
}
lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
if (!lopt->synflood_warned) {
lopt->synflood_warned = 1;
- pr_info("%s: Possible SYN flooding on port %d. %s. "
- " Check SNMP counters.\n",
+ pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
proto, ntohs(tcp_hdr(skb)->dest), msg);
}
return want_cookie;
*/
/* Find the Key structure for an address. */
-static struct tcp_md5sig_key *
- tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
+struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
+ const union tcp_md5_addr *addr,
+ int family)
{
struct tcp_sock *tp = tcp_sk(sk);
- int i;
-
- if (!tp->md5sig_info || !tp->md5sig_info->entries4)
+ struct tcp_md5sig_key *key;
+ struct hlist_node *pos;
+ unsigned int size = sizeof(struct in_addr);
+ struct tcp_md5sig_info *md5sig;
+
+ /* caller either holds rcu_read_lock() or socket lock */
+ md5sig = rcu_dereference_check(tp->md5sig_info,
+ sock_owned_by_user(sk) ||
+ lockdep_is_held(&sk->sk_lock.slock));
+ if (!md5sig)
return NULL;
- for (i = 0; i < tp->md5sig_info->entries4; i++) {
- if (tp->md5sig_info->keys4[i].addr == addr)
- return &tp->md5sig_info->keys4[i].base;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (family == AF_INET6)
+ size = sizeof(struct in6_addr);
+#endif
+ hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
+ if (key->family != family)
+ continue;
+ if (!memcmp(&key->addr, addr, size))
+ return key;
}
return NULL;
}
+EXPORT_SYMBOL(tcp_md5_do_lookup);
struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
struct sock *addr_sk)
{
- return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
+ union tcp_md5_addr *addr;
+
+ addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
+ return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);
static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
struct request_sock *req)
{
- return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
+ union tcp_md5_addr *addr;
+
+ addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
+ return tcp_md5_do_lookup(sk, addr, AF_INET);
}
/* This can be called on a newly created socket, from other files */
-int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
- u8 *newkey, u8 newkeylen)
+int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
+ int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
/* Add Key to the list */
struct tcp_md5sig_key *key;
struct tcp_sock *tp = tcp_sk(sk);
- struct tcp4_md5sig_key *keys;
+ struct tcp_md5sig_info *md5sig;
- key = tcp_v4_md5_do_lookup(sk, addr);
+ key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
if (key) {
/* Pre-existing entry - just update that one. */
- kfree(key->key);
- key->key = newkey;
+ memcpy(key->key, newkey, newkeylen);
key->keylen = newkeylen;
- } else {
- struct tcp_md5sig_info *md5sig;
-
- if (!tp->md5sig_info) {
- tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
- GFP_ATOMIC);
- if (!tp->md5sig_info) {
- kfree(newkey);
- return -ENOMEM;
- }
- sk_nocaps_add(sk, NETIF_F_GSO_MASK);
- }
+ return 0;
+ }
- md5sig = tp->md5sig_info;
- if (md5sig->entries4 == 0 &&
- tcp_alloc_md5sig_pool(sk) == NULL) {
- kfree(newkey);
+ md5sig = rcu_dereference_protected(tp->md5sig_info,
+ sock_owned_by_user(sk));
+ if (!md5sig) {
+ md5sig = kmalloc(sizeof(*md5sig), gfp);
+ if (!md5sig)
return -ENOMEM;
- }
-
- if (md5sig->alloced4 == md5sig->entries4) {
- keys = kmalloc((sizeof(*keys) *
- (md5sig->entries4 + 1)), GFP_ATOMIC);
- if (!keys) {
- kfree(newkey);
- if (md5sig->entries4 == 0)
- tcp_free_md5sig_pool();
- return -ENOMEM;
- }
- if (md5sig->entries4)
- memcpy(keys, md5sig->keys4,
- sizeof(*keys) * md5sig->entries4);
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
+ INIT_HLIST_HEAD(&md5sig->head);
+ rcu_assign_pointer(tp->md5sig_info, md5sig);
+ }
- /* Free old key list, and reference new one */
- kfree(md5sig->keys4);
- md5sig->keys4 = keys;
- md5sig->alloced4++;
- }
- md5sig->entries4++;
- md5sig->keys4[md5sig->entries4 - 1].addr = addr;
- md5sig->keys4[md5sig->entries4 - 1].base.key = newkey;
- md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
+ key = sock_kmalloc(sk, sizeof(*key), gfp);
+ if (!key)
+ return -ENOMEM;
+ if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
+ sock_kfree_s(sk, key, sizeof(*key));
+ return -ENOMEM;
}
- return 0;
-}
-EXPORT_SYMBOL(tcp_v4_md5_do_add);
-static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
- u8 *newkey, u8 newkeylen)
-{
- return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
- newkey, newkeylen);
+ memcpy(key->key, newkey, newkeylen);
+ key->keylen = newkeylen;
+ key->family = family;
+ memcpy(&key->addr, addr,
+ (family == AF_INET6) ? sizeof(struct in6_addr) :
+ sizeof(struct in_addr));
+ hlist_add_head_rcu(&key->node, &md5sig->head);
+ return 0;
}
+EXPORT_SYMBOL(tcp_md5_do_add);
-int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
+int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
struct tcp_sock *tp = tcp_sk(sk);
- int i;
-
- for (i = 0; i < tp->md5sig_info->entries4; i++) {
- if (tp->md5sig_info->keys4[i].addr == addr) {
- /* Free the key */
- kfree(tp->md5sig_info->keys4[i].base.key);
- tp->md5sig_info->entries4--;
-
- if (tp->md5sig_info->entries4 == 0) {
- kfree(tp->md5sig_info->keys4);
- tp->md5sig_info->keys4 = NULL;
- tp->md5sig_info->alloced4 = 0;
- tcp_free_md5sig_pool();
- } else if (tp->md5sig_info->entries4 != i) {
- /* Need to do some manipulation */
- memmove(&tp->md5sig_info->keys4[i],
- &tp->md5sig_info->keys4[i+1],
- (tp->md5sig_info->entries4 - i) *
- sizeof(struct tcp4_md5sig_key));
- }
- return 0;
- }
- }
- return -ENOENT;
+ struct tcp_md5sig_key *key;
+ struct tcp_md5sig_info *md5sig;
+
+ key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
+ if (!key)
+ return -ENOENT;
+ hlist_del_rcu(&key->node);
+ atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
+ kfree_rcu(key, rcu);
+ md5sig = rcu_dereference_protected(tp->md5sig_info,
+ sock_owned_by_user(sk));
+ if (hlist_empty(&md5sig->head))
+ tcp_free_md5sig_pool();
+ return 0;
}
-EXPORT_SYMBOL(tcp_v4_md5_do_del);
+EXPORT_SYMBOL(tcp_md5_do_del);
-static void tcp_v4_clear_md5_list(struct sock *sk)
+void tcp_clear_md5_list(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_md5sig_key *key;
+ struct hlist_node *pos, *n;
+ struct tcp_md5sig_info *md5sig;
- /* Free each key, then the set of key keys,
- * the crypto element, and then decrement our
- * hold on the last resort crypto.
- */
- if (tp->md5sig_info->entries4) {
- int i;
- for (i = 0; i < tp->md5sig_info->entries4; i++)
- kfree(tp->md5sig_info->keys4[i].base.key);
- tp->md5sig_info->entries4 = 0;
+ md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
+
+ if (!hlist_empty(&md5sig->head))
tcp_free_md5sig_pool();
- }
- if (tp->md5sig_info->keys4) {
- kfree(tp->md5sig_info->keys4);
- tp->md5sig_info->keys4 = NULL;
- tp->md5sig_info->alloced4 = 0;
+ hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
+ hlist_del_rcu(&key->node);
+ atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
+ kfree_rcu(key, rcu);
}
}
{
struct tcp_md5sig cmd;
struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
- u8 *newkey;
if (optlen < sizeof(cmd))
return -EINVAL;
if (sin->sin_family != AF_INET)
return -EINVAL;
- if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
- if (!tcp_sk(sk)->md5sig_info)
- return -ENOENT;
- return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
- }
+ if (!cmd.tcpm_key || !cmd.tcpm_keylen)
+ return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
+ AF_INET);
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
return -EINVAL;
- if (!tcp_sk(sk)->md5sig_info) {
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_info *p;
-
- p = kzalloc(sizeof(*p), sk->sk_allocation);
- if (!p)
- return -EINVAL;
-
- tp->md5sig_info = p;
- sk_nocaps_add(sk, NETIF_F_GSO_MASK);
- }
-
- newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
- if (!newkey)
- return -ENOMEM;
- return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
- newkey, cmd.tcpm_keylen);
+ return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
+ AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
+ GFP_KERNEL);
}
static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}
-static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
__be32 daddr, __be32 saddr, const struct tcphdr *th)
{
struct tcp_md5sig_pool *hp;
int genhash;
unsigned char newhash[16];
- hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
+ hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
+ AF_INET);
hash_location = tcp_parse_md5sig_option(th);
/* We've parsed the options - do we have a hash? */
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
if (net_ratelimit()) {
- printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
- &iph->saddr, ntohs(th->source),
- &iph->daddr, ntohs(th->dest),
- genhash ? " tcp_v4_calc_md5_hash failed" : "");
+ pr_info("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
+ &iph->saddr, ntohs(th->source),
+ &iph->daddr, ntohs(th->dest),
+ genhash ? " tcp_v4_calc_md5_hash failed" : "");
}
return 1;
}
* to destinations, already remembered
* to the moment of synflood.
*/
- LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
+ LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
&saddr, ntohs(tcp_hdr(skb)->source));
goto drop_and_release;
}
ireq->opt = NULL;
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
+ newinet->rcv_tos = ip_hdr(skb)->tos;
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
- key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
+ key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
+ AF_INET);
if (key != NULL) {
/*
* We're using one, so create a matching key
* memory, then we end up not copying the key
* across. Shucks.
*/
- char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
- if (newkey != NULL)
- tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
- newkey, key->keylen);
+ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
+ AF_INET, key->key, key->keylen, GFP_ATOMIC);
sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
}
#endif
#ifdef CONFIG_NET_DMA
struct tcp_sock *tp = tcp_sk(sk);
if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
- tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
+ tp->ucopy.dma_chan = net_dma_find_channel();
if (tp->ucopy.dma_chan)
ret = tcp_v4_do_rcv(sk, skb);
else
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
.md5_lookup = tcp_v4_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
- .md5_add = tcp_v4_md5_add_func,
.md5_parse = tcp_v4_parse_md5_keys,
};
#endif
#ifdef CONFIG_TCP_MD5SIG
/* Clean up the MD5 key list, if any */
if (tp->md5sig_info) {
- tcp_v4_clear_md5_list(sk);
- kfree(tp->md5sig_info);
+ tcp_clear_md5_list(sk);
+ kfree_rcu(tp->md5sig_info, rcu);
tp->md5sig_info = NULL;
}
#endif
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
const struct in6_addr *addr)
{
- struct tcp_sock *tp = tcp_sk(sk);
- int i;
-
- BUG_ON(tp == NULL);
-
- if (!tp->md5sig_info || !tp->md5sig_info->entries6)
- return NULL;
-
- for (i = 0; i < tp->md5sig_info->entries6; i++) {
- if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, addr))
- return &tp->md5sig_info->keys6[i].base;
- }
- return NULL;
+ return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}
static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
}
-static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
- char *newkey, u8 newkeylen)
-{
- /* Add key to the list */
- struct tcp_md5sig_key *key;
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcp6_md5sig_key *keys;
-
- key = tcp_v6_md5_do_lookup(sk, peer);
- if (key) {
- /* modify existing entry - just update that one */
- kfree(key->key);
- key->key = newkey;
- key->keylen = newkeylen;
- } else {
- /* reallocate new list if current one is full. */
- if (!tp->md5sig_info) {
- tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
- if (!tp->md5sig_info) {
- kfree(newkey);
- return -ENOMEM;
- }
- sk_nocaps_add(sk, NETIF_F_GSO_MASK);
- }
- if (tp->md5sig_info->entries6 == 0 &&
- tcp_alloc_md5sig_pool(sk) == NULL) {
- kfree(newkey);
- return -ENOMEM;
- }
- if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) {
- keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) *
- (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);
-
- if (!keys) {
- kfree(newkey);
- if (tp->md5sig_info->entries6 == 0)
- tcp_free_md5sig_pool();
- return -ENOMEM;
- }
-
- if (tp->md5sig_info->entries6)
- memmove(keys, tp->md5sig_info->keys6,
- (sizeof (tp->md5sig_info->keys6[0]) *
- tp->md5sig_info->entries6));
-
- kfree(tp->md5sig_info->keys6);
- tp->md5sig_info->keys6 = keys;
- tp->md5sig_info->alloced6++;
- }
-
- tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr = *peer;
- tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
- tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;
-
- tp->md5sig_info->entries6++;
- }
- return 0;
-}
-
-static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk,
- u8 *newkey, __u8 newkeylen)
-{
- return tcp_v6_md5_do_add(sk, &inet6_sk(addr_sk)->daddr,
- newkey, newkeylen);
-}
-
-static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- int i;
-
- for (i = 0; i < tp->md5sig_info->entries6; i++) {
- if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, peer)) {
- /* Free the key */
- kfree(tp->md5sig_info->keys6[i].base.key);
- tp->md5sig_info->entries6--;
-
- if (tp->md5sig_info->entries6 == 0) {
- kfree(tp->md5sig_info->keys6);
- tp->md5sig_info->keys6 = NULL;
- tp->md5sig_info->alloced6 = 0;
- tcp_free_md5sig_pool();
- } else {
- /* shrink the database */
- if (tp->md5sig_info->entries6 != i)
- memmove(&tp->md5sig_info->keys6[i],
- &tp->md5sig_info->keys6[i+1],
- (tp->md5sig_info->entries6 - i)
- * sizeof (tp->md5sig_info->keys6[0]));
- }
- return 0;
- }
- }
- return -ENOENT;
-}
-
-static void tcp_v6_clear_md5_list (struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- int i;
-
- if (tp->md5sig_info->entries6) {
- for (i = 0; i < tp->md5sig_info->entries6; i++)
- kfree(tp->md5sig_info->keys6[i].base.key);
- tp->md5sig_info->entries6 = 0;
- tcp_free_md5sig_pool();
- }
-
- kfree(tp->md5sig_info->keys6);
- tp->md5sig_info->keys6 = NULL;
- tp->md5sig_info->alloced6 = 0;
-
- if (tp->md5sig_info->entries4) {
- for (i = 0; i < tp->md5sig_info->entries4; i++)
- kfree(tp->md5sig_info->keys4[i].base.key);
- tp->md5sig_info->entries4 = 0;
- tcp_free_md5sig_pool();
- }
-
- kfree(tp->md5sig_info->keys4);
- tp->md5sig_info->keys4 = NULL;
- tp->md5sig_info->alloced4 = 0;
-}
-
static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
int optlen)
{
struct tcp_md5sig cmd;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
- u8 *newkey;
if (optlen < sizeof(cmd))
return -EINVAL;
return -EINVAL;
if (!cmd.tcpm_keylen) {
- if (!tcp_sk(sk)->md5sig_info)
- return -ENOENT;
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
- return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]);
- return tcp_v6_md5_do_del(sk, &sin6->sin6_addr);
+ return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+ AF_INET);
+ return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
+ AF_INET6);
}
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
return -EINVAL;
- if (!tcp_sk(sk)->md5sig_info) {
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_info *p;
-
- p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
+ if (ipv6_addr_v4mapped(&sin6->sin6_addr))
+ return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+ AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
- tp->md5sig_info = p;
- sk_nocaps_add(sk, NETIF_F_GSO_MASK);
- }
-
- newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
- if (!newkey)
- return -ENOMEM;
- if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
- return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3],
- newkey, cmd.tcpm_keylen);
- }
- return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen);
+ return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
+ AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
const struct tcphdr *th = tcp_hdr(skb);
u32 seq = 0, ack_seq = 0;
struct tcp_md5sig_key *key = NULL;
+#ifdef CONFIG_TCP_MD5SIG
+ const __u8 *hash_location = NULL;
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ unsigned char newhash[16];
+ int genhash;
+ struct sock *sk1 = NULL;
+#endif
if (th->rst)
return;
return;
#ifdef CONFIG_TCP_MD5SIG
- if (sk)
- key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr);
+ hash_location = tcp_parse_md5sig_option(th);
+ if (!sk && hash_location) {
+ /*
+ * active side is lost. Try to find listening socket through
+ * source port, and then find md5 key through listening socket.
+ * we are not loose security here:
+ * Incoming packet is checked with md5 hash with finding key,
+ * no RST generated if md5 hash doesn't match.
+ */
+ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
+ &tcp_hashinfo, &ipv6h->daddr,
+ ntohs(th->source), inet6_iif(skb));
+ if (!sk1)
+ return;
+
+ rcu_read_lock();
+ key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
+ if (!key)
+ goto release_sk1;
+
+ genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
+ if (genhash || memcmp(hash_location, newhash, 16) != 0)
+ goto release_sk1;
+ } else {
+ key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
+ }
#endif
if (th->ack)
(th->doff << 2);
tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);
+
+#ifdef CONFIG_TCP_MD5SIG
+release_sk1:
+ if (sk1) {
+ rcu_read_unlock();
+ sock_put(sk1);
+ }
+#endif
}
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
+ newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
+ newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
/* Clone native IPv6 options from listening socket (if any)
* memory, then we end up not copying the key
* across. Shucks.
*/
- char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
- if (newkey != NULL)
- tcp_v6_md5_do_add(newsk, &newnp->daddr,
- newkey, key->keylen);
+ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
+ AF_INET6, key->key, key->keylen, GFP_ATOMIC);
}
#endif
np->mcast_oif = inet6_iif(opt_skb);
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+ if (np->rxopt.bits.rxtclass)
+ np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
if (ipv6_opt_accepted(sk, opt_skb)) {
skb_set_owner_r(opt_skb, sk);
opt_skb = xchg(&np->pktoptions, opt_skb);
#ifdef CONFIG_NET_DMA
struct tcp_sock *tp = tcp_sk(sk);
if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
- tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
+ tp->ucopy.dma_chan = net_dma_find_channel();
if (tp->ucopy.dma_chan)
ret = tcp_v6_do_rcv(sk, skb);
else
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
.md5_lookup = tcp_v6_md5_lookup,
.calc_md5_hash = tcp_v6_md5_hash_skb,
- .md5_add = tcp_v6_md5_add_func,
.md5_parse = tcp_v6_parse_md5_keys,
};
#endif
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
.md5_lookup = tcp_v4_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
- .md5_add = tcp_v6_md5_add_func,
.md5_parse = tcp_v6_parse_md5_keys,
};
#endif
static void tcp_v6_destroy_sock(struct sock *sk)
{
-#ifdef CONFIG_TCP_MD5SIG
- /* Clean up the MD5 key list */
- if (tcp_sk(sk)->md5sig_info)
- tcp_v6_clear_md5_list(sk);
-#endif
tcp_v4_destroy_sock(sk);
inet6_destroy_sock(sk);
}