priv->phy_node = of_parse_phandle(np, "phy-handle", 0);
+ /* In the case of a fixed PHY, the DT node associated
+ * to the PHY is the Ethernet MAC DT node.
+ */
+ if (of_phy_is_fixed_link(np)) {
+ err = of_phy_register_fixed_link(np);
+ if (err)
+ goto err_grp_init;
+
+ priv->phy_node = np;
+ }
+
/* Find the TBI PHY. If it's not there, we don't support SGMII */
priv->tbi_node = of_parse_phandle(np, "tbi-handle", 0);
gfar_write_isrg(priv);
}
-static void __init gfar_init_addr_hash_table(struct gfar_private *priv)
+static void gfar_init_addr_hash_table(struct gfar_private *priv)
{
struct gfar __iomem *regs = priv->gfargrp[0].regs;
gfar_hw_init(priv);
+ /* Carrier starts down, phylib will bring it up */
+ netif_carrier_off(dev);
+
err = register_netdev(dev);
if (err) {
goto register_fail;
}
- /* Carrier starts down, phylib will bring it up */
- netif_carrier_off(dev);
-
device_init_wakeup(&dev->dev,
priv->device_flags &
FSL_GIANFAR_DEV_HAS_MAGIC_PACKET);
priv->phydev = of_phy_connect(dev, priv->phy_node, &adjust_link, 0,
interface);
- if (!priv->phydev)
- priv->phydev = of_phy_connect_fixed_link(dev, &adjust_link,
- interface);
if (!priv->phydev) {
dev_err(&dev->dev, "could not attach to PHY\n");
return -ENODEV;
static void qlcnic_sriov_process_bc_cmd(struct work_struct *);
static int qlcnic_sriov_vf_shutdown(struct pci_dev *);
static int qlcnic_sriov_vf_resume(struct qlcnic_adapter *);
+static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *,
+ struct qlcnic_cmd_args *);
static struct qlcnic_hardware_ops qlcnic_sriov_vf_hw_ops = {
.read_crb = qlcnic_83xx_read_crb,
vf->adapter = adapter;
vf->pci_func = qlcnic_sriov_virtid_fn(adapter, i);
mutex_init(&vf->send_cmd_lock);
- mutex_init(&vf->vlan_list_lock);
+ spin_lock_init(&vf->vlan_list_lock);
INIT_LIST_HEAD(&vf->rcv_act.wait_list);
INIT_LIST_HEAD(&vf->rcv_pend.wait_list);
spin_lock_init(&vf->rcv_act.lock);
goto qlcnic_destroy_async_wq;
}
sriov->vf_info[i].vp = vp;
+ vp->vlan_mode = QLC_GUEST_VLAN_MODE;
vp->max_tx_bw = MAX_BW;
- vp->spoofchk = true;
+ vp->min_tx_bw = MIN_BW;
+ vp->spoofchk = false;
random_ether_addr(vp->mac);
dev_info(&adapter->pdev->dev,
"MAC Address %pM is configured for VF %d\n",
struct qlcnic_cmd_args cmd;
int ret = 0;
+ memset(&cmd, 0, sizeof(cmd));
ret = qlcnic_sriov_alloc_bc_mbx_args(&cmd, QLCNIC_BC_CMD_GET_ACL);
if (ret)
return ret;
{
int err;
+ adapter->flags |= QLCNIC_VLAN_FILTERING;
+ adapter->ahw->total_nic_func = 1;
INIT_LIST_HEAD(&adapter->vf_mc_list);
if (!qlcnic_use_msi_x && !!qlcnic_use_msi)
dev_warn(&adapter->pdev->dev,
cmd->req.arg = (u32 *)trans->req_pay;
cmd->rsp.arg = (u32 *)trans->rsp_pay;
cmd_op = cmd->req.arg[0] & 0xff;
+ cmd->cmd_op = cmd_op;
remainder = (trans->rsp_pay_size) % (bc_pay_sz);
num_frags = (trans->rsp_pay_size) / (bc_pay_sz);
if (remainder)
return -EIO;
}
-static int qlcnic_sriov_issue_cmd(struct qlcnic_adapter *adapter,
+static int __qlcnic_sriov_issue_cmd(struct qlcnic_adapter *adapter,
struct qlcnic_cmd_args *cmd)
{
struct qlcnic_hardware_context *ahw = adapter->ahw;
(mbx_err_code == QLCNIC_MBX_PORT_RSP_OK)) {
rsp = QLCNIC_RCODE_SUCCESS;
} else {
- rsp = mbx_err_code;
- if (!rsp)
- rsp = 1;
- dev_err(dev,
- "MBX command 0x%x failed with err:0x%x for VF %d\n",
- opcode, mbx_err_code, func);
+ if (cmd->type == QLC_83XX_MBX_CMD_NO_WAIT) {
+ rsp = QLCNIC_RCODE_SUCCESS;
+ } else {
+ rsp = mbx_err_code;
+ if (!rsp)
+ rsp = 1;
+
+ dev_err(dev,
+ "MBX command 0x%x failed with err:0x%x for VF %d\n",
+ opcode, mbx_err_code, func);
+ }
}
err_out:
return rsp;
}
+
+static int qlcnic_sriov_issue_cmd(struct qlcnic_adapter *adapter,
+ struct qlcnic_cmd_args *cmd)
+{
+ if (cmd->type == QLC_83XX_MBX_CMD_NO_WAIT)
+ return qlcnic_sriov_async_issue_cmd(adapter, cmd);
+ else
+ return __qlcnic_sriov_issue_cmd(adapter, cmd);
+}
+
static int qlcnic_sriov_channel_cfg_cmd(struct qlcnic_adapter *adapter, u8 cmd_op)
{
struct qlcnic_cmd_args cmd;
struct qlcnic_vf_info *vf = &adapter->ahw->sriov->vf_info[0];
int ret;
+ memset(&cmd, 0, sizeof(cmd));
if (qlcnic_sriov_alloc_bc_mbx_args(&cmd, cmd_op))
return -ENOMEM;
return ret;
}
-static void qlcnic_vf_add_mc_list(struct net_device *netdev)
+static void qlcnic_vf_add_mc_list(struct net_device *netdev, const u8 *mac)
{
struct qlcnic_adapter *adapter = netdev_priv(netdev);
struct qlcnic_sriov *sriov = adapter->ahw->sriov;
- struct qlcnic_mac_vlan_list *cur;
- struct list_head *head, tmp_list;
struct qlcnic_vf_info *vf;
u16 vlan_id;
int i;
- static const u8 bcast_addr[ETH_ALEN] = {
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
- };
-
vf = &adapter->ahw->sriov->vf_info[0];
- INIT_LIST_HEAD(&tmp_list);
- head = &adapter->vf_mc_list;
- netif_addr_lock_bh(netdev);
- while (!list_empty(head)) {
- cur = list_entry(head->next, struct qlcnic_mac_vlan_list, list);
- list_move(&cur->list, &tmp_list);
- }
-
- netif_addr_unlock_bh(netdev);
-
- while (!list_empty(&tmp_list)) {
- cur = list_entry((&tmp_list)->next,
- struct qlcnic_mac_vlan_list, list);
- if (!qlcnic_sriov_check_any_vlan(vf)) {
- qlcnic_nic_add_mac(adapter, bcast_addr, 0);
- qlcnic_nic_add_mac(adapter, cur->mac_addr, 0);
- } else {
- mutex_lock(&vf->vlan_list_lock);
- for (i = 0; i < sriov->num_allowed_vlans; i++) {
- vlan_id = vf->sriov_vlans[i];
- if (vlan_id) {
- qlcnic_nic_add_mac(adapter, bcast_addr,
- vlan_id);
- qlcnic_nic_add_mac(adapter,
- cur->mac_addr,
- vlan_id);
- }
- }
- mutex_unlock(&vf->vlan_list_lock);
- if (qlcnic_84xx_check(adapter)) {
- qlcnic_nic_add_mac(adapter, bcast_addr, 0);
- qlcnic_nic_add_mac(adapter, cur->mac_addr, 0);
- }
+ if (!qlcnic_sriov_check_any_vlan(vf)) {
+ qlcnic_nic_add_mac(adapter, mac, 0);
+ } else {
+ spin_lock(&vf->vlan_list_lock);
+ for (i = 0; i < sriov->num_allowed_vlans; i++) {
+ vlan_id = vf->sriov_vlans[i];
+ if (vlan_id)
+ qlcnic_nic_add_mac(adapter, mac, vlan_id);
}
- list_del(&cur->list);
- kfree(cur);
+ spin_unlock(&vf->vlan_list_lock);
+ if (qlcnic_84xx_check(adapter))
+ qlcnic_nic_add_mac(adapter, mac, 0);
}
}
struct list_head *head = &bc->async_list;
struct qlcnic_async_work_list *entry;
+ flush_workqueue(bc->bc_async_wq);
while (!list_empty(head)) {
entry = list_entry(head->next, struct qlcnic_async_work_list,
list);
}
}
-static void qlcnic_sriov_vf_set_multi(struct net_device *netdev)
+void qlcnic_sriov_vf_set_multi(struct net_device *netdev)
{
struct qlcnic_adapter *adapter = netdev_priv(netdev);
struct qlcnic_hardware_context *ahw = adapter->ahw;
+ static const u8 bcast_addr[ETH_ALEN] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ };
+ struct netdev_hw_addr *ha;
u32 mode = VPORT_MISS_MODE_DROP;
if (!test_bit(__QLCNIC_FW_ATTACHED, &adapter->state))
} else if ((netdev->flags & IFF_ALLMULTI) ||
(netdev_mc_count(netdev) > ahw->max_mc_count)) {
mode = VPORT_MISS_MODE_ACCEPT_MULTI;
+ } else {
+ qlcnic_vf_add_mc_list(netdev, bcast_addr);
+ if (!netdev_mc_empty(netdev)) {
+ netdev_for_each_mc_addr(ha, netdev)
+ qlcnic_vf_add_mc_list(netdev, ha->addr);
+ }
}
- if (qlcnic_sriov_vf_check(adapter))
- qlcnic_vf_add_mc_list(netdev);
+ /* configure unicast MAC address, if there is not sufficient space
+ * to store all the unicast addresses then enable promiscuous mode
+ */
+ if (netdev_uc_count(netdev) > ahw->max_uc_count) {
+ mode = VPORT_MISS_MODE_ACCEPT_ALL;
+ } else if (!netdev_uc_empty(netdev)) {
+ netdev_for_each_uc_addr(ha, netdev)
+ qlcnic_vf_add_mc_list(netdev, ha->addr);
+ }
+
+ if (adapter->pdev->is_virtfn) {
+ if (mode == VPORT_MISS_MODE_ACCEPT_ALL &&
+ !adapter->fdb_mac_learn) {
+ qlcnic_alloc_lb_filters_mem(adapter);
+ adapter->drv_mac_learn = 1;
+ adapter->rx_mac_learn = true;
+ } else {
+ adapter->drv_mac_learn = 0;
+ adapter->rx_mac_learn = false;
+ }
+ }
qlcnic_nic_set_promisc(adapter, mode);
}
-static void qlcnic_sriov_handle_async_multi(struct work_struct *work)
+static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work)
{
struct qlcnic_async_work_list *entry;
- struct net_device *netdev;
+ struct qlcnic_adapter *adapter;
+ struct qlcnic_cmd_args *cmd;
entry = container_of(work, struct qlcnic_async_work_list, work);
- netdev = (struct net_device *)entry->ptr;
-
- qlcnic_sriov_vf_set_multi(netdev);
+ adapter = entry->ptr;
+ cmd = entry->cmd;
+ __qlcnic_sriov_issue_cmd(adapter, cmd);
return;
}
return entry;
}
-static void qlcnic_sriov_schedule_bc_async_work(struct qlcnic_back_channel *bc,
- work_func_t func, void *data)
+static void qlcnic_sriov_schedule_async_cmd(struct qlcnic_back_channel *bc,
+ work_func_t func, void *data,
+ struct qlcnic_cmd_args *cmd)
{
struct qlcnic_async_work_list *entry = NULL;
return;
entry->ptr = data;
+ entry->cmd = cmd;
INIT_WORK(&entry->work, func);
queue_work(bc->bc_async_wq, &entry->work);
}
-void qlcnic_sriov_vf_schedule_multi(struct net_device *netdev)
+static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *adapter,
+ struct qlcnic_cmd_args *cmd)
{
- struct qlcnic_adapter *adapter = netdev_priv(netdev);
struct qlcnic_back_channel *bc = &adapter->ahw->sriov->bc;
if (adapter->need_fw_reset)
- return;
+ return -EIO;
- qlcnic_sriov_schedule_bc_async_work(bc, qlcnic_sriov_handle_async_multi,
- netdev);
+ qlcnic_sriov_schedule_async_cmd(bc, qlcnic_sriov_handle_async_issue_cmd,
+ adapter, cmd);
+ return 0;
}
static int qlcnic_sriov_vf_reinit_driver(struct qlcnic_adapter *adapter)
return 0;
}
+static void qlcnic_sriov_vf_periodic_tasks(struct qlcnic_adapter *adapter)
+{
+ if (adapter->fhash.fnum)
+ qlcnic_prune_lb_filters(adapter);
+}
+
static void qlcnic_sriov_vf_poll_dev_state(struct work_struct *work)
{
struct qlcnic_adapter *adapter;
}
idc->prev_state = idc->curr_state;
+ qlcnic_sriov_vf_periodic_tasks(adapter);
+
if (!ret && test_bit(QLC_83XX_MODULE_LOADED, &idc->status))
qlcnic_schedule_work(adapter, qlcnic_sriov_vf_poll_dev_state,
idc->delay);
if (!vf->sriov_vlans)
return err;
- mutex_lock(&vf->vlan_list_lock);
+ spin_lock_bh(&vf->vlan_list_lock);
for (i = 0; i < sriov->num_allowed_vlans; i++) {
if (vf->sriov_vlans[i] == vlan_id) {
}
}
- mutex_unlock(&vf->vlan_list_lock);
+ spin_unlock_bh(&vf->vlan_list_lock);
return err;
}
{
int err = 0;
- mutex_lock(&vf->vlan_list_lock);
+ spin_lock_bh(&vf->vlan_list_lock);
if (vf->num_vlan >= sriov->num_allowed_vlans)
err = -EINVAL;
- mutex_unlock(&vf->vlan_list_lock);
+ spin_unlock_bh(&vf->vlan_list_lock);
return err;
}
if (!vf->sriov_vlans)
return;
- mutex_lock(&vf->vlan_list_lock);
+ spin_lock_bh(&vf->vlan_list_lock);
switch (opcode) {
case QLC_VLAN_ADD:
netdev_err(adapter->netdev, "Invalid VLAN operation\n");
}
- mutex_unlock(&vf->vlan_list_lock);
+ spin_unlock_bh(&vf->vlan_list_lock);
return;
}
u16 vid, u8 enable)
{
struct qlcnic_sriov *sriov = adapter->ahw->sriov;
+ struct net_device *netdev = adapter->netdev;
struct qlcnic_vf_info *vf;
struct qlcnic_cmd_args cmd;
int ret;
+ memset(&cmd, 0, sizeof(cmd));
if (vid == 0)
return 0;
dev_err(&adapter->pdev->dev,
"Failed to configure guest VLAN, err=%d\n", ret);
} else {
+ netif_addr_lock_bh(netdev);
qlcnic_free_mac_list(adapter);
+ netif_addr_unlock_bh(netdev);
if (enable)
qlcnic_sriov_vlan_operation(vf, vid, QLC_VLAN_ADD);
else
qlcnic_sriov_vlan_operation(vf, vid, QLC_VLAN_DELETE);
- qlcnic_set_multi(adapter->netdev);
+ netif_addr_lock_bh(netdev);
+ qlcnic_set_multi(netdev);
+ netif_addr_unlock_bh(netdev);
}
qlcnic_free_mbx_args(&cmd);
{
bool err = false;
- mutex_lock(&vf->vlan_list_lock);
+ spin_lock_bh(&vf->vlan_list_lock);
if (vf->num_vlan)
err = true;
- mutex_unlock(&vf->vlan_list_lock);
+ spin_unlock_bh(&vf->vlan_list_lock);
return err;
}
module_param(rx_drain_timeout_msecs, uint, 0444);
unsigned int rx_drain_timeout_jiffies;
+unsigned int xenvif_max_queues;
+module_param_named(max_queues, xenvif_max_queues, uint, 0644);
+MODULE_PARM_DESC(max_queues,
+ "Maximum number of queues per virtual interface");
+
/*
* This is the maximum slots a skb can have. If a guest sends a skb
* which exceeds this limit it is considered malicious.
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);
-static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
+static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
u8 status);
-static void make_tx_response(struct xenvif *vif,
+static void make_tx_response(struct xenvif_queue *queue,
struct xen_netif_tx_request *txp,
s8 st);
-static inline int tx_work_todo(struct xenvif *vif);
-static inline int rx_work_todo(struct xenvif *vif);
+static inline int tx_work_todo(struct xenvif_queue *queue);
+static inline int rx_work_todo(struct xenvif_queue *queue);
-static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
+static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
u16 id,
s8 st,
u16 offset,
u16 size,
u16 flags);
-static inline unsigned long idx_to_pfn(struct xenvif *vif,
+static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
u16 idx)
{
- return page_to_pfn(vif->mmap_pages[idx]);
+ return page_to_pfn(queue->mmap_pages[idx]);
}
-static inline unsigned long idx_to_kaddr(struct xenvif *vif,
+static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
u16 idx)
{
- return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
+ return (unsigned long)pfn_to_kaddr(idx_to_pfn(queue, idx));
}
#define callback_param(vif, pending_idx) \
/* Find the containing VIF's structure from a pointer in pending_tx_info array
*/
-static inline struct xenvif *ubuf_to_vif(const struct ubuf_info *ubuf)
+static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
{
u16 pending_idx = ubuf->desc;
struct pending_tx_info *temp =
container_of(ubuf, struct pending_tx_info, callback_struct);
return container_of(temp - pending_idx,
- struct xenvif,
+ struct xenvif_queue,
pending_tx_info[0]);
}
return i & (MAX_PENDING_REQS-1);
}
-bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed)
+bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed)
{
RING_IDX prod, cons;
do {
- prod = vif->rx.sring->req_prod;
- cons = vif->rx.req_cons;
+ prod = queue->rx.sring->req_prod;
+ cons = queue->rx.req_cons;
if (prod - cons >= needed)
return true;
- vif->rx.sring->req_event = prod + 1;
+ queue->rx.sring->req_event = prod + 1;
/* Make sure event is visible before we check prod
* again.
*/
mb();
- } while (vif->rx.sring->req_prod != prod);
+ } while (queue->rx.sring->req_prod != prod);
return false;
}
* adding 'size' bytes to a buffer which currently contains 'offset'
* bytes.
*/
- static bool start_new_rx_buffer(int offset, unsigned long size, int head)
+ static bool start_new_rx_buffer(int offset, unsigned long size, int head,
+ bool full_coalesce)
{
/* simple case: we have completely filled the current buffer. */
if (offset == MAX_BUFFER_OFFSET)
* (i) this frag would fit completely in the next buffer
* and (ii) there is already some data in the current buffer
* and (iii) this is not the head buffer.
+ * and (iv) there is no need to fully utilize the buffers
*
* Where:
* - (i) stops us splitting a frag into two copies
* by (ii) but is explicitly checked because
* netfront relies on the first buffer being
* non-empty and can crash otherwise.
+ * - (iv) is needed for skbs which can use up more than MAX_SKB_FRAGS
+ * slot
*
* This means we will effectively linearise small
* frags but do not needlessly split large buffers
* own buffers as before.
*/
BUG_ON(size > MAX_BUFFER_OFFSET);
- if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head)
+ if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head &&
+ !full_coalesce)
return true;
return false;
grant_ref_t copy_gref;
};
-static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
+static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue,
struct netrx_pending_operations *npo)
{
struct xenvif_rx_meta *meta;
struct xen_netif_rx_request *req;
- req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
+ req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
meta = npo->meta + npo->meta_prod++;
meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
return meta;
}
+ struct xenvif_rx_cb {
+ int meta_slots_used;
+ bool full_coalesce;
+ };
+
+ #define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
+
/*
* Set up the grant operations for this fragment. If it's a flipping
* interface, we also set up the unmap request from here.
*/
-static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
+static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb,
struct netrx_pending_operations *npo,
struct page *page, unsigned long size,
unsigned long offset, int *head,
- struct xenvif *foreign_vif,
+ struct xenvif_queue *foreign_queue,
grant_ref_t foreign_gref)
{
struct gnttab_copy *copy_gop;
if (bytes > size)
bytes = size;
- if (start_new_rx_buffer(npo->copy_off, bytes, *head)) {
+ if (start_new_rx_buffer(npo->copy_off,
+ bytes,
+ *head,
+ XENVIF_RX_CB(skb)->full_coalesce)) {
/*
* Netfront requires there to be some data in the head
* buffer.
*/
BUG_ON(*head);
- meta = get_next_rx_buffer(vif, npo);
+ meta = get_next_rx_buffer(queue, npo);
}
if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
copy_gop->flags = GNTCOPY_dest_gref;
copy_gop->len = bytes;
- if (foreign_vif) {
- copy_gop->source.domid = foreign_vif->domid;
+ if (foreign_queue) {
+ copy_gop->source.domid = foreign_queue->vif->domid;
copy_gop->source.u.ref = foreign_gref;
copy_gop->flags |= GNTCOPY_source_gref;
} else {
}
copy_gop->source.offset = offset;
- copy_gop->dest.domid = vif->domid;
+ copy_gop->dest.domid = queue->vif->domid;
copy_gop->dest.offset = npo->copy_off;
copy_gop->dest.u.ref = npo->copy_gref;
gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
}
- if (*head && ((1 << gso_type) & vif->gso_mask))
- vif->rx.req_cons++;
+ if (*head && ((1 << gso_type) & queue->vif->gso_mask))
+ queue->rx.req_cons++;
*head = 0; /* There must be something in this buffer now. */
const int i,
const struct ubuf_info *ubuf)
{
- struct xenvif *foreign_vif = ubuf_to_vif(ubuf);
+ struct xenvif_queue *foreign_queue = ubuf_to_queue(ubuf);
do {
u16 pending_idx = ubuf->desc;
if (skb_shinfo(skb)->frags[i].page.p ==
- foreign_vif->mmap_pages[pending_idx])
+ foreign_queue->mmap_pages[pending_idx])
break;
ubuf = (struct ubuf_info *) ubuf->ctx;
} while (ubuf);
* frontend-side LRO).
*/
static int xenvif_gop_skb(struct sk_buff *skb,
- struct netrx_pending_operations *npo)
+ struct netrx_pending_operations *npo,
+ struct xenvif_queue *queue)
{
struct xenvif *vif = netdev_priv(skb->dev);
int nr_frags = skb_shinfo(skb)->nr_frags;
/* Set up a GSO prefix descriptor, if necessary */
if ((1 << gso_type) & vif->gso_prefix_mask) {
- req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
+ req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
meta = npo->meta + npo->meta_prod++;
meta->gso_type = gso_type;
meta->gso_size = skb_shinfo(skb)->gso_size;
meta->id = req->id;
}
- req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
+ req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
meta = npo->meta + npo->meta_prod++;
if ((1 << gso_type) & vif->gso_mask) {
if (data + len > skb_tail_pointer(skb))
len = skb_tail_pointer(skb) - data;
- xenvif_gop_frag_copy(vif, skb, npo,
+ xenvif_gop_frag_copy(queue, skb, npo,
virt_to_page(data), len, offset, &head,
NULL,
0);
/* This variable also signals whether foreign_gref has a real
* value or not.
*/
- struct xenvif *foreign_vif = NULL;
+ struct xenvif_queue *foreign_queue = NULL;
grant_ref_t foreign_gref;
if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) &&
if (likely(ubuf)) {
u16 pending_idx = ubuf->desc;
- foreign_vif = ubuf_to_vif(ubuf);
- foreign_gref = foreign_vif->pending_tx_info[pending_idx].req.gref;
+ foreign_queue = ubuf_to_queue(ubuf);
+ foreign_gref =
+ foreign_queue->pending_tx_info[pending_idx].req.gref;
/* Just a safety measure. If this was the last
* element on the list, the for loop will
* iterate again if a local page were added to
*/
ubuf = head_ubuf;
}
- xenvif_gop_frag_copy(vif, skb, npo,
+ xenvif_gop_frag_copy(queue, skb, npo,
skb_frag_page(&skb_shinfo(skb)->frags[i]),
skb_frag_size(&skb_shinfo(skb)->frags[i]),
skb_shinfo(skb)->frags[i].page_offset,
&head,
- foreign_vif,
- foreign_vif ? foreign_gref : UINT_MAX);
+ foreign_queue,
+ foreign_queue ? foreign_gref : UINT_MAX);
}
return npo->meta_prod - old_meta_prod;
return status;
}
-static void xenvif_add_frag_responses(struct xenvif *vif, int status,
+static void xenvif_add_frag_responses(struct xenvif_queue *queue, int status,
struct xenvif_rx_meta *meta,
int nr_meta_slots)
{
flags = XEN_NETRXF_more_data;
offset = 0;
- make_rx_response(vif, meta[i].id, status, offset,
+ make_rx_response(queue, meta[i].id, status, offset,
meta[i].size, flags);
}
}
- struct xenvif_rx_cb {
- int meta_slots_used;
- };
-
- #define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
-
-void xenvif_kick_thread(struct xenvif *vif)
+void xenvif_kick_thread(struct xenvif_queue *queue)
{
- wake_up(&vif->wq);
+ wake_up(&queue->wq);
}
-static void xenvif_rx_action(struct xenvif *vif)
+static void xenvif_rx_action(struct xenvif_queue *queue)
{
s8 status;
u16 flags;
bool need_to_notify = false;
struct netrx_pending_operations npo = {
- .copy = vif->grant_copy_op,
- .meta = vif->meta,
+ .copy = queue->grant_copy_op,
+ .meta = queue->meta,
};
skb_queue_head_init(&rxq);
- while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
+ while ((skb = skb_dequeue(&queue->rx_queue)) != NULL) {
RING_IDX max_slots_needed;
RING_IDX old_req_cons;
RING_IDX ring_slots_used;
/* To avoid the estimate becoming too pessimal for some
* frontends that limit posted rx requests, cap the estimate
- * at MAX_SKB_FRAGS.
+ * at MAX_SKB_FRAGS. In this case netback will fully coalesce
+ * the skb into the provided slots.
*/
- if (max_slots_needed > MAX_SKB_FRAGS)
+ if (max_slots_needed > MAX_SKB_FRAGS) {
max_slots_needed = MAX_SKB_FRAGS;
+ XENVIF_RX_CB(skb)->full_coalesce = true;
+ } else {
+ XENVIF_RX_CB(skb)->full_coalesce = false;
+ }
/* We may need one more slot for GSO metadata */
if (skb_is_gso(skb) &&
max_slots_needed++;
/* If the skb may not fit then bail out now */
- if (!xenvif_rx_ring_slots_available(vif, max_slots_needed)) {
- skb_queue_head(&vif->rx_queue, skb);
+ if (!xenvif_rx_ring_slots_available(queue, max_slots_needed)) {
+ skb_queue_head(&queue->rx_queue, skb);
need_to_notify = true;
- vif->rx_last_skb_slots = max_slots_needed;
+ queue->rx_last_skb_slots = max_slots_needed;
break;
} else
- vif->rx_last_skb_slots = 0;
+ queue->rx_last_skb_slots = 0;
- old_req_cons = vif->rx.req_cons;
- XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo);
- ring_slots_used = vif->rx.req_cons - old_req_cons;
+ old_req_cons = queue->rx.req_cons;
+ XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue);
+ ring_slots_used = queue->rx.req_cons - old_req_cons;
BUG_ON(ring_slots_used > max_slots_needed);
__skb_queue_tail(&rxq, skb);
}
- BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta));
+ BUG_ON(npo.meta_prod > ARRAY_SIZE(queue->meta));
if (!npo.copy_prod)
goto done;
BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
- gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);
+ gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod);
while ((skb = __skb_dequeue(&rxq)) != NULL) {
- if ((1 << vif->meta[npo.meta_cons].gso_type) &
- vif->gso_prefix_mask) {
- resp = RING_GET_RESPONSE(&vif->rx,
- vif->rx.rsp_prod_pvt++);
+ if ((1 << queue->meta[npo.meta_cons].gso_type) &
+ queue->vif->gso_prefix_mask) {
+ resp = RING_GET_RESPONSE(&queue->rx,
+ queue->rx.rsp_prod_pvt++);
resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
- resp->offset = vif->meta[npo.meta_cons].gso_size;
- resp->id = vif->meta[npo.meta_cons].id;
+ resp->offset = queue->meta[npo.meta_cons].gso_size;
+ resp->id = queue->meta[npo.meta_cons].id;
resp->status = XENVIF_RX_CB(skb)->meta_slots_used;
npo.meta_cons++;
}
- vif->dev->stats.tx_bytes += skb->len;
- vif->dev->stats.tx_packets++;
+ queue->stats.tx_bytes += skb->len;
+ queue->stats.tx_packets++;
- status = xenvif_check_gop(vif,
+ status = xenvif_check_gop(queue->vif,
XENVIF_RX_CB(skb)->meta_slots_used,
&npo);
flags |= XEN_NETRXF_data_validated;
offset = 0;
- resp = make_rx_response(vif, vif->meta[npo.meta_cons].id,
+ resp = make_rx_response(queue, queue->meta[npo.meta_cons].id,
status, offset,
- vif->meta[npo.meta_cons].size,
+ queue->meta[npo.meta_cons].size,
flags);
- if ((1 << vif->meta[npo.meta_cons].gso_type) &
- vif->gso_mask) {
+ if ((1 << queue->meta[npo.meta_cons].gso_type) &
+ queue->vif->gso_mask) {
struct xen_netif_extra_info *gso =
(struct xen_netif_extra_info *)
- RING_GET_RESPONSE(&vif->rx,
- vif->rx.rsp_prod_pvt++);
+ RING_GET_RESPONSE(&queue->rx,
+ queue->rx.rsp_prod_pvt++);
resp->flags |= XEN_NETRXF_extra_info;
- gso->u.gso.type = vif->meta[npo.meta_cons].gso_type;
- gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
+ gso->u.gso.type = queue->meta[npo.meta_cons].gso_type;
+ gso->u.gso.size = queue->meta[npo.meta_cons].gso_size;
gso->u.gso.pad = 0;
gso->u.gso.features = 0;
gso->flags = 0;
}
- xenvif_add_frag_responses(vif, status,
- vif->meta + npo.meta_cons + 1,
+ xenvif_add_frag_responses(queue, status,
+ queue->meta + npo.meta_cons + 1,
XENVIF_RX_CB(skb)->meta_slots_used);
- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret);
need_to_notify |= !!ret;
done:
if (need_to_notify)
- notify_remote_via_irq(vif->rx_irq);
+ notify_remote_via_irq(queue->rx_irq);
}
-void xenvif_napi_schedule_or_enable_events(struct xenvif *vif)
+void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
{
int more_to_do;
- RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
+ RING_FINAL_CHECK_FOR_REQUESTS(&queue->tx, more_to_do);
if (more_to_do)
- napi_schedule(&vif->napi);
+ napi_schedule(&queue->napi);
}
-static void tx_add_credit(struct xenvif *vif)
+static void tx_add_credit(struct xenvif_queue *queue)
{
unsigned long max_burst, max_credit;
* Allow a burst big enough to transmit a jumbo packet of up to 128kB.
* Otherwise the interface can seize up due to insufficient credit.
*/
- max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
+ max_burst = RING_GET_REQUEST(&queue->tx, queue->tx.req_cons)->size;
max_burst = min(max_burst, 131072UL);
- max_burst = max(max_burst, vif->credit_bytes);
+ max_burst = max(max_burst, queue->credit_bytes);
/* Take care that adding a new chunk of credit doesn't wrap to zero. */
- max_credit = vif->remaining_credit + vif->credit_bytes;
- if (max_credit < vif->remaining_credit)
+ max_credit = queue->remaining_credit + queue->credit_bytes;
+ if (max_credit < queue->remaining_credit)
max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
- vif->remaining_credit = min(max_credit, max_burst);
+ queue->remaining_credit = min(max_credit, max_burst);
}
static void tx_credit_callback(unsigned long data)
{
- struct xenvif *vif = (struct xenvif *)data;
- tx_add_credit(vif);
- xenvif_napi_schedule_or_enable_events(vif);
+ struct xenvif_queue *queue = (struct xenvif_queue *)data;
+ tx_add_credit(queue);
+ xenvif_napi_schedule_or_enable_events(queue);
}
-static void xenvif_tx_err(struct xenvif *vif,
+static void xenvif_tx_err(struct xenvif_queue *queue,
struct xen_netif_tx_request *txp, RING_IDX end)
{
- RING_IDX cons = vif->tx.req_cons;
+ RING_IDX cons = queue->tx.req_cons;
unsigned long flags;
do {
- spin_lock_irqsave(&vif->response_lock, flags);
- make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
- spin_unlock_irqrestore(&vif->response_lock, flags);
+ spin_lock_irqsave(&queue->response_lock, flags);
+ make_tx_response(queue, txp, XEN_NETIF_RSP_ERROR);
+ spin_unlock_irqrestore(&queue->response_lock, flags);
if (cons == end)
break;
- txp = RING_GET_REQUEST(&vif->tx, cons++);
+ txp = RING_GET_REQUEST(&queue->tx, cons++);
} while (1);
- vif->tx.req_cons = cons;
+ queue->tx.req_cons = cons;
}
static void xenvif_fatal_tx_err(struct xenvif *vif)
{
netdev_err(vif->dev, "fatal error; disabling device\n");
vif->disabled = true;
- xenvif_kick_thread(vif);
+ /* Disable the vif from queue 0's kthread */
+ if (vif->queues)
+ xenvif_kick_thread(&vif->queues[0]);
}
-static int xenvif_count_requests(struct xenvif *vif,
+static int xenvif_count_requests(struct xenvif_queue *queue,
struct xen_netif_tx_request *first,
struct xen_netif_tx_request *txp,
int work_to_do)
{
- RING_IDX cons = vif->tx.req_cons;
+ RING_IDX cons = queue->tx.req_cons;
int slots = 0;
int drop_err = 0;
int more_data;
struct xen_netif_tx_request dropped_tx = { 0 };
if (slots >= work_to_do) {
- netdev_err(vif->dev,
+ netdev_err(queue->vif->dev,
"Asked for %d slots but exceeds this limit\n",
work_to_do);
- xenvif_fatal_tx_err(vif);
+ xenvif_fatal_tx_err(queue->vif);
return -ENODATA;
}
* considered malicious.
*/
if (unlikely(slots >= fatal_skb_slots)) {
- netdev_err(vif->dev,
+ netdev_err(queue->vif->dev,
"Malicious frontend using %d slots, threshold %u\n",
slots, fatal_skb_slots);
- xenvif_fatal_tx_err(vif);
+ xenvif_fatal_tx_err(queue->vif);
return -E2BIG;
}
*/
if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
if (net_ratelimit())
- netdev_dbg(vif->dev,
+ netdev_dbg(queue->vif->dev,
"Too many slots (%d) exceeding limit (%d), dropping packet\n",
slots, XEN_NETBK_LEGACY_SLOTS_MAX);
drop_err = -E2BIG;
if (drop_err)
txp = &dropped_tx;
- memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
+ memcpy(txp, RING_GET_REQUEST(&queue->tx, cons + slots),
sizeof(*txp));
/* If the guest submitted a frame >= 64 KiB then
*/
if (!drop_err && txp->size > first->size) {
if (net_ratelimit())
- netdev_dbg(vif->dev,
+ netdev_dbg(queue->vif->dev,
"Invalid tx request, slot size %u > remaining size %u\n",
txp->size, first->size);
drop_err = -EIO;
slots++;
if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
- netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
+ netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
txp->offset, txp->size);
- xenvif_fatal_tx_err(vif);
+ xenvif_fatal_tx_err(queue->vif);
return -EINVAL;
}
} while (more_data);
if (drop_err) {
- xenvif_tx_err(vif, first, cons + slots);
+ xenvif_tx_err(queue, first, cons + slots);
return drop_err;
}
#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
-static inline void xenvif_tx_create_map_op(struct xenvif *vif,
+static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
u16 pending_idx,
struct xen_netif_tx_request *txp,
struct gnttab_map_grant_ref *mop)
{
- vif->pages_to_map[mop-vif->tx_map_ops] = vif->mmap_pages[pending_idx];
- gnttab_set_map_op(mop, idx_to_kaddr(vif, pending_idx),
+ queue->pages_to_map[mop-queue->tx_map_ops] = queue->mmap_pages[pending_idx];
+ gnttab_set_map_op(mop, idx_to_kaddr(queue, pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
- txp->gref, vif->domid);
+ txp->gref, queue->vif->domid);
- memcpy(&vif->pending_tx_info[pending_idx].req, txp,
+ memcpy(&queue->pending_tx_info[pending_idx].req, txp,
sizeof(*txp));
}
return skb;
}
-static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
+static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
struct sk_buff *skb,
struct xen_netif_tx_request *txp,
struct gnttab_map_grant_ref *gop)
for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
shinfo->nr_frags++, txp++, gop++) {
- index = pending_index(vif->pending_cons++);
- pending_idx = vif->pending_ring[index];
- xenvif_tx_create_map_op(vif, pending_idx, txp, gop);
+ index = pending_index(queue->pending_cons++);
+ pending_idx = queue->pending_ring[index];
+ xenvif_tx_create_map_op(queue, pending_idx, txp, gop);
frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
}
struct sk_buff *nskb = xenvif_alloc_skb(0);
if (unlikely(nskb == NULL)) {
if (net_ratelimit())
- netdev_err(vif->dev,
+ netdev_err(queue->vif->dev,
"Can't allocate the frag_list skb.\n");
return NULL;
}
for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
shinfo->nr_frags++, txp++, gop++) {
- index = pending_index(vif->pending_cons++);
- pending_idx = vif->pending_ring[index];
- xenvif_tx_create_map_op(vif, pending_idx, txp, gop);
+ index = pending_index(queue->pending_cons++);
+ pending_idx = queue->pending_ring[index];
+ xenvif_tx_create_map_op(queue, pending_idx, txp, gop);
frag_set_pending_idx(&frags[shinfo->nr_frags],
pending_idx);
}
return gop;
}
-static inline void xenvif_grant_handle_set(struct xenvif *vif,
+static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
u16 pending_idx,
grant_handle_t handle)
{
- if (unlikely(vif->grant_tx_handle[pending_idx] !=
+ if (unlikely(queue->grant_tx_handle[pending_idx] !=
NETBACK_INVALID_HANDLE)) {
- netdev_err(vif->dev,
+ netdev_err(queue->vif->dev,
"Trying to overwrite active handle! pending_idx: %x\n",
pending_idx);
BUG();
}
- vif->grant_tx_handle[pending_idx] = handle;
+ queue->grant_tx_handle[pending_idx] = handle;
}
-static inline void xenvif_grant_handle_reset(struct xenvif *vif,
+static inline void xenvif_grant_handle_reset(struct xenvif_queue *queue,
u16 pending_idx)
{
- if (unlikely(vif->grant_tx_handle[pending_idx] ==
+ if (unlikely(queue->grant_tx_handle[pending_idx] ==
NETBACK_INVALID_HANDLE)) {
- netdev_err(vif->dev,
+ netdev_err(queue->vif->dev,
"Trying to unmap invalid handle! pending_idx: %x\n",
pending_idx);
BUG();
}
- vif->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
+ queue->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
}
-static int xenvif_tx_check_gop(struct xenvif *vif,
+static int xenvif_tx_check_gop(struct xenvif_queue *queue,
struct sk_buff *skb,
struct gnttab_map_grant_ref **gopp_map,
struct gnttab_copy **gopp_copy)
(*gopp_copy)++;
if (unlikely(err)) {
if (net_ratelimit())
- netdev_dbg(vif->dev,
+ netdev_dbg(queue->vif->dev,
"Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
(*gopp_copy)->status,
pending_idx,
(*gopp_copy)->source.u.ref);
- xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
+ xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
}
check_frags:
newerr = gop_map->status;
if (likely(!newerr)) {
- xenvif_grant_handle_set(vif,
+ xenvif_grant_handle_set(queue,
pending_idx,
gop_map->handle);
/* Had a previous error? Invalidate this fragment. */
if (unlikely(err))
- xenvif_idx_unmap(vif, pending_idx);
+ xenvif_idx_unmap(queue, pending_idx);
continue;
}
/* Error on this fragment: respond to client with an error. */
if (net_ratelimit())
- netdev_dbg(vif->dev,
+ netdev_dbg(queue->vif->dev,
"Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
i,
gop_map->status,
pending_idx,
gop_map->ref);
- xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
+ xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
/* Not the first error? Preceding frags already invalidated. */
if (err)
/* First error: invalidate preceding fragments. */
for (j = 0; j < i; j++) {
pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
- xenvif_idx_unmap(vif, pending_idx);
+ xenvif_idx_unmap(queue, pending_idx);
}
/* Remember the error: invalidate all subsequent fragments. */
shinfo = skb_shinfo(first_skb);
for (j = 0; j < shinfo->nr_frags; j++) {
pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
- xenvif_idx_unmap(vif, pending_idx);
+ xenvif_idx_unmap(queue, pending_idx);
}
}
return err;
}
-static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
+static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
int nr_frags = shinfo->nr_frags;
/* If this is not the first frag, chain it to the previous*/
if (prev_pending_idx == INVALID_PENDING_IDX)
skb_shinfo(skb)->destructor_arg =
- &callback_param(vif, pending_idx);
+ &callback_param(queue, pending_idx);
else
- callback_param(vif, prev_pending_idx).ctx =
- &callback_param(vif, pending_idx);
+ callback_param(queue, prev_pending_idx).ctx =
+ &callback_param(queue, pending_idx);
- callback_param(vif, pending_idx).ctx = NULL;
+ callback_param(queue, pending_idx).ctx = NULL;
prev_pending_idx = pending_idx;
- txp = &vif->pending_tx_info[pending_idx].req;
- page = virt_to_page(idx_to_kaddr(vif, pending_idx));
+ txp = &queue->pending_tx_info[pending_idx].req;
+ page = virt_to_page(idx_to_kaddr(queue, pending_idx));
__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
skb->len += txp->size;
skb->data_len += txp->size;
skb->truesize += txp->size;
/* Take an extra reference to offset network stack's put_page */
- get_page(vif->mmap_pages[pending_idx]);
+ get_page(queue->mmap_pages[pending_idx]);
}
/* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
* overlaps with "index", and "mapping" is not set. I think mapping
skb->pfmemalloc = false;
}
-static int xenvif_get_extras(struct xenvif *vif,
+static int xenvif_get_extras(struct xenvif_queue *queue,
struct xen_netif_extra_info *extras,
int work_to_do)
{
struct xen_netif_extra_info extra;
- RING_IDX cons = vif->tx.req_cons;
+ RING_IDX cons = queue->tx.req_cons;
do {
if (unlikely(work_to_do-- <= 0)) {
- netdev_err(vif->dev, "Missing extra info\n");
- xenvif_fatal_tx_err(vif);
+ netdev_err(queue->vif->dev, "Missing extra info\n");
+ xenvif_fatal_tx_err(queue->vif);
return -EBADR;
}
- memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
+ memcpy(&extra, RING_GET_REQUEST(&queue->tx, cons),
sizeof(extra));
if (unlikely(!extra.type ||
extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
- vif->tx.req_cons = ++cons;
- netdev_err(vif->dev,
+ queue->tx.req_cons = ++cons;
+ netdev_err(queue->vif->dev,
"Invalid extra type: %d\n", extra.type);
- xenvif_fatal_tx_err(vif);
+ xenvif_fatal_tx_err(queue->vif);
return -EINVAL;
}
memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
- vif->tx.req_cons = ++cons;
+ queue->tx.req_cons = ++cons;
} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
return work_to_do;
return 0;
}
-static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
+static int checksum_setup(struct xenvif_queue *queue, struct sk_buff *skb)
{
bool recalculate_partial_csum = false;
* recalculate the partial checksum.
*/
if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
- vif->rx_gso_checksum_fixup++;
+ queue->stats.rx_gso_checksum_fixup++;
skb->ip_summed = CHECKSUM_PARTIAL;
recalculate_partial_csum = true;
}
return skb_checksum_setup(skb, recalculate_partial_csum);
}
-static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
+static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
{
u64 now = get_jiffies_64();
- u64 next_credit = vif->credit_window_start +
- msecs_to_jiffies(vif->credit_usec / 1000);
+ u64 next_credit = queue->credit_window_start +
+ msecs_to_jiffies(queue->credit_usec / 1000);
/* Timer could already be pending in rare cases. */
- if (timer_pending(&vif->credit_timeout))
+ if (timer_pending(&queue->credit_timeout))
return true;
/* Passed the point where we can replenish credit? */
if (time_after_eq64(now, next_credit)) {
- vif->credit_window_start = now;
- tx_add_credit(vif);
+ queue->credit_window_start = now;
+ tx_add_credit(queue);
}
/* Still too big to send right now? Set a callback. */
- if (size > vif->remaining_credit) {
- vif->credit_timeout.data =
- (unsigned long)vif;
- vif->credit_timeout.function =
+ if (size > queue->remaining_credit) {
+ queue->credit_timeout.data =
+ (unsigned long)queue;
+ queue->credit_timeout.function =
tx_credit_callback;
- mod_timer(&vif->credit_timeout,
+ mod_timer(&queue->credit_timeout,
next_credit);
- vif->credit_window_start = next_credit;
+ queue->credit_window_start = next_credit;
return true;
}
return false;
}
-static void xenvif_tx_build_gops(struct xenvif *vif,
+static void xenvif_tx_build_gops(struct xenvif_queue *queue,
int budget,
unsigned *copy_ops,
unsigned *map_ops)
{
- struct gnttab_map_grant_ref *gop = vif->tx_map_ops, *request_gop;
+ struct gnttab_map_grant_ref *gop = queue->tx_map_ops, *request_gop;
struct sk_buff *skb;
int ret;
- while (skb_queue_len(&vif->tx_queue) < budget) {
+ while (skb_queue_len(&queue->tx_queue) < budget) {
struct xen_netif_tx_request txreq;
struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
unsigned int data_len;
pending_ring_idx_t index;
- if (vif->tx.sring->req_prod - vif->tx.req_cons >
+ if (queue->tx.sring->req_prod - queue->tx.req_cons >
XEN_NETIF_TX_RING_SIZE) {
- netdev_err(vif->dev,
+ netdev_err(queue->vif->dev,
"Impossible number of requests. "
"req_prod %d, req_cons %d, size %ld\n",
- vif->tx.sring->req_prod, vif->tx.req_cons,
+ queue->tx.sring->req_prod, queue->tx.req_cons,
XEN_NETIF_TX_RING_SIZE);
- xenvif_fatal_tx_err(vif);
+ xenvif_fatal_tx_err(queue->vif);
break;
}
- work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx);
+ work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
if (!work_to_do)
break;
- idx = vif->tx.req_cons;
+ idx = queue->tx.req_cons;
rmb(); /* Ensure that we see the request before we copy it. */
- memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));
+ memcpy(&txreq, RING_GET_REQUEST(&queue->tx, idx), sizeof(txreq));
/* Credit-based scheduling. */
- if (txreq.size > vif->remaining_credit &&
- tx_credit_exceeded(vif, txreq.size))
+ if (txreq.size > queue->remaining_credit &&
+ tx_credit_exceeded(queue, txreq.size))
break;
- vif->remaining_credit -= txreq.size;
+ queue->remaining_credit -= txreq.size;
work_to_do--;
- vif->tx.req_cons = ++idx;
+ queue->tx.req_cons = ++idx;
memset(extras, 0, sizeof(extras));
if (txreq.flags & XEN_NETTXF_extra_info) {
- work_to_do = xenvif_get_extras(vif, extras,
+ work_to_do = xenvif_get_extras(queue, extras,
work_to_do);
- idx = vif->tx.req_cons;
+ idx = queue->tx.req_cons;
if (unlikely(work_to_do < 0))
break;
}
- ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do);
+ ret = xenvif_count_requests(queue, &txreq, txfrags, work_to_do);
if (unlikely(ret < 0))
break;
idx += ret;
if (unlikely(txreq.size < ETH_HLEN)) {
- netdev_dbg(vif->dev,
+ netdev_dbg(queue->vif->dev,
"Bad packet size: %d\n", txreq.size);
- xenvif_tx_err(vif, &txreq, idx);
+ xenvif_tx_err(queue, &txreq, idx);
break;
}
/* No crossing a page as the payload mustn't fragment. */
if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
- netdev_err(vif->dev,
+ netdev_err(queue->vif->dev,
"txreq.offset: %x, size: %u, end: %lu\n",
txreq.offset, txreq.size,
(txreq.offset&~PAGE_MASK) + txreq.size);
- xenvif_fatal_tx_err(vif);
+ xenvif_fatal_tx_err(queue->vif);
break;
}
- index = pending_index(vif->pending_cons);
- pending_idx = vif->pending_ring[index];
+ index = pending_index(queue->pending_cons);
+ pending_idx = queue->pending_ring[index];
data_len = (txreq.size > PKT_PROT_LEN &&
ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
skb = xenvif_alloc_skb(data_len);
if (unlikely(skb == NULL)) {
- netdev_dbg(vif->dev,
+ netdev_dbg(queue->vif->dev,
"Can't allocate a skb in start_xmit.\n");
- xenvif_tx_err(vif, &txreq, idx);
+ xenvif_tx_err(queue, &txreq, idx);
break;
}
struct xen_netif_extra_info *gso;
gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
- if (xenvif_set_skb_gso(vif, skb, gso)) {
+ if (xenvif_set_skb_gso(queue->vif, skb, gso)) {
/* Failure in xenvif_set_skb_gso is fatal. */
kfree_skb(skb);
break;
XENVIF_TX_CB(skb)->pending_idx = pending_idx;
__skb_put(skb, data_len);
- vif->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
- vif->tx_copy_ops[*copy_ops].source.domid = vif->domid;
- vif->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
+ queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
+ queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
+ queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
- vif->tx_copy_ops[*copy_ops].dest.u.gmfn =
+ queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
virt_to_mfn(skb->data);
- vif->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
- vif->tx_copy_ops[*copy_ops].dest.offset =
+ queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
+ queue->tx_copy_ops[*copy_ops].dest.offset =
offset_in_page(skb->data);
- vif->tx_copy_ops[*copy_ops].len = data_len;
- vif->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
+ queue->tx_copy_ops[*copy_ops].len = data_len;
+ queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
(*copy_ops)++;
skb_shinfo(skb)->nr_frags++;
frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
pending_idx);
- xenvif_tx_create_map_op(vif, pending_idx, &txreq, gop);
+ xenvif_tx_create_map_op(queue, pending_idx, &txreq, gop);
gop++;
} else {
frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
INVALID_PENDING_IDX);
- memcpy(&vif->pending_tx_info[pending_idx].req, &txreq,
+ memcpy(&queue->pending_tx_info[pending_idx].req, &txreq,
sizeof(txreq));
}
- vif->pending_cons++;
+ queue->pending_cons++;
- request_gop = xenvif_get_requests(vif, skb, txfrags, gop);
+ request_gop = xenvif_get_requests(queue, skb, txfrags, gop);
if (request_gop == NULL) {
kfree_skb(skb);
- xenvif_tx_err(vif, &txreq, idx);
+ xenvif_tx_err(queue, &txreq, idx);
break;
}
gop = request_gop;
- __skb_queue_tail(&vif->tx_queue, skb);
+ __skb_queue_tail(&queue->tx_queue, skb);
- vif->tx.req_cons = idx;
+ queue->tx.req_cons = idx;
- if (((gop-vif->tx_map_ops) >= ARRAY_SIZE(vif->tx_map_ops)) ||
- (*copy_ops >= ARRAY_SIZE(vif->tx_copy_ops)))
+ if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
+ (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
break;
}
- (*map_ops) = gop - vif->tx_map_ops;
+ (*map_ops) = gop - queue->tx_map_ops;
return;
}
/* Consolidate skb with a frag_list into a brand new one with local pages on
* frags. Returns 0 or -ENOMEM if can't allocate new pages.
*/
-static int xenvif_handle_frag_list(struct xenvif *vif, struct sk_buff *skb)
+static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *skb)
{
unsigned int offset = skb_headlen(skb);
skb_frag_t frags[MAX_SKB_FRAGS];
struct ubuf_info *uarg;
struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
- vif->tx_zerocopy_sent += 2;
- vif->tx_frag_overflow++;
+ queue->stats.tx_zerocopy_sent += 2;
+ queue->stats.tx_frag_overflow++;
- xenvif_fill_frags(vif, nskb);
+ xenvif_fill_frags(queue, nskb);
/* Subtract frags size, we will correct it later */
skb->truesize -= skb->data_len;
skb->len += nskb->len;
return 0;
}
-static int xenvif_tx_submit(struct xenvif *vif)
+static int xenvif_tx_submit(struct xenvif_queue *queue)
{
- struct gnttab_map_grant_ref *gop_map = vif->tx_map_ops;
- struct gnttab_copy *gop_copy = vif->tx_copy_ops;
+ struct gnttab_map_grant_ref *gop_map = queue->tx_map_ops;
+ struct gnttab_copy *gop_copy = queue->tx_copy_ops;
struct sk_buff *skb;
int work_done = 0;
- while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
+ while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
struct xen_netif_tx_request *txp;
u16 pending_idx;
unsigned data_len;
pending_idx = XENVIF_TX_CB(skb)->pending_idx;
- txp = &vif->pending_tx_info[pending_idx].req;
+ txp = &queue->pending_tx_info[pending_idx].req;
/* Check the remap error code. */
- if (unlikely(xenvif_tx_check_gop(vif, skb, &gop_map, &gop_copy))) {
+ if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) {
skb_shinfo(skb)->nr_frags = 0;
kfree_skb(skb);
continue;
}
data_len = skb->len;
- callback_param(vif, pending_idx).ctx = NULL;
+ callback_param(queue, pending_idx).ctx = NULL;
if (data_len < txp->size) {
/* Append the packet payload as a fragment. */
txp->offset += data_len;
txp->size -= data_len;
} else {
/* Schedule a response immediately. */
- xenvif_idx_release(vif, pending_idx,
+ xenvif_idx_release(queue, pending_idx,
XEN_NETIF_RSP_OKAY);
}
else if (txp->flags & XEN_NETTXF_data_validated)
skb->ip_summed = CHECKSUM_UNNECESSARY;
- xenvif_fill_frags(vif, skb);
+ xenvif_fill_frags(queue, skb);
if (unlikely(skb_has_frag_list(skb))) {
- if (xenvif_handle_frag_list(vif, skb)) {
+ if (xenvif_handle_frag_list(queue, skb)) {
if (net_ratelimit())
- netdev_err(vif->dev,
+ netdev_err(queue->vif->dev,
"Not enough memory to consolidate frag_list!\n");
skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
kfree_skb(skb);
__pskb_pull_tail(skb, target - skb_headlen(skb));
}
- skb->dev = vif->dev;
+ skb->dev = queue->vif->dev;
skb->protocol = eth_type_trans(skb, skb->dev);
skb_reset_network_header(skb);
- if (checksum_setup(vif, skb)) {
- netdev_dbg(vif->dev,
+ if (checksum_setup(queue, skb)) {
+ netdev_dbg(queue->vif->dev,
"Can't setup checksum in net_tx_action\n");
/* We have to set this flag to trigger the callback */
if (skb_shinfo(skb)->destructor_arg)
DIV_ROUND_UP(skb->len - hdrlen, mss);
}
- vif->dev->stats.rx_bytes += skb->len;
- vif->dev->stats.rx_packets++;
+ queue->stats.rx_bytes += skb->len;
+ queue->stats.rx_packets++;
work_done++;
*/
if (skb_shinfo(skb)->destructor_arg) {
skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
- vif->tx_zerocopy_sent++;
+ queue->stats.tx_zerocopy_sent++;
}
netif_receive_skb(skb);
{
unsigned long flags;
pending_ring_idx_t index;
- struct xenvif *vif = ubuf_to_vif(ubuf);
+ struct xenvif_queue *queue = ubuf_to_queue(ubuf);
/* This is the only place where we grab this lock, to protect callbacks
* from each other.
*/
- spin_lock_irqsave(&vif->callback_lock, flags);
+ spin_lock_irqsave(&queue->callback_lock, flags);
do {
u16 pending_idx = ubuf->desc;
ubuf = (struct ubuf_info *) ubuf->ctx;
- BUG_ON(vif->dealloc_prod - vif->dealloc_cons >=
+ BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
MAX_PENDING_REQS);
- index = pending_index(vif->dealloc_prod);
- vif->dealloc_ring[index] = pending_idx;
+ index = pending_index(queue->dealloc_prod);
+ queue->dealloc_ring[index] = pending_idx;
/* Sync with xenvif_tx_dealloc_action:
* insert idx then incr producer.
*/
smp_wmb();
- vif->dealloc_prod++;
+ queue->dealloc_prod++;
} while (ubuf);
- wake_up(&vif->dealloc_wq);
- spin_unlock_irqrestore(&vif->callback_lock, flags);
+ wake_up(&queue->dealloc_wq);
+ spin_unlock_irqrestore(&queue->callback_lock, flags);
if (likely(zerocopy_success))
- vif->tx_zerocopy_success++;
+ queue->stats.tx_zerocopy_success++;
else
- vif->tx_zerocopy_fail++;
+ queue->stats.tx_zerocopy_fail++;
}
-static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
+static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
{
struct gnttab_unmap_grant_ref *gop;
pending_ring_idx_t dc, dp;
u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
unsigned int i = 0;
- dc = vif->dealloc_cons;
- gop = vif->tx_unmap_ops;
+ dc = queue->dealloc_cons;
+ gop = queue->tx_unmap_ops;
/* Free up any grants we have finished using */
do {
- dp = vif->dealloc_prod;
+ dp = queue->dealloc_prod;
/* Ensure we see all indices enqueued by all
* xenvif_zerocopy_callback().
smp_rmb();
while (dc != dp) {
- BUG_ON(gop - vif->tx_unmap_ops > MAX_PENDING_REQS);
+ BUG_ON(gop - queue->tx_unmap_ops > MAX_PENDING_REQS);
pending_idx =
- vif->dealloc_ring[pending_index(dc++)];
+ queue->dealloc_ring[pending_index(dc++)];
- pending_idx_release[gop-vif->tx_unmap_ops] =
+ pending_idx_release[gop-queue->tx_unmap_ops] =
pending_idx;
- vif->pages_to_unmap[gop-vif->tx_unmap_ops] =
- vif->mmap_pages[pending_idx];
+ queue->pages_to_unmap[gop-queue->tx_unmap_ops] =
+ queue->mmap_pages[pending_idx];
gnttab_set_unmap_op(gop,
- idx_to_kaddr(vif, pending_idx),
+ idx_to_kaddr(queue, pending_idx),
GNTMAP_host_map,
- vif->grant_tx_handle[pending_idx]);
- xenvif_grant_handle_reset(vif, pending_idx);
+ queue->grant_tx_handle[pending_idx]);
+ xenvif_grant_handle_reset(queue, pending_idx);
++gop;
}
- } while (dp != vif->dealloc_prod);
+ } while (dp != queue->dealloc_prod);
- vif->dealloc_cons = dc;
+ queue->dealloc_cons = dc;
- if (gop - vif->tx_unmap_ops > 0) {
+ if (gop - queue->tx_unmap_ops > 0) {
int ret;
- ret = gnttab_unmap_refs(vif->tx_unmap_ops,
+ ret = gnttab_unmap_refs(queue->tx_unmap_ops,
NULL,
- vif->pages_to_unmap,
- gop - vif->tx_unmap_ops);
+ queue->pages_to_unmap,
+ gop - queue->tx_unmap_ops);
if (ret) {
- netdev_err(vif->dev, "Unmap fail: nr_ops %tx ret %d\n",
- gop - vif->tx_unmap_ops, ret);
- for (i = 0; i < gop - vif->tx_unmap_ops; ++i) {
+ netdev_err(queue->vif->dev, "Unmap fail: nr_ops %tx ret %d\n",
+ gop - queue->tx_unmap_ops, ret);
+ for (i = 0; i < gop - queue->tx_unmap_ops; ++i) {
if (gop[i].status != GNTST_okay)
- netdev_err(vif->dev,
+ netdev_err(queue->vif->dev,
" host_addr: %llx handle: %x status: %d\n",
gop[i].host_addr,
gop[i].handle,
}
}
- for (i = 0; i < gop - vif->tx_unmap_ops; ++i)
- xenvif_idx_release(vif, pending_idx_release[i],
+ for (i = 0; i < gop - queue->tx_unmap_ops; ++i)
+ xenvif_idx_release(queue, pending_idx_release[i],
XEN_NETIF_RSP_OKAY);
}
/* Called after netfront has transmitted */
-int xenvif_tx_action(struct xenvif *vif, int budget)
+int xenvif_tx_action(struct xenvif_queue *queue, int budget)
{
unsigned nr_mops, nr_cops = 0;
int work_done, ret;
- if (unlikely(!tx_work_todo(vif)))
+ if (unlikely(!tx_work_todo(queue)))
return 0;
- xenvif_tx_build_gops(vif, budget, &nr_cops, &nr_mops);
+ xenvif_tx_build_gops(queue, budget, &nr_cops, &nr_mops);
if (nr_cops == 0)
return 0;
- gnttab_batch_copy(vif->tx_copy_ops, nr_cops);
+ gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
if (nr_mops != 0) {
- ret = gnttab_map_refs(vif->tx_map_ops,
+ ret = gnttab_map_refs(queue->tx_map_ops,
NULL,
- vif->pages_to_map,
+ queue->pages_to_map,
nr_mops);
BUG_ON(ret);
}
- work_done = xenvif_tx_submit(vif);
+ work_done = xenvif_tx_submit(queue);
return work_done;
}
-static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
+static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
u8 status)
{
struct pending_tx_info *pending_tx_info;
pending_ring_idx_t index;
unsigned long flags;
- pending_tx_info = &vif->pending_tx_info[pending_idx];
- spin_lock_irqsave(&vif->response_lock, flags);
- make_tx_response(vif, &pending_tx_info->req, status);
- index = pending_index(vif->pending_prod);
- vif->pending_ring[index] = pending_idx;
+ pending_tx_info = &queue->pending_tx_info[pending_idx];
+ spin_lock_irqsave(&queue->response_lock, flags);
+ make_tx_response(queue, &pending_tx_info->req, status);
+ index = pending_index(queue->pending_prod);
+ queue->pending_ring[index] = pending_idx;
/* TX shouldn't use the index before we give it back here */
mb();
- vif->pending_prod++;
- spin_unlock_irqrestore(&vif->response_lock, flags);
+ queue->pending_prod++;
+ spin_unlock_irqrestore(&queue->response_lock, flags);
}
-static void make_tx_response(struct xenvif *vif,
+static void make_tx_response(struct xenvif_queue *queue,
struct xen_netif_tx_request *txp,
s8 st)
{
- RING_IDX i = vif->tx.rsp_prod_pvt;
+ RING_IDX i = queue->tx.rsp_prod_pvt;
struct xen_netif_tx_response *resp;
int notify;
- resp = RING_GET_RESPONSE(&vif->tx, i);
+ resp = RING_GET_RESPONSE(&queue->tx, i);
resp->id = txp->id;
resp->status = st;
if (txp->flags & XEN_NETTXF_extra_info)
- RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
+ RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;
- vif->tx.rsp_prod_pvt = ++i;
- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
+ queue->tx.rsp_prod_pvt = ++i;
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
if (notify)
- notify_remote_via_irq(vif->tx_irq);
+ notify_remote_via_irq(queue->tx_irq);
}
-static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
+static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
u16 id,
s8 st,
u16 offset,
u16 size,
u16 flags)
{
- RING_IDX i = vif->rx.rsp_prod_pvt;
+ RING_IDX i = queue->rx.rsp_prod_pvt;
struct xen_netif_rx_response *resp;
- resp = RING_GET_RESPONSE(&vif->rx, i);
+ resp = RING_GET_RESPONSE(&queue->rx, i);
resp->offset = offset;
resp->flags = flags;
resp->id = id;
if (st < 0)
resp->status = (s16)st;
- vif->rx.rsp_prod_pvt = ++i;
+ queue->rx.rsp_prod_pvt = ++i;
return resp;
}
-void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx)
+void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
{
int ret;
struct gnttab_unmap_grant_ref tx_unmap_op;
gnttab_set_unmap_op(&tx_unmap_op,
- idx_to_kaddr(vif, pending_idx),
+ idx_to_kaddr(queue, pending_idx),
GNTMAP_host_map,
- vif->grant_tx_handle[pending_idx]);
- xenvif_grant_handle_reset(vif, pending_idx);
+ queue->grant_tx_handle[pending_idx]);
+ xenvif_grant_handle_reset(queue, pending_idx);
ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
- &vif->mmap_pages[pending_idx], 1);
+ &queue->mmap_pages[pending_idx], 1);
if (ret) {
- netdev_err(vif->dev,
+ netdev_err(queue->vif->dev,
"Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: %x status: %d\n",
ret,
pending_idx,
BUG();
}
- xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
+ xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_OKAY);
}
-static inline int rx_work_todo(struct xenvif *vif)
+static inline int rx_work_todo(struct xenvif_queue *queue)
{
- return (!skb_queue_empty(&vif->rx_queue) &&
- xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots)) ||
- vif->rx_queue_purge;
+ return (!skb_queue_empty(&queue->rx_queue) &&
+ xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots)) ||
+ queue->rx_queue_purge;
}
-static inline int tx_work_todo(struct xenvif *vif)
+static inline int tx_work_todo(struct xenvif_queue *queue)
{
-
- if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)))
+ if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
return 1;
return 0;
}
-static inline bool tx_dealloc_work_todo(struct xenvif *vif)
+static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue)
{
- return vif->dealloc_cons != vif->dealloc_prod;
+ return queue->dealloc_cons != queue->dealloc_prod;
}
-void xenvif_unmap_frontend_rings(struct xenvif *vif)
+void xenvif_unmap_frontend_rings(struct xenvif_queue *queue)
{
- if (vif->tx.sring)
- xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
- vif->tx.sring);
- if (vif->rx.sring)
- xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
- vif->rx.sring);
+ if (queue->tx.sring)
+ xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
+ queue->tx.sring);
+ if (queue->rx.sring)
+ xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
+ queue->rx.sring);
}
-int xenvif_map_frontend_rings(struct xenvif *vif,
+int xenvif_map_frontend_rings(struct xenvif_queue *queue,
grant_ref_t tx_ring_ref,
grant_ref_t rx_ring_ref)
{
int err = -ENOMEM;
- err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
+ err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
tx_ring_ref, &addr);
if (err)
goto err;
txs = (struct xen_netif_tx_sring *)addr;
- BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
+ BACK_RING_INIT(&queue->tx, txs, PAGE_SIZE);
- err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
+ err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
rx_ring_ref, &addr);
if (err)
goto err;
rxs = (struct xen_netif_rx_sring *)addr;
- BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);
+ BACK_RING_INIT(&queue->rx, rxs, PAGE_SIZE);
return 0;
err:
- xenvif_unmap_frontend_rings(vif);
+ xenvif_unmap_frontend_rings(queue);
return err;
}
-void xenvif_stop_queue(struct xenvif *vif)
+static void xenvif_start_queue(struct xenvif_queue *queue)
{
- if (!vif->can_queue)
- return;
-
- netif_stop_queue(vif->dev);
-}
-
-static void xenvif_start_queue(struct xenvif *vif)
-{
- if (xenvif_schedulable(vif))
- netif_wake_queue(vif->dev);
+ if (xenvif_schedulable(queue->vif))
+ xenvif_wake_queue(queue);
}
int xenvif_kthread_guest_rx(void *data)
{
- struct xenvif *vif = data;
+ struct xenvif_queue *queue = data;
struct sk_buff *skb;
while (!kthread_should_stop()) {
- wait_event_interruptible(vif->wq,
- rx_work_todo(vif) ||
- vif->disabled ||
+ wait_event_interruptible(queue->wq,
+ rx_work_todo(queue) ||
+ queue->vif->disabled ||
kthread_should_stop());
/* This frontend is found to be rogue, disable it in
* kthread context. Currently this is only set when
* netback finds out frontend sends malformed packet,
* but we cannot disable the interface in softirq
- * context so we defer it here.
+ * context so we defer it here, if this thread is
+ * associated with queue 0.
*/
- if (unlikely(vif->disabled && netif_carrier_ok(vif->dev)))
- xenvif_carrier_off(vif);
+ if (unlikely(queue->vif->disabled && netif_carrier_ok(queue->vif->dev) && queue->id == 0))
+ xenvif_carrier_off(queue->vif);
if (kthread_should_stop())
break;
- if (vif->rx_queue_purge) {
- skb_queue_purge(&vif->rx_queue);
- vif->rx_queue_purge = false;
+ if (queue->rx_queue_purge) {
+ skb_queue_purge(&queue->rx_queue);
+ queue->rx_queue_purge = false;
}
- if (!skb_queue_empty(&vif->rx_queue))
- xenvif_rx_action(vif);
+ if (!skb_queue_empty(&queue->rx_queue))
+ xenvif_rx_action(queue);
- if (skb_queue_empty(&vif->rx_queue) &&
- netif_queue_stopped(vif->dev)) {
- del_timer_sync(&vif->wake_queue);
- xenvif_start_queue(vif);
+ if (skb_queue_empty(&queue->rx_queue) &&
+ xenvif_queue_stopped(queue)) {
+ del_timer_sync(&queue->wake_queue);
+ xenvif_start_queue(queue);
}
cond_resched();
}
/* Bin any remaining skbs */
- while ((skb = skb_dequeue(&vif->rx_queue)) != NULL)
+ while ((skb = skb_dequeue(&queue->rx_queue)) != NULL)
dev_kfree_skb(skb);
return 0;
int xenvif_dealloc_kthread(void *data)
{
- struct xenvif *vif = data;
+ struct xenvif_queue *queue = data;
while (!kthread_should_stop()) {
- wait_event_interruptible(vif->dealloc_wq,
- tx_dealloc_work_todo(vif) ||
+ wait_event_interruptible(queue->dealloc_wq,
+ tx_dealloc_work_todo(queue) ||
kthread_should_stop());
if (kthread_should_stop())
break;
- xenvif_tx_dealloc_action(vif);
+ xenvif_tx_dealloc_action(queue);
cond_resched();
}
/* Unmap anything remaining*/
- if (tx_dealloc_work_todo(vif))
- xenvif_tx_dealloc_action(vif);
+ if (tx_dealloc_work_todo(queue))
+ xenvif_tx_dealloc_action(queue);
return 0;
}
if (!xen_domain())
return -ENODEV;
+ /* Allow as many queues as there are CPUs, by default */
+ xenvif_max_queues = num_online_cpus();
+
if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
+/* Registers */
+#define BPF_R0 regs[BPF_REG_0]
+#define BPF_R1 regs[BPF_REG_1]
+#define BPF_R2 regs[BPF_REG_2]
+#define BPF_R3 regs[BPF_REG_3]
+#define BPF_R4 regs[BPF_REG_4]
+#define BPF_R5 regs[BPF_REG_5]
+#define BPF_R6 regs[BPF_REG_6]
+#define BPF_R7 regs[BPF_REG_7]
+#define BPF_R8 regs[BPF_REG_8]
+#define BPF_R9 regs[BPF_REG_9]
+#define BPF_R10 regs[BPF_REG_10]
+
+/* Named registers */
+#define A regs[insn->a_reg]
+#define X regs[insn->x_reg]
+#define FP regs[BPF_REG_FP]
+#define ARG1 regs[BPF_REG_ARG1]
+#define CTX regs[BPF_REG_CTX]
+#define K insn->imm
+
/* No hurry in this branch
*
* Exported for the bpf jit load helper.
ptr = skb_network_header(skb) + k - SKF_NET_OFF;
else if (k >= SKF_LL_OFF)
ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
-
if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
return ptr;
+
return NULL;
}
{
if (k >= 0)
return skb_header_pointer(skb, k, size, buffer);
+
return bpf_internal_load_pointer_neg_helper(skb, k, size);
}
return 0;
}
-/* Register mappings for user programs. */
-#define A_REG 0
-#define X_REG 7
-#define TMP_REG 8
-#define ARG2_REG 2
-#define ARG3_REG 3
-
/**
* __sk_run_filter - run a filter on a given context
* @ctx: buffer to run the filter on
* keep, 0 for none. @ctx is the data we are operating on, @insn is the
* array of filter instructions.
*/
-unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
+static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
{
u64 stack[MAX_BPF_STACK / sizeof(u64)];
u64 regs[MAX_BPF_REG], tmp;
- void *ptr;
- int off;
-
-#define K insn->imm
-#define A regs[insn->a_reg]
-#define X regs[insn->x_reg]
-#define R0 regs[0]
-
-#define CONT ({insn++; goto select_insn; })
-#define CONT_JMP ({insn++; goto select_insn; })
-
static const void *jumptable[256] = {
[0 ... 255] = &&default_label,
/* Now overwrite non-defaults ... */
-#define DL(A, B, C) [A|B|C] = &&A##_##B##_##C
- DL(BPF_ALU, BPF_ADD, BPF_X),
- DL(BPF_ALU, BPF_ADD, BPF_K),
- DL(BPF_ALU, BPF_SUB, BPF_X),
- DL(BPF_ALU, BPF_SUB, BPF_K),
- DL(BPF_ALU, BPF_AND, BPF_X),
- DL(BPF_ALU, BPF_AND, BPF_K),
- DL(BPF_ALU, BPF_OR, BPF_X),
- DL(BPF_ALU, BPF_OR, BPF_K),
- DL(BPF_ALU, BPF_LSH, BPF_X),
- DL(BPF_ALU, BPF_LSH, BPF_K),
- DL(BPF_ALU, BPF_RSH, BPF_X),
- DL(BPF_ALU, BPF_RSH, BPF_K),
- DL(BPF_ALU, BPF_XOR, BPF_X),
- DL(BPF_ALU, BPF_XOR, BPF_K),
- DL(BPF_ALU, BPF_MUL, BPF_X),
- DL(BPF_ALU, BPF_MUL, BPF_K),
- DL(BPF_ALU, BPF_MOV, BPF_X),
- DL(BPF_ALU, BPF_MOV, BPF_K),
- DL(BPF_ALU, BPF_DIV, BPF_X),
- DL(BPF_ALU, BPF_DIV, BPF_K),
- DL(BPF_ALU, BPF_MOD, BPF_X),
- DL(BPF_ALU, BPF_MOD, BPF_K),
- DL(BPF_ALU, BPF_NEG, 0),
- DL(BPF_ALU, BPF_END, BPF_TO_BE),
- DL(BPF_ALU, BPF_END, BPF_TO_LE),
- DL(BPF_ALU64, BPF_ADD, BPF_X),
- DL(BPF_ALU64, BPF_ADD, BPF_K),
- DL(BPF_ALU64, BPF_SUB, BPF_X),
- DL(BPF_ALU64, BPF_SUB, BPF_K),
- DL(BPF_ALU64, BPF_AND, BPF_X),
- DL(BPF_ALU64, BPF_AND, BPF_K),
- DL(BPF_ALU64, BPF_OR, BPF_X),
- DL(BPF_ALU64, BPF_OR, BPF_K),
- DL(BPF_ALU64, BPF_LSH, BPF_X),
- DL(BPF_ALU64, BPF_LSH, BPF_K),
- DL(BPF_ALU64, BPF_RSH, BPF_X),
- DL(BPF_ALU64, BPF_RSH, BPF_K),
- DL(BPF_ALU64, BPF_XOR, BPF_X),
- DL(BPF_ALU64, BPF_XOR, BPF_K),
- DL(BPF_ALU64, BPF_MUL, BPF_X),
- DL(BPF_ALU64, BPF_MUL, BPF_K),
- DL(BPF_ALU64, BPF_MOV, BPF_X),
- DL(BPF_ALU64, BPF_MOV, BPF_K),
- DL(BPF_ALU64, BPF_ARSH, BPF_X),
- DL(BPF_ALU64, BPF_ARSH, BPF_K),
- DL(BPF_ALU64, BPF_DIV, BPF_X),
- DL(BPF_ALU64, BPF_DIV, BPF_K),
- DL(BPF_ALU64, BPF_MOD, BPF_X),
- DL(BPF_ALU64, BPF_MOD, BPF_K),
- DL(BPF_ALU64, BPF_NEG, 0),
- DL(BPF_JMP, BPF_CALL, 0),
- DL(BPF_JMP, BPF_JA, 0),
- DL(BPF_JMP, BPF_JEQ, BPF_X),
- DL(BPF_JMP, BPF_JEQ, BPF_K),
- DL(BPF_JMP, BPF_JNE, BPF_X),
- DL(BPF_JMP, BPF_JNE, BPF_K),
- DL(BPF_JMP, BPF_JGT, BPF_X),
- DL(BPF_JMP, BPF_JGT, BPF_K),
- DL(BPF_JMP, BPF_JGE, BPF_X),
- DL(BPF_JMP, BPF_JGE, BPF_K),
- DL(BPF_JMP, BPF_JSGT, BPF_X),
- DL(BPF_JMP, BPF_JSGT, BPF_K),
- DL(BPF_JMP, BPF_JSGE, BPF_X),
- DL(BPF_JMP, BPF_JSGE, BPF_K),
- DL(BPF_JMP, BPF_JSET, BPF_X),
- DL(BPF_JMP, BPF_JSET, BPF_K),
- DL(BPF_JMP, BPF_EXIT, 0),
- DL(BPF_STX, BPF_MEM, BPF_B),
- DL(BPF_STX, BPF_MEM, BPF_H),
- DL(BPF_STX, BPF_MEM, BPF_W),
- DL(BPF_STX, BPF_MEM, BPF_DW),
- DL(BPF_STX, BPF_XADD, BPF_W),
- DL(BPF_STX, BPF_XADD, BPF_DW),
- DL(BPF_ST, BPF_MEM, BPF_B),
- DL(BPF_ST, BPF_MEM, BPF_H),
- DL(BPF_ST, BPF_MEM, BPF_W),
- DL(BPF_ST, BPF_MEM, BPF_DW),
- DL(BPF_LDX, BPF_MEM, BPF_B),
- DL(BPF_LDX, BPF_MEM, BPF_H),
- DL(BPF_LDX, BPF_MEM, BPF_W),
- DL(BPF_LDX, BPF_MEM, BPF_DW),
- DL(BPF_LD, BPF_ABS, BPF_W),
- DL(BPF_LD, BPF_ABS, BPF_H),
- DL(BPF_LD, BPF_ABS, BPF_B),
- DL(BPF_LD, BPF_IND, BPF_W),
- DL(BPF_LD, BPF_IND, BPF_H),
- DL(BPF_LD, BPF_IND, BPF_B),
-#undef DL
+ /* 32 bit ALU operations */
+ [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
+ [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
+ [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
+ [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
+ [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
+ [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
+ [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X,
+ [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K,
+ [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
+ [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
+ [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
+ [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
+ [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
+ [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
+ [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
+ [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
+ [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
+ [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
+ [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
+ [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
+ [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
+ [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
+ [BPF_ALU | BPF_NEG] = &&ALU_NEG,
+ [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
+ [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
+ /* 64 bit ALU operations */
+ [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
+ [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
+ [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
+ [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
+ [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
+ [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
+ [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
+ [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
+ [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
+ [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
+ [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
+ [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
+ [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
+ [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
+ [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
+ [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
+ [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
+ [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
+ [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
+ [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
+ [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
+ [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
+ [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
+ [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
+ [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
+ /* Call instruction */
+ [BPF_JMP | BPF_CALL] = &&JMP_CALL,
+ /* Jumps */
+ [BPF_JMP | BPF_JA] = &&JMP_JA,
+ [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
+ [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
+ [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
+ [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
+ [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
+ [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
+ [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
+ [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
+ [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
+ [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
+ [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
+ [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
+ [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
+ [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
+ /* Program return */
+ [BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
+ /* Store instructions */
+ [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
+ [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
+ [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
+ [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
+ [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
+ [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
+ [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
+ [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
+ [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
+ [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
+ /* Load instructions */
+ [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
+ [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
+ [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
+ [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
+ [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
+ [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
+ [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
+ [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
+ [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
+ [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
};
+ void *ptr;
+ int off;
+
+#define CONT ({ insn++; goto select_insn; })
+#define CONT_JMP ({ insn++; goto select_insn; })
- regs[FP_REG] = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
- regs[ARG1_REG] = (u64) (unsigned long) ctx;
- regs[A_REG] = 0;
- regs[X_REG] = 0;
+ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
+ ARG1 = (u64) (unsigned long) ctx;
+
+ /* Register for user BPF programs need to be reset first. */
+ regs[BPF_REG_A] = 0;
+ regs[BPF_REG_X] = 0;
select_insn:
goto *jumptable[insn->code];
/* ALU */
#define ALU(OPCODE, OP) \
- BPF_ALU64_##OPCODE##_BPF_X: \
+ ALU64_##OPCODE##_X: \
A = A OP X; \
CONT; \
- BPF_ALU_##OPCODE##_BPF_X: \
+ ALU_##OPCODE##_X: \
A = (u32) A OP (u32) X; \
CONT; \
- BPF_ALU64_##OPCODE##_BPF_K: \
+ ALU64_##OPCODE##_K: \
A = A OP K; \
CONT; \
- BPF_ALU_##OPCODE##_BPF_K: \
+ ALU_##OPCODE##_K: \
A = (u32) A OP (u32) K; \
CONT;
- ALU(BPF_ADD, +)
- ALU(BPF_SUB, -)
- ALU(BPF_AND, &)
- ALU(BPF_OR, |)
- ALU(BPF_LSH, <<)
- ALU(BPF_RSH, >>)
- ALU(BPF_XOR, ^)
- ALU(BPF_MUL, *)
+ ALU(ADD, +)
+ ALU(SUB, -)
+ ALU(AND, &)
+ ALU(OR, |)
+ ALU(LSH, <<)
+ ALU(RSH, >>)
+ ALU(XOR, ^)
+ ALU(MUL, *)
#undef ALU
- BPF_ALU_BPF_NEG_0:
+ ALU_NEG:
A = (u32) -A;
CONT;
- BPF_ALU64_BPF_NEG_0:
+ ALU64_NEG:
A = -A;
CONT;
- BPF_ALU_BPF_MOV_BPF_X:
+ ALU_MOV_X:
A = (u32) X;
CONT;
- BPF_ALU_BPF_MOV_BPF_K:
+ ALU_MOV_K:
A = (u32) K;
CONT;
- BPF_ALU64_BPF_MOV_BPF_X:
+ ALU64_MOV_X:
A = X;
CONT;
- BPF_ALU64_BPF_MOV_BPF_K:
+ ALU64_MOV_K:
A = K;
CONT;
- BPF_ALU64_BPF_ARSH_BPF_X:
+ ALU64_ARSH_X:
(*(s64 *) &A) >>= X;
CONT;
- BPF_ALU64_BPF_ARSH_BPF_K:
+ ALU64_ARSH_K:
(*(s64 *) &A) >>= K;
CONT;
- BPF_ALU64_BPF_MOD_BPF_X:
+ ALU64_MOD_X:
if (unlikely(X == 0))
return 0;
tmp = A;
A = do_div(tmp, X);
CONT;
- BPF_ALU_BPF_MOD_BPF_X:
+ ALU_MOD_X:
if (unlikely(X == 0))
return 0;
tmp = (u32) A;
A = do_div(tmp, (u32) X);
CONT;
- BPF_ALU64_BPF_MOD_BPF_K:
+ ALU64_MOD_K:
tmp = A;
A = do_div(tmp, K);
CONT;
- BPF_ALU_BPF_MOD_BPF_K:
+ ALU_MOD_K:
tmp = (u32) A;
A = do_div(tmp, (u32) K);
CONT;
- BPF_ALU64_BPF_DIV_BPF_X:
+ ALU64_DIV_X:
if (unlikely(X == 0))
return 0;
do_div(A, X);
CONT;
- BPF_ALU_BPF_DIV_BPF_X:
+ ALU_DIV_X:
if (unlikely(X == 0))
return 0;
tmp = (u32) A;
do_div(tmp, (u32) X);
A = (u32) tmp;
CONT;
- BPF_ALU64_BPF_DIV_BPF_K:
+ ALU64_DIV_K:
do_div(A, K);
CONT;
- BPF_ALU_BPF_DIV_BPF_K:
+ ALU_DIV_K:
tmp = (u32) A;
do_div(tmp, (u32) K);
A = (u32) tmp;
CONT;
- BPF_ALU_BPF_END_BPF_TO_BE:
+ ALU_END_TO_BE:
switch (K) {
case 16:
A = (__force u16) cpu_to_be16(A);
break;
}
CONT;
- BPF_ALU_BPF_END_BPF_TO_LE:
+ ALU_END_TO_LE:
switch (K) {
case 16:
A = (__force u16) cpu_to_le16(A);
CONT;
/* CALL */
- BPF_JMP_BPF_CALL_0:
- /* Function call scratches R1-R5 registers, preserves R6-R9,
- * and stores return value into R0.
+ JMP_CALL:
+ /* Function call scratches BPF_R1-BPF_R5 registers,
+ * preserves BPF_R6-BPF_R9, and stores return value
+ * into BPF_R0.
*/
- R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3],
- regs[4], regs[5]);
+ BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
+ BPF_R4, BPF_R5);
CONT;
/* JMP */
- BPF_JMP_BPF_JA_0:
+ JMP_JA:
insn += insn->off;
CONT;
- BPF_JMP_BPF_JEQ_BPF_X:
+ JMP_JEQ_X:
if (A == X) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JEQ_BPF_K:
+ JMP_JEQ_K:
if (A == K) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JNE_BPF_X:
+ JMP_JNE_X:
if (A != X) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JNE_BPF_K:
+ JMP_JNE_K:
if (A != K) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JGT_BPF_X:
+ JMP_JGT_X:
if (A > X) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JGT_BPF_K:
+ JMP_JGT_K:
if (A > K) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JGE_BPF_X:
+ JMP_JGE_X:
if (A >= X) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JGE_BPF_K:
+ JMP_JGE_K:
if (A >= K) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSGT_BPF_X:
- if (((s64)A) > ((s64)X)) {
+ JMP_JSGT_X:
+ if (((s64) A) > ((s64) X)) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSGT_BPF_K:
- if (((s64)A) > ((s64)K)) {
+ JMP_JSGT_K:
+ if (((s64) A) > ((s64) K)) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSGE_BPF_X:
- if (((s64)A) >= ((s64)X)) {
+ JMP_JSGE_X:
+ if (((s64) A) >= ((s64) X)) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSGE_BPF_K:
- if (((s64)A) >= ((s64)K)) {
+ JMP_JSGE_K:
+ if (((s64) A) >= ((s64) K)) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSET_BPF_X:
+ JMP_JSET_X:
if (A & X) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSET_BPF_K:
+ JMP_JSET_K:
if (A & K) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_EXIT_0:
- return R0;
+ JMP_EXIT:
+ return BPF_R0;
/* STX and ST and LDX*/
#define LDST(SIZEOP, SIZE) \
- BPF_STX_BPF_MEM_##SIZEOP: \
+ STX_MEM_##SIZEOP: \
*(SIZE *)(unsigned long) (A + insn->off) = X; \
CONT; \
- BPF_ST_BPF_MEM_##SIZEOP: \
+ ST_MEM_##SIZEOP: \
*(SIZE *)(unsigned long) (A + insn->off) = K; \
CONT; \
- BPF_LDX_BPF_MEM_##SIZEOP: \
+ LDX_MEM_##SIZEOP: \
A = *(SIZE *)(unsigned long) (X + insn->off); \
CONT;
- LDST(BPF_B, u8)
- LDST(BPF_H, u16)
- LDST(BPF_W, u32)
- LDST(BPF_DW, u64)
+ LDST(B, u8)
+ LDST(H, u16)
+ LDST(W, u32)
+ LDST(DW, u64)
#undef LDST
- BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */
+ STX_XADD_W: /* lock xadd *(u32 *)(A + insn->off) += X */
atomic_add((u32) X, (atomic_t *)(unsigned long)
(A + insn->off));
CONT;
- BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
+ STX_XADD_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
atomic64_add((u64) X, (atomic64_t *)(unsigned long)
(A + insn->off));
CONT;
- BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */
+ LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + K)) */
off = K;
load_word:
- /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
- * appearing in the programs where ctx == skb. All programs
- * keep 'ctx' in regs[CTX_REG] == R6, sk_convert_filter()
- * saves it in R6, internal BPF verifier will check that
- * R6 == ctx.
+ /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are
+ * only appearing in the programs where ctx ==
+ * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
+ * == BPF_R6, sk_convert_filter() saves it in BPF_R6,
+ * internal BPF verifier will check that BPF_R6 ==
+ * ctx.
*
- * BPF_ABS and BPF_IND are wrappers of function calls, so
- * they scratch R1-R5 registers, preserve R6-R9, and store
- * return value into R0.
+ * BPF_ABS and BPF_IND are wrappers of function calls,
+ * so they scratch BPF_R1-BPF_R5 registers, preserve
+ * BPF_R6-BPF_R9, and store return value into BPF_R0.
*
* Implicit input:
* ctx
* K == 32-bit immediate
*
* Output:
- * R0 - 8/16/32-bit skb data converted to cpu endianness
+ * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
*/
+
ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp);
if (likely(ptr != NULL)) {
- R0 = get_unaligned_be32(ptr);
+ BPF_R0 = get_unaligned_be32(ptr);
CONT;
}
+
return 0;
- BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */
+ LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + K)) */
off = K;
load_half:
ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp);
if (likely(ptr != NULL)) {
- R0 = get_unaligned_be16(ptr);
+ BPF_R0 = get_unaligned_be16(ptr);
CONT;
}
+
return 0;
- BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */
+ LD_ABS_B: /* BPF_R0 = *(u8 *) (ctx + K) */
off = K;
load_byte:
ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp);
if (likely(ptr != NULL)) {
- R0 = *(u8 *)ptr;
+ BPF_R0 = *(u8 *)ptr;
CONT;
}
+
return 0;
- BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */
+ LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + X + K)) */
off = K + X;
goto load_word;
- BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */
+ LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + X + K)) */
off = K + X;
goto load_half;
- BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */
+ LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + X + K) */
off = K + X;
goto load_byte;
/* If we ever reach this, we have a bug somewhere. */
WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
return 0;
-#undef CONT_JMP
-#undef CONT
-
-#undef R0
-#undef X
-#undef A
-#undef K
}
-u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
- const struct sock_filter_int *insni)
- __attribute__ ((alias ("__sk_run_filter")));
-
-u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
- const struct sock_filter_int *insni)
- __attribute__ ((alias ("__sk_run_filter")));
-EXPORT_SYMBOL_GPL(sk_run_filter_int_skb);
-
/* Helper to find the offset of pkt_type in sk_buff structure. We want
* to make sure its still a 3bit field starting at a byte boundary;
* taken from arch/x86/net/bpf_jit_comp.c.
*/
+ #ifdef __BIG_ENDIAN_BITFIELD
+ #define PKT_TYPE_MAX (7 << 5)
+ #else
#define PKT_TYPE_MAX 7
+ #endif
static unsigned int pkt_type_offset(void)
{
struct sk_buff skb_probe = { .pkt_type = ~0, };
return -1;
}
-static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
- struct sk_buff *skb = (struct sk_buff *)(long) ctx;
-
- return __skb_get_poff(skb);
+ return __skb_get_poff((struct sk_buff *)(unsigned long) ctx);
}
-static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
- struct sk_buff *skb = (struct sk_buff *)(long) ctx;
+ struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
struct nlattr *nla;
if (skb_is_nonlinear(skb))
if (skb->len < sizeof(struct nlattr))
return 0;
- if (A > skb->len - sizeof(struct nlattr))
+ if (a > skb->len - sizeof(struct nlattr))
return 0;
- nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X);
+ nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
if (nla)
return (void *) nla - (void *) skb->data;
return 0;
}
-static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
- struct sk_buff *skb = (struct sk_buff *)(long) ctx;
+ struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
struct nlattr *nla;
if (skb_is_nonlinear(skb))
if (skb->len < sizeof(struct nlattr))
return 0;
- if (A > skb->len - sizeof(struct nlattr))
+ if (a > skb->len - sizeof(struct nlattr))
return 0;
- nla = (struct nlattr *) &skb->data[A];
- if (nla->nla_len > skb->len - A)
+ nla = (struct nlattr *) &skb->data[a];
+ if (nla->nla_len > skb->len - a)
return 0;
- nla = nla_find_nested(nla, X);
+ nla = nla_find_nested(nla, x);
if (nla)
return (void *) nla - (void *) skb->data;
return 0;
}
-static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
return raw_smp_processor_id();
}
+/* note that this only generates 32-bit random numbers */
+static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+{
+ return prandom_u32();
+}
+
static bool convert_bpf_extensions(struct sock_filter *fp,
struct sock_filter_int **insnp)
{
case SKF_AD_OFF + SKF_AD_PROTOCOL:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
- insn->code = BPF_LDX | BPF_MEM | BPF_H;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
- insn->off = offsetof(struct sk_buff, protocol);
- insn++;
-
+ /* A = *(u16 *) (ctx + offsetof(protocol)) */
+ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, protocol));
/* A = ntohs(A) [emitting a nop or swap16] */
- insn->code = BPF_ALU | BPF_END | BPF_FROM_BE;
- insn->a_reg = A_REG;
- insn->imm = 16;
+ *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
break;
case SKF_AD_OFF + SKF_AD_PKTTYPE:
- insn->code = BPF_LDX | BPF_MEM | BPF_B;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
- insn->off = pkt_type_offset();
+ *insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
+ pkt_type_offset());
if (insn->off < 0)
return false;
insn++;
-
- insn->code = BPF_ALU | BPF_AND | BPF_K;
- insn->a_reg = A_REG;
- insn->imm = PKT_TYPE_MAX;
+ *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
+ #ifdef __BIG_ENDIAN_BITFIELD
+ insn++;
-
- insn->code = BPF_ALU | BPF_RSH | BPF_K;
- insn->a_reg = A_REG;
- insn->imm = 5;
++ *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5);
+ #endif
break;
case SKF_AD_OFF + SKF_AD_IFINDEX:
case SKF_AD_OFF + SKF_AD_HATYPE:
- if (FIELD_SIZEOF(struct sk_buff, dev) == 8)
- insn->code = BPF_LDX | BPF_MEM | BPF_DW;
- else
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = TMP_REG;
- insn->x_reg = CTX_REG;
- insn->off = offsetof(struct sk_buff, dev);
- insn++;
-
- insn->code = BPF_JMP | BPF_JNE | BPF_K;
- insn->a_reg = TMP_REG;
- insn->imm = 0;
- insn->off = 1;
- insn++;
-
- insn->code = BPF_JMP | BPF_EXIT;
- insn++;
-
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
-
- insn->a_reg = A_REG;
- insn->x_reg = TMP_REG;
-
- if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) {
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->off = offsetof(struct net_device, ifindex);
- } else {
- insn->code = BPF_LDX | BPF_MEM | BPF_H;
- insn->off = offsetof(struct net_device, type);
- }
+ BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0);
+
+ *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+ BPF_REG_TMP, BPF_REG_CTX,
+ offsetof(struct sk_buff, dev));
+ /* if (tmp != 0) goto pc + 1 */
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
+ *insn++ = BPF_EXIT_INSN();
+ if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
+ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
+ offsetof(struct net_device, ifindex));
+ else
+ *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
+ offsetof(struct net_device, type));
break;
case SKF_AD_OFF + SKF_AD_MARK:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
- insn->off = offsetof(struct sk_buff, mark);
+ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, mark));
break;
case SKF_AD_OFF + SKF_AD_RXHASH:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
- insn->off = offsetof(struct sk_buff, hash);
+ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, hash));
break;
case SKF_AD_OFF + SKF_AD_QUEUE:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
- insn->code = BPF_LDX | BPF_MEM | BPF_H;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
- insn->off = offsetof(struct sk_buff, queue_mapping);
+ *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, queue_mapping));
break;
case SKF_AD_OFF + SKF_AD_VLAN_TAG:
case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
-
- insn->code = BPF_LDX | BPF_MEM | BPF_H;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
- insn->off = offsetof(struct sk_buff, vlan_tci);
- insn++;
-
BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+ /* A = *(u16 *) (ctx + offsetof(vlan_tci)) */
+ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, vlan_tci));
if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
- insn->code = BPF_ALU | BPF_AND | BPF_K;
- insn->a_reg = A_REG;
- insn->imm = ~VLAN_TAG_PRESENT;
+ *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
+ ~VLAN_TAG_PRESENT);
} else {
- insn->code = BPF_ALU | BPF_RSH | BPF_K;
- insn->a_reg = A_REG;
- insn->imm = 12;
- insn++;
-
- insn->code = BPF_ALU | BPF_AND | BPF_K;
- insn->a_reg = A_REG;
- insn->imm = 1;
+ /* A >>= 12 */
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
+ /* A &= 1 */
+ *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
}
break;
case SKF_AD_OFF + SKF_AD_NLATTR:
case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
case SKF_AD_OFF + SKF_AD_CPU:
+ case SKF_AD_OFF + SKF_AD_RANDOM:
/* arg1 = ctx */
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = ARG1_REG;
- insn->x_reg = CTX_REG;
- insn++;
-
+ *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
/* arg2 = A */
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = ARG2_REG;
- insn->x_reg = A_REG;
- insn++;
-
+ *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
/* arg3 = X */
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = ARG3_REG;
- insn->x_reg = X_REG;
- insn++;
-
+ *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
/* Emit call(ctx, arg2=A, arg3=X) */
- insn->code = BPF_JMP | BPF_CALL;
switch (fp->k) {
case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
- insn->imm = __skb_get_pay_offset - __bpf_call_base;
+ *insn = BPF_EMIT_CALL(__skb_get_pay_offset);
break;
case SKF_AD_OFF + SKF_AD_NLATTR:
- insn->imm = __skb_get_nlattr - __bpf_call_base;
+ *insn = BPF_EMIT_CALL(__skb_get_nlattr);
break;
case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
- insn->imm = __skb_get_nlattr_nest - __bpf_call_base;
+ *insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
break;
case SKF_AD_OFF + SKF_AD_CPU:
- insn->imm = __get_raw_cpu_id - __bpf_call_base;
+ *insn = BPF_EMIT_CALL(__get_raw_cpu_id);
+ break;
+ case SKF_AD_OFF + SKF_AD_RANDOM:
+ *insn = BPF_EMIT_CALL(__get_random_u32);
break;
}
break;
case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
- insn->code = BPF_ALU | BPF_XOR | BPF_X;
- insn->a_reg = A_REG;
- insn->x_reg = X_REG;
+ /* A ^= X */
+ *insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
break;
default:
u8 bpf_src;
BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
- BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG);
+ BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
if (len <= 0 || len >= BPF_MAXINSNS)
return -EINVAL;
new_insn = new_prog;
fp = prog;
- if (new_insn) {
- new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- new_insn->a_reg = CTX_REG;
- new_insn->x_reg = ARG1_REG;
- }
+ if (new_insn)
+ *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
new_insn++;
for (i = 0; i < len; fp++, i++) {
convert_bpf_extensions(fp, &insn))
break;
- insn->code = fp->code;
- insn->a_reg = A_REG;
- insn->x_reg = X_REG;
- insn->imm = fp->k;
+ *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
break;
- /* Jump opcodes map as-is, but offsets need adjustment. */
- case BPF_JMP | BPF_JA:
- target = i + fp->k + 1;
- insn->code = fp->code;
-#define EMIT_JMP \
+ /* Jump transformation cannot use BPF block macros
+ * everywhere as offset calculation and target updates
+ * require a bit more work than the rest, i.e. jump
+ * opcodes map as-is, but offsets need adjustment.
+ */
+
+#define BPF_EMIT_JMP \
do { \
if (target >= len || target < 0) \
goto err; \
insn->off -= insn - tmp_insns; \
} while (0)
- EMIT_JMP;
+ case BPF_JMP | BPF_JA:
+ target = i + fp->k + 1;
+ insn->code = fp->code;
+ BPF_EMIT_JMP;
break;
case BPF_JMP | BPF_JEQ | BPF_K:
* immediate into tmp register and use it
* in compare insn.
*/
- insn->code = BPF_ALU | BPF_MOV | BPF_K;
- insn->a_reg = TMP_REG;
- insn->imm = fp->k;
- insn++;
+ *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
- insn->a_reg = A_REG;
- insn->x_reg = TMP_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_TMP;
bpf_src = BPF_X;
} else {
- insn->a_reg = A_REG;
- insn->x_reg = X_REG;
+ insn->a_reg = BPF_REG_A;
+ insn->x_reg = BPF_REG_X;
insn->imm = fp->k;
bpf_src = BPF_SRC(fp->code);
}
if (fp->jf == 0) {
insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
target = i + fp->jt + 1;
- EMIT_JMP;
+ BPF_EMIT_JMP;
break;
}
if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
insn->code = BPF_JMP | BPF_JNE | bpf_src;
target = i + fp->jf + 1;
- EMIT_JMP;
+ BPF_EMIT_JMP;
break;
}
/* Other jumps are mapped into two insns: Jxx and JA. */
target = i + fp->jt + 1;
insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
- EMIT_JMP;
+ BPF_EMIT_JMP;
insn++;
insn->code = BPF_JMP | BPF_JA;
target = i + fp->jf + 1;
- EMIT_JMP;
+ BPF_EMIT_JMP;
break;
/* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
case BPF_LDX | BPF_MSH | BPF_B:
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = TMP_REG;
- insn->x_reg = A_REG;
- insn++;
-
- insn->code = BPF_LD | BPF_ABS | BPF_B;
- insn->a_reg = A_REG;
- insn->imm = fp->k;
- insn++;
-
- insn->code = BPF_ALU | BPF_AND | BPF_K;
- insn->a_reg = A_REG;
- insn->imm = 0xf;
- insn++;
-
- insn->code = BPF_ALU | BPF_LSH | BPF_K;
- insn->a_reg = A_REG;
- insn->imm = 2;
- insn++;
-
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = X_REG;
- insn->x_reg = A_REG;
- insn++;
-
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = A_REG;
- insn->x_reg = TMP_REG;
+ /* tmp = A */
+ *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
+ /* A = BPF_R0 = *(u8 *) (skb->data + K) */
+ *insn++ = BPF_LD_ABS(BPF_B, fp->k);
+ /* A &= 0xf */
+ *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
+ /* A <<= 2 */
+ *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
+ /* X = A */
+ *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
+ /* A = tmp */
+ *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
break;
/* RET_K, RET_A are remaped into 2 insns. */
case BPF_RET | BPF_A:
case BPF_RET | BPF_K:
- insn->code = BPF_ALU | BPF_MOV |
- (BPF_RVAL(fp->code) == BPF_K ?
- BPF_K : BPF_X);
- insn->a_reg = 0;
- insn->x_reg = A_REG;
- insn->imm = fp->k;
- insn++;
-
- insn->code = BPF_JMP | BPF_EXIT;
+ *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ?
+ BPF_K : BPF_X, BPF_REG_0,
+ BPF_REG_A, fp->k);
+ *insn = BPF_EXIT_INSN();
break;
/* Store to stack. */
case BPF_ST:
case BPF_STX:
- insn->code = BPF_STX | BPF_MEM | BPF_W;
- insn->a_reg = FP_REG;
- insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG;
- insn->off = -(BPF_MEMWORDS - fp->k) * 4;
+ *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
+ BPF_ST ? BPF_REG_A : BPF_REG_X,
+ -(BPF_MEMWORDS - fp->k) * 4);
break;
/* Load from stack. */
case BPF_LD | BPF_MEM:
case BPF_LDX | BPF_MEM:
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
- A_REG : X_REG;
- insn->x_reg = FP_REG;
- insn->off = -(BPF_MEMWORDS - fp->k) * 4;
+ *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
+ BPF_REG_A : BPF_REG_X, BPF_REG_FP,
+ -(BPF_MEMWORDS - fp->k) * 4);
break;
/* A = K or X = K */
case BPF_LD | BPF_IMM:
case BPF_LDX | BPF_IMM:
- insn->code = BPF_ALU | BPF_MOV | BPF_K;
- insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
- A_REG : X_REG;
- insn->imm = fp->k;
+ *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
+ BPF_REG_A : BPF_REG_X, fp->k);
break;
/* X = A */
case BPF_MISC | BPF_TAX:
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = X_REG;
- insn->x_reg = A_REG;
+ *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
break;
/* A = X */
case BPF_MISC | BPF_TXA:
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = A_REG;
- insn->x_reg = X_REG;
+ *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
break;
/* A = skb->len or X = skb->len */
case BPF_LD | BPF_W | BPF_LEN:
case BPF_LDX | BPF_W | BPF_LEN:
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
- A_REG : X_REG;
- insn->x_reg = CTX_REG;
- insn->off = offsetof(struct sk_buff, len);
+ *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
+ BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
+ offsetof(struct sk_buff, len));
break;
- /* access seccomp_data fields */
+ /* Access seccomp_data fields. */
case BPF_LDX | BPF_ABS | BPF_W:
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
- insn->off = fp->k;
+ /* A = *(u32 *) (ctx + K) */
+ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
break;
+ /* Unkown instruction. */
default:
goto err;
}
if (new_prog)
memcpy(new_insn, tmp_insns,
sizeof(*insn) * (insn - tmp_insns));
-
new_insn += insn - tmp_insns;
}
new_flen = new_insn - new_prog;
if (pass > 2)
goto err;
-
goto do_pass;
}
*/
static int check_load_and_stores(struct sock_filter *filter, int flen)
{
- u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
+ u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
int pc, ret = 0;
BUILD_BUG_ON(BPF_MEMWORDS > 16);
+
masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
if (!masks)
return -ENOMEM;
+
memset(masks, 0xff, flen * sizeof(*masks));
for (pc = 0; pc < flen; pc++) {
memvalid &= masks[pc];
switch (filter[pc].code) {
- case BPF_S_ST:
- case BPF_S_STX:
+ case BPF_ST:
+ case BPF_STX:
memvalid |= (1 << filter[pc].k);
break;
- case BPF_S_LD_MEM:
- case BPF_S_LDX_MEM:
+ case BPF_LD | BPF_MEM:
+ case BPF_LDX | BPF_MEM:
if (!(memvalid & (1 << filter[pc].k))) {
ret = -EINVAL;
goto error;
}
break;
- case BPF_S_JMP_JA:
- /* a jump must set masks on target */
+ case BPF_JMP | BPF_JA:
+ /* A jump must set masks on target */
masks[pc + 1 + filter[pc].k] &= memvalid;
memvalid = ~0;
break;
- case BPF_S_JMP_JEQ_K:
- case BPF_S_JMP_JEQ_X:
- case BPF_S_JMP_JGE_K:
- case BPF_S_JMP_JGE_X:
- case BPF_S_JMP_JGT_K:
- case BPF_S_JMP_JGT_X:
- case BPF_S_JMP_JSET_X:
- case BPF_S_JMP_JSET_K:
- /* a jump must set masks on targets */
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ /* A jump must set masks on targets */
masks[pc + 1 + filter[pc].jt] &= memvalid;
masks[pc + 1 + filter[pc].jf] &= memvalid;
memvalid = ~0;
return ret;
}
+static bool chk_code_allowed(u16 code_to_probe)
+{
+ static const bool codes[] = {
+ /* 32 bit ALU operations */
+ [BPF_ALU | BPF_ADD | BPF_K] = true,
+ [BPF_ALU | BPF_ADD | BPF_X] = true,
+ [BPF_ALU | BPF_SUB | BPF_K] = true,
+ [BPF_ALU | BPF_SUB | BPF_X] = true,
+ [BPF_ALU | BPF_MUL | BPF_K] = true,
+ [BPF_ALU | BPF_MUL | BPF_X] = true,
+ [BPF_ALU | BPF_DIV | BPF_K] = true,
+ [BPF_ALU | BPF_DIV | BPF_X] = true,
+ [BPF_ALU | BPF_MOD | BPF_K] = true,
+ [BPF_ALU | BPF_MOD | BPF_X] = true,
+ [BPF_ALU | BPF_AND | BPF_K] = true,
+ [BPF_ALU | BPF_AND | BPF_X] = true,
+ [BPF_ALU | BPF_OR | BPF_K] = true,
+ [BPF_ALU | BPF_OR | BPF_X] = true,
+ [BPF_ALU | BPF_XOR | BPF_K] = true,
+ [BPF_ALU | BPF_XOR | BPF_X] = true,
+ [BPF_ALU | BPF_LSH | BPF_K] = true,
+ [BPF_ALU | BPF_LSH | BPF_X] = true,
+ [BPF_ALU | BPF_RSH | BPF_K] = true,
+ [BPF_ALU | BPF_RSH | BPF_X] = true,
+ [BPF_ALU | BPF_NEG] = true,
+ /* Load instructions */
+ [BPF_LD | BPF_W | BPF_ABS] = true,
+ [BPF_LD | BPF_H | BPF_ABS] = true,
+ [BPF_LD | BPF_B | BPF_ABS] = true,
+ [BPF_LD | BPF_W | BPF_LEN] = true,
+ [BPF_LD | BPF_W | BPF_IND] = true,
+ [BPF_LD | BPF_H | BPF_IND] = true,
+ [BPF_LD | BPF_B | BPF_IND] = true,
+ [BPF_LD | BPF_IMM] = true,
+ [BPF_LD | BPF_MEM] = true,
+ [BPF_LDX | BPF_W | BPF_LEN] = true,
+ [BPF_LDX | BPF_B | BPF_MSH] = true,
+ [BPF_LDX | BPF_IMM] = true,
+ [BPF_LDX | BPF_MEM] = true,
+ /* Store instructions */
+ [BPF_ST] = true,
+ [BPF_STX] = true,
+ /* Misc instructions */
+ [BPF_MISC | BPF_TAX] = true,
+ [BPF_MISC | BPF_TXA] = true,
+ /* Return instructions */
+ [BPF_RET | BPF_K] = true,
+ [BPF_RET | BPF_A] = true,
+ /* Jump instructions */
+ [BPF_JMP | BPF_JA] = true,
+ [BPF_JMP | BPF_JEQ | BPF_K] = true,
+ [BPF_JMP | BPF_JEQ | BPF_X] = true,
+ [BPF_JMP | BPF_JGE | BPF_K] = true,
+ [BPF_JMP | BPF_JGE | BPF_X] = true,
+ [BPF_JMP | BPF_JGT | BPF_K] = true,
+ [BPF_JMP | BPF_JGT | BPF_X] = true,
+ [BPF_JMP | BPF_JSET | BPF_K] = true,
+ [BPF_JMP | BPF_JSET | BPF_X] = true,
+ };
+
+ if (code_to_probe >= ARRAY_SIZE(codes))
+ return false;
+
+ return codes[code_to_probe];
+}
+
/**
* sk_chk_filter - verify socket filter code
* @filter: filter to verify
*/
int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
{
- /*
- * Valid instructions are initialized to non-0.
- * Invalid instructions are initialized to 0.
- */
- static const u8 codes[] = {
- [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K,
- [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X,
- [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K,
- [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X,
- [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K,
- [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X,
- [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X,
- [BPF_ALU|BPF_MOD|BPF_K] = BPF_S_ALU_MOD_K,
- [BPF_ALU|BPF_MOD|BPF_X] = BPF_S_ALU_MOD_X,
- [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K,
- [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X,
- [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K,
- [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X,
- [BPF_ALU|BPF_XOR|BPF_K] = BPF_S_ALU_XOR_K,
- [BPF_ALU|BPF_XOR|BPF_X] = BPF_S_ALU_XOR_X,
- [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K,
- [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X,
- [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K,
- [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X,
- [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG,
- [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS,
- [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS,
- [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS,
- [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN,
- [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND,
- [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND,
- [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND,
- [BPF_LD|BPF_IMM] = BPF_S_LD_IMM,
- [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN,
- [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH,
- [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM,
- [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX,
- [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA,
- [BPF_RET|BPF_K] = BPF_S_RET_K,
- [BPF_RET|BPF_A] = BPF_S_RET_A,
- [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K,
- [BPF_LD|BPF_MEM] = BPF_S_LD_MEM,
- [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM,
- [BPF_ST] = BPF_S_ST,
- [BPF_STX] = BPF_S_STX,
- [BPF_JMP|BPF_JA] = BPF_S_JMP_JA,
- [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K,
- [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X,
- [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K,
- [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X,
- [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K,
- [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X,
- [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
- [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
- };
- int pc;
bool anc_found;
+ int pc;
if (flen == 0 || flen > BPF_MAXINSNS)
return -EINVAL;
- /* check the filter code now */
+ /* Check the filter code now */
for (pc = 0; pc < flen; pc++) {
struct sock_filter *ftest = &filter[pc];
- u16 code = ftest->code;
- if (code >= ARRAY_SIZE(codes))
- return -EINVAL;
- code = codes[code];
- if (!code)
+ /* May we actually operate on this code? */
+ if (!chk_code_allowed(ftest->code))
return -EINVAL;
+
/* Some instructions need special checks */
- switch (code) {
- case BPF_S_ALU_DIV_K:
- case BPF_S_ALU_MOD_K:
- /* check for division by zero */
+ switch (ftest->code) {
+ case BPF_ALU | BPF_DIV | BPF_K:
+ case BPF_ALU | BPF_MOD | BPF_K:
+ /* Check for division by zero */
if (ftest->k == 0)
return -EINVAL;
break;
- case BPF_S_LD_MEM:
- case BPF_S_LDX_MEM:
- case BPF_S_ST:
- case BPF_S_STX:
- /* check for invalid memory addresses */
+ case BPF_LD | BPF_MEM:
+ case BPF_LDX | BPF_MEM:
+ case BPF_ST:
+ case BPF_STX:
+ /* Check for invalid memory addresses */
if (ftest->k >= BPF_MEMWORDS)
return -EINVAL;
break;
- case BPF_S_JMP_JA:
- /*
- * Note, the large ftest->k might cause loops.
+ case BPF_JMP | BPF_JA:
+ /* Note, the large ftest->k might cause loops.
* Compare this with conditional jumps below,
* where offsets are limited. --ANK (981016)
*/
- if (ftest->k >= (unsigned int)(flen-pc-1))
+ if (ftest->k >= (unsigned int)(flen - pc - 1))
return -EINVAL;
break;
- case BPF_S_JMP_JEQ_K:
- case BPF_S_JMP_JEQ_X:
- case BPF_S_JMP_JGE_K:
- case BPF_S_JMP_JGE_X:
- case BPF_S_JMP_JGT_K:
- case BPF_S_JMP_JGT_X:
- case BPF_S_JMP_JSET_X:
- case BPF_S_JMP_JSET_K:
- /* for conditionals both must be safe */
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ /* Both conditionals must be safe */
if (pc + ftest->jt + 1 >= flen ||
pc + ftest->jf + 1 >= flen)
return -EINVAL;
break;
- case BPF_S_LD_W_ABS:
- case BPF_S_LD_H_ABS:
- case BPF_S_LD_B_ABS:
+ case BPF_LD | BPF_W | BPF_ABS:
+ case BPF_LD | BPF_H | BPF_ABS:
+ case BPF_LD | BPF_B | BPF_ABS:
anc_found = false;
-#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \
- code = BPF_S_ANC_##CODE; \
- anc_found = true; \
- break
- switch (ftest->k) {
- ANCILLARY(PROTOCOL);
- ANCILLARY(PKTTYPE);
- ANCILLARY(IFINDEX);
- ANCILLARY(NLATTR);
- ANCILLARY(NLATTR_NEST);
- ANCILLARY(MARK);
- ANCILLARY(QUEUE);
- ANCILLARY(HATYPE);
- ANCILLARY(RXHASH);
- ANCILLARY(CPU);
- ANCILLARY(ALU_XOR_X);
- ANCILLARY(VLAN_TAG);
- ANCILLARY(VLAN_TAG_PRESENT);
- ANCILLARY(PAY_OFFSET);
- }
-
- /* ancillary operation unknown or unsupported */
+ if (bpf_anc_helper(ftest) & BPF_ANC)
+ anc_found = true;
+ /* Ancillary operation unknown or unsupported */
if (anc_found == false && ftest->k >= SKF_AD_OFF)
return -EINVAL;
}
- ftest->code = code;
}
- /* last instruction must be a RET code */
+ /* Last instruction must be a RET code */
switch (filter[flen - 1].code) {
- case BPF_S_RET_K:
- case BPF_S_RET_A:
+ case BPF_RET | BPF_K:
+ case BPF_RET | BPF_A:
return check_load_and_stores(filter, flen);
}
+
return -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);
struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
sk_release_orig_filter(fp);
- bpf_jit_free(fp);
+ sk_filter_free(fp);
}
/**
fp_new = sock_kmalloc(sk, len, GFP_KERNEL);
if (fp_new) {
- memcpy(fp_new, fp, sizeof(struct sk_filter));
+ *fp_new = *fp;
/* As we're kepping orig_prog in fp_new along,
* we need to make sure we're not evicting it
* from the old fp.
{
struct sock_filter *old_prog;
struct sk_filter *old_fp;
- int i, err, new_len, old_len = fp->len;
+ int err, new_len, old_len = fp->len;
/* We are free to overwrite insns et al right here as it
* won't be used at this point in time anymore internally
BUILD_BUG_ON(sizeof(struct sock_filter) !=
sizeof(struct sock_filter_int));
- /* For now, we need to unfiddle BPF_S_* identifiers in place.
- * This can sooner or later on be subject to removal, e.g. when
- * JITs have been converted.
- */
- for (i = 0; i < fp->len; i++)
- sk_decode_filter(&fp->insns[i], &fp->insns[i]);
-
/* Conversion cannot happen on overlapping memory areas,
* so we need to keep the user BPF around until the 2nd
* pass. At this time, the user BPF is stored in fp->insns.
goto out_err_free;
}
- fp->bpf_func = sk_run_filter_int_skb;
fp->len = new_len;
/* 2nd pass: remap sock_filter insns into sock_filter_int insns. */
*/
goto out_err_free;
+ sk_filter_select_runtime(fp);
+
kfree(old_prog);
return fp;
return ERR_PTR(err);
}
+void __weak bpf_int_jit_compile(struct sk_filter *prog)
+{
+}
+
+/**
+ * sk_filter_select_runtime - select execution runtime for BPF program
+ * @fp: sk_filter populated with internal BPF program
+ *
+ * try to JIT internal BPF program, if JIT is not available select interpreter
+ * BPF program will be executed via SK_RUN_FILTER() macro
+ */
+void sk_filter_select_runtime(struct sk_filter *fp)
+{
+ fp->bpf_func = (void *) __sk_run_filter;
+
+ /* Probe if internal BPF can be JITed */
+ bpf_int_jit_compile(fp);
+}
+EXPORT_SYMBOL_GPL(sk_filter_select_runtime);
+
+/* free internal BPF program */
+void sk_filter_free(struct sk_filter *fp)
+{
+ bpf_jit_free(fp);
+}
+EXPORT_SYMBOL_GPL(sk_filter_free);
+
static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
struct sock *sk)
{
* a negative errno code is returned. On success the return is zero.
*/
int sk_unattached_filter_create(struct sk_filter **pfp,
- struct sock_fprog *fprog)
+ struct sock_fprog_kern *fprog)
{
unsigned int fsize = sk_filter_proglen(fprog);
struct sk_filter *fp;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
-void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
-{
- static const u16 decodes[] = {
- [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K,
- [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X,
- [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K,
- [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X,
- [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K,
- [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X,
- [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X,
- [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K,
- [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X,
- [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K,
- [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X,
- [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K,
- [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X,
- [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K,
- [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X,
- [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K,
- [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X,
- [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K,
- [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X,
- [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG,
- [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS,
- [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS,
- [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN,
- [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND,
- [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND,
- [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND,
- [BPF_S_LD_IMM] = BPF_LD|BPF_IMM,
- [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN,
- [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH,
- [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM,
- [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX,
- [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA,
- [BPF_S_RET_K] = BPF_RET|BPF_K,
- [BPF_S_RET_A] = BPF_RET|BPF_A,
- [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K,
- [BPF_S_LD_MEM] = BPF_LD|BPF_MEM,
- [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM,
- [BPF_S_ST] = BPF_ST,
- [BPF_S_STX] = BPF_STX,
- [BPF_S_JMP_JA] = BPF_JMP|BPF_JA,
- [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K,
- [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X,
- [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K,
- [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X,
- [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K,
- [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X,
- [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K,
- [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X,
- };
- u16 code;
-
- code = filt->code;
-
- to->code = decodes[code];
- to->jt = filt->jt;
- to->jf = filt->jf;
- to->k = filt->k;
-}
-
int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
unsigned int len)
{
if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {
int ret;
+ /* Verify checksum before giving to encap */
+ if (udp_lib_checksum_complete(skb))
+ goto csum_error;
+
ret = encap_rcv(sk, skb);
if (ret <= 0) {
UDP_INC_STATS_BH(sock_net(sk),
if (inet->inet_dport != rmt_port)
continue;
}
- if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
- !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
+ if (!ipv6_addr_any(&s->sk_v6_daddr) &&
+ !ipv6_addr_equal(&s->sk_v6_daddr, rmt_addr))
continue;
if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
continue;
- if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
+ if (!ipv6_addr_any(&s->sk_v6_rcv_saddr)) {
+ if (!ipv6_addr_equal(&s->sk_v6_rcv_saddr, loc_addr))
continue;
}
if (!inet6_mc_check(s, loc_addr, rmt_addr))
if (unlikely(skb1))
kfree_skb(skb1);
}
+
+static void udp6_csum_zero_error(struct sk_buff *skb)
+{
+ /* RFC 2460 section 8.1 says that we SHOULD log
+ * this error. Well, it is reasonable.
+ */
+ LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
+ &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
+ &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
+}
+
/*
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
dif = inet6_iif(skb);
sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
while (sk) {
- stack[count++] = sk;
+ /* If zero checksum and no_check is not on for
+ * the socket then skip it.
+ */
+ if (uh->check || udp_sk(sk)->no_check6_rx)
+ stack[count++] = sk;
+
sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
uh->source, saddr, dif);
if (unlikely(count == ARRAY_SIZE(stack))) {
if (sk != NULL) {
int ret;
+ if (!uh->check && !udp_sk(sk)->no_check6_rx) {
+ sock_put(sk);
+ udp6_csum_zero_error(skb);
+ goto csum_error;
+ }
+
ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
return 0;
}
+ if (!uh->check) {
+ udp6_csum_zero_error(skb);
+ goto csum_error;
+ }
+
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard;
if (is_udplite)
csum = udplite_csum_outgoing(sk, skb);
- else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
+ else if (up->no_check6_tx) { /* UDP csum disabled */
+ skb->ip_summed = CHECKSUM_NONE;
+ goto send;
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr,
up->len);
goto send;
goto out;
}
- if (hlimit < 0) {
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
- }
+ if (hlimit < 0)
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (tclass < 0)
tclass = np->tclass;
.protocol = IPPROTO_UDP,
.prot = &udpv6_prot,
.ops = &inet6_dgram_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_PERMANENT,
};