Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
author     Jakub Kicinski <kuba@kernel.org>
           Mon, 24 Jan 2022 23:42:28 +0000 (15:42 -0800)
committer  Jakub Kicinski <kuba@kernel.org>
           Mon, 24 Jan 2022 23:42:29 +0000 (15:42 -0800)
Daniel Borkmann says:

====================
pull-request: bpf-next 2022-01-24

We've added 80 non-merge commits during the last 14 day(s) which contain
a total of 128 files changed, 4990 insertions(+), 895 deletions(-).

The main changes are:

1) Add XDP multi-buffer support and implement it for the mvneta driver,
   from Lorenzo Bianconi, Eelco Chaudron and Toke Høiland-Jørgensen.
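
   A minimal sketch of what this enables on the BPF program side
   (illustrative only; the frags-aware SEC() name is assumed to match the
   libbpf patch in this series, and the helper signatures follow the new
   UAPI descriptions):

       #include <linux/bpf.h>
       #include <bpf/bpf_helpers.h>

       /* Declaring the program frags-aware makes libbpf load it with
        * BPF_F_XDP_HAS_FRAGS set in prog_flags.
        */
       SEC("xdp.frags")
       int xdp_frags_example(struct xdp_md *ctx)
       {
               __u8 eth[14];

               /* Total frame length: linear part plus all frags. */
               if (bpf_xdp_get_buff_len(ctx) < sizeof(eth))
                       return XDP_DROP;

               /* The copy works even when the range spans into the frags. */
               if (bpf_xdp_load_bytes(ctx, 0, eth, sizeof(eth)))
                       return XDP_DROP;

               return XDP_PASS;
       }

       char _license[] SEC("license") = "GPL";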

2) Add unstable conntrack lookup helpers for BPF by using the BPF kfunc
   infra, from Kumar Kartikeya Dwivedi.
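
   A rough sketch of a lookup from XDP using the new kfuncs; the prototypes
   mirror the __ksym declarations in the new test_bpf_nf.c selftest, and the
   bpf_ct_opts field names are assumptions taken from this series:

       #include "vmlinux.h"
       #include <bpf/bpf_helpers.h>

       /* Unstable kfuncs, resolved against kernel BTF at load time. */
       extern struct nf_conn *
       bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
                         __u32 tuple__sz, struct bpf_ct_opts *opts,
                         __u32 opts__sz) __ksym;
       extern void bpf_ct_release(struct nf_conn *ct) __ksym;

       SEC("xdp")
       int ct_lookup_sketch(struct xdp_md *ctx)
       {
               struct bpf_ct_opts opts = { .netns_id = -1, .l4proto = IPPROTO_TCP };
               struct bpf_sock_tuple tup = {};
               struct nf_conn *ct;

               /* A real program would fill tup.ipv4 from the parsed headers. */
               ct = bpf_xdp_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
               if (ct)
                       bpf_ct_release(ct);
               return XDP_PASS;
       }

       char _license[] SEC("license") = "GPL";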

3) Extend BPF cgroup programs to export a custom return value to userspace via
   two new helpers, bpf_get_retval() and bpf_set_retval(), from YiFei Zhu.
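
   For example, a cgroup sockopt program can reject a call while picking the
   exact errno seen by userspace (a sketch modelled on the new
   cgroup_getset_retval selftests; names are illustrative):

       #include <linux/bpf.h>
       #include <bpf/bpf_helpers.h>

       SEC("cgroup/setsockopt")
       int reject_with_eacces(struct bpf_sockopt *ctx)
       {
               /* Returning 0 rejects the setsockopt() call; bpf_set_retval()
                * selects the errno returned instead of the default -EPERM.
                */
               bpf_set_retval(-13 /* -EACCES */);
               return 0;
       }

       char _license[] SEC("license") = "GPL";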

4) Add support for AF_UNIX iterator batching, from Kuniyuki Iwashima.

5) Complete missing UAPI BPF helper descriptions and change the bpf_doc.py script
   to enforce consistent & complete helper documentation, from Usama Arif.

6) Deprecate libbpf's legacy BPF map definitions and streamline XDP APIs to
   follow tc-based APIs, from Andrii Nakryiko.
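
   The new netlink-backed calls mirror the bpf_tc_*() style APIs; a rough
   userspace usage sketch (interface handling and flags are illustrative):

       #include <errno.h>
       #include <net/if.h>
       #include <linux/if_link.h>
       #include <bpf/libbpf.h>

       /* Attach an already-loaded XDP program and detach it again,
        * replacing the deprecated bpf_set_link_xdp_fd() flow.
        */
       static int xdp_attach_detach(int prog_fd, const char *ifname)
       {
               int ifindex = if_nametoindex(ifname);
               int err;

               if (!ifindex)
                       return -errno;

               err = bpf_xdp_attach(ifindex, prog_fd, XDP_FLAGS_DRV_MODE, NULL);
               if (err)
                       return err;

               return bpf_xdp_detach(ifindex, XDP_FLAGS_DRV_MODE, NULL);
       }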

7) Support BPF_PROG_QUERY for BPF programs attached to sockmap, from Di Zhu.
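
   From userspace this is reachable through the existing bpf_prog_query() API
   pointed at the sockmap fd (a sketch; attach type and buffer size are
   illustrative):

       #include <bpf/bpf.h>

       /* Return the number of verdict programs attached to a sockmap,
        * or a negative error.
        */
       static int query_sockmap(int map_fd)
       {
               __u32 prog_ids[4] = {}, prog_cnt = 4;
               int err;

               err = bpf_prog_query(map_fd, BPF_SK_SKB_STREAM_VERDICT, 0,
                                    NULL, prog_ids, &prog_cnt);
               return err ? err : (int)prog_cnt;
       }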

8) Deprecate libbpf's bpf_map__def() API and replace users with proper getters
   and setters, from Christy Lee.
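
   For example, code that used to poke at the returned bpf_map_def can be
   switched to the per-field accessors (a sketch; getter availability is per
   the libbpf changes in this series):

       #include <stdio.h>
       #include <bpf/libbpf.h>

       static void print_map_info(const struct bpf_map *map)
       {
               /* Before (now deprecated):
                *   const struct bpf_map_def *def = bpf_map__def(map);
                *   printf("%u %u\n", def->key_size, def->max_entries);
                */
               printf("type %u, key %u, value %u, max_entries %u\n",
                      (unsigned int)bpf_map__type(map), bpf_map__key_size(map),
                      bpf_map__value_size(map), bpf_map__max_entries(map));
       }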

9) Extend libbpf's btf__add_btf() with an additional hashmap for strings to
   reduce overhead, from Kui-Feng Lee.

10) Fix bpftool and libbpf error handling related to libbpf's hashmap__new()
    utility function, from Mauricio Vásquez.

11) Add support for BTF program names in bpftool's program dump, from Raman Shukhau.

12) Fix resolve_btfids build to pick up host flags, from Connor O'Brien.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (80 commits)
  selftests, bpf: Do not yet switch to new libbpf XDP APIs
  selftests, xsk: Fix rx_full stats test
  bpf: Fix flexible_array.cocci warnings
  xdp: disable XDP_REDIRECT for xdp frags
  bpf: selftests: add CPUMAP/DEVMAP selftests for xdp frags
  bpf: selftests: introduce bpf_xdp_{load,store}_bytes selftest
  net: xdp: introduce bpf_xdp_pointer utility routine
  bpf: generalise tail call map compatibility check
  libbpf: Add SEC name for xdp frags programs
  bpf: selftests: update xdp_adjust_tail selftest to include xdp frags
  bpf: test_run: add xdp_shared_info pointer in bpf_test_finish signature
  bpf: introduce frags support to bpf_prog_test_run_xdp()
  bpf: move user_size out of bpf_test_init
  bpf: add frags support to xdp copy helpers
  bpf: add frags support to the bpf_xdp_adjust_tail() API
  bpf: introduce bpf_xdp_get_buff_len helper
  net: mvneta: enable jumbo frames if the loaded XDP program support frags
  bpf: introduce BPF_F_XDP_HAS_FRAGS flag in prog_flags loading the ebpf program
  net: mvneta: add frags support to XDP_TX
  xdp: add frags support to xdp_return_{buff/frame}
  ...
====================

Link: https://lore.kernel.org/r/20220124221235.18993-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
128 files changed:
Documentation/bpf/btf.rst
drivers/net/ethernet/marvell/mvneta.c
include/linux/bpf.h
include/linux/bpf_verifier.h
include/linux/btf.h
include/linux/btf_ids.h
include/linux/filter.h
include/linux/skbuff.h
include/net/netfilter/nf_conntrack_bpf.h [new file with mode: 0644]
include/net/xdp.h
include/uapi/linux/bpf.h
kernel/bpf/arraymap.c
kernel/bpf/btf.c
kernel/bpf/cgroup.c
kernel/bpf/core.c
kernel/bpf/cpumap.c
kernel/bpf/devmap.c
kernel/bpf/syscall.c
kernel/bpf/verifier.c
kernel/trace/bpf_trace.c
net/bpf/test_run.c
net/core/filter.c
net/core/net_namespace.c
net/core/sock_map.c
net/core/xdp.c
net/ipv4/bpf_tcp_ca.c
net/ipv4/tcp_bbr.c
net/ipv4/tcp_cubic.c
net/ipv4/tcp_dctcp.c
net/netfilter/Makefile
net/netfilter/nf_conntrack_bpf.c [new file with mode: 0644]
net/netfilter/nf_conntrack_core.c
net/unix/af_unix.c
samples/bpf/xdp1_user.c
samples/bpf/xdp_adjust_tail_user.c
samples/bpf/xdp_fwd_user.c
samples/bpf/xdp_router_ipv4_user.c
samples/bpf/xdp_rxq_info_user.c
samples/bpf/xdp_sample_pkts_user.c
samples/bpf/xdp_sample_user.c
samples/bpf/xdp_tx_iptunnel_user.c
samples/bpf/xdpsock_ctrl_proc.c
samples/bpf/xdpsock_user.c
samples/bpf/xsk_fwd.c
scripts/bpf_doc.py
security/device_cgroup.c
tools/bpf/bpftool/btf.c
tools/bpf/bpftool/cgroup.c
tools/bpf/bpftool/common.c
tools/bpf/bpftool/gen.c
tools/bpf/bpftool/link.c
tools/bpf/bpftool/main.c
tools/bpf/bpftool/main.h
tools/bpf/bpftool/map.c
tools/bpf/bpftool/net.c
tools/bpf/bpftool/pids.c
tools/bpf/bpftool/prog.c
tools/bpf/bpftool/struct_ops.c
tools/bpf/resolve_btfids/Makefile
tools/include/uapi/linux/bpf.h
tools/lib/bpf/bpf.c
tools/lib/bpf/bpf.h
tools/lib/bpf/bpf_helpers.h
tools/lib/bpf/btf.c
tools/lib/bpf/btf.h
tools/lib/bpf/hashmap.c
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/lib/bpf/libbpf_legacy.h
tools/lib/bpf/netlink.c
tools/perf/util/bpf-loader.c
tools/perf/util/bpf_map.c
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
tools/testing/selftests/bpf/config
tools/testing/selftests/bpf/prog_tests/bind_perm.c
tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/bpf_nf.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/btf.c
tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/flow_dissector.c
tools/testing/selftests/bpf/prog_tests/global_data.c
tools/testing/selftests/bpf/prog_tests/global_data_init.c
tools/testing/selftests/bpf/prog_tests/kfunc_call.c
tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
tools/testing/selftests/bpf/prog_tests/tailcalls.c
tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_iter_unix.c
tools/testing/selftests/bpf/progs/bpf_mod_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_tracing_net.h
tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
tools/testing/selftests/bpf/progs/kfunc_call_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/kfunc_call_test.c
tools/testing/selftests/bpf/progs/ksym_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/sample_map_ret0.c
tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
tools/testing/selftests/bpf/progs/sockopt_sk.c
tools/testing/selftests/bpf/progs/test_bpf_nf.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_btf_haskv.c
tools/testing/selftests/bpf/progs/test_btf_newkv.c
tools/testing/selftests/bpf/progs/test_btf_nokv.c
tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_tc_edt.c
tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
tools/testing/selftests/bpf/progs/test_xdp_update_frags.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/bpf/verifier/calls.c
tools/testing/selftests/bpf/xdpxceiver.c

index 1ebf4c5c7ddc2614a7f237e61135236744030ee8..ab08852e53ae2de2e6127d14bbcf0e859c50c169 100644 (file)
@@ -565,18 +565,15 @@ A map can be created with ``btf_fd`` and specified key/value type id.::
 In libbpf, the map can be defined with extra annotation like below:
 ::
 
-    struct bpf_map_def SEC("maps") btf_map = {
-        .type = BPF_MAP_TYPE_ARRAY,
-        .key_size = sizeof(int),
-        .value_size = sizeof(struct ipv_counts),
-        .max_entries = 4,
-    };
-    BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
+    struct {
+        __uint(type, BPF_MAP_TYPE_ARRAY);
+        __type(key, int);
+        __type(value, struct ipv_counts);
+        __uint(max_entries, 4);
+    } btf_map SEC(".maps");
 
-Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name, key and
-value types for the map. During ELF parsing, libbpf is able to extract
-key/value type_id's and assign them to BPF_MAP_CREATE attributes
-automatically.
+During ELF parsing, libbpf is able to extract key/value type_id's and assign
+them to BPF_MAP_CREATE attributes automatically.
 
 .. _BPF_Prog_Load:
 
@@ -824,13 +821,12 @@ structure has bitfields. For example, for the following map,::
            ___A b1:4;
            enum A b2:4;
       };
-      struct bpf_map_def SEC("maps") tmpmap = {
-           .type = BPF_MAP_TYPE_ARRAY,
-           .key_size = sizeof(__u32),
-           .value_size = sizeof(struct tmp_t),
-           .max_entries = 1,
-      };
-      BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t);
+      struct {
+           __uint(type, BPF_MAP_TYPE_ARRAY);
+           __type(key, int);
+           __type(value, struct tmp_t);
+           __uint(max_entries, 1);
+      } tmpmap SEC(".maps");
 
 bpftool is able to pretty print like below:
 ::
index 83c8908f0cc7d7c1fd8d175ce1a98fc3b8f4d5a8..309bd3e97ea00c7017a1f8fdf97592db17a86cbd 100644 (file)
@@ -1884,8 +1884,8 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
                        bytes_compl += buf->skb->len;
                        pkts_compl++;
                        dev_kfree_skb_any(buf->skb);
-               } else if (buf->type == MVNETA_TYPE_XDP_TX ||
-                          buf->type == MVNETA_TYPE_XDP_NDO) {
+               } else if ((buf->type == MVNETA_TYPE_XDP_TX ||
+                           buf->type == MVNETA_TYPE_XDP_NDO) && buf->xdpf) {
                        if (napi && buf->type == MVNETA_TYPE_XDP_TX)
                                xdp_return_frame_rx_napi(buf->xdpf);
                        else
@@ -2060,61 +2060,106 @@ int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
 
 static void
 mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
-                   struct xdp_buff *xdp, struct skb_shared_info *sinfo,
-                   int sync_len)
+                   struct xdp_buff *xdp, int sync_len)
 {
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
        int i;
 
+       if (likely(!xdp_buff_has_frags(xdp)))
+               goto out;
+
        for (i = 0; i < sinfo->nr_frags; i++)
                page_pool_put_full_page(rxq->page_pool,
                                        skb_frag_page(&sinfo->frags[i]), true);
+
+out:
        page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data),
                           sync_len, true);
 }
 
 static int
 mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
-                       struct xdp_frame *xdpf, bool dma_map)
+                       struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map)
 {
-       struct mvneta_tx_desc *tx_desc;
-       struct mvneta_tx_buf *buf;
-       dma_addr_t dma_addr;
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
+       struct device *dev = pp->dev->dev.parent;
+       struct mvneta_tx_desc *tx_desc = NULL;
+       int i, num_frames = 1;
+       struct page *page;
+
+       if (unlikely(xdp_frame_has_frags(xdpf)))
+               num_frames += sinfo->nr_frags;
 
-       if (txq->count >= txq->tx_stop_threshold)
+       if (txq->count + num_frames >= txq->size)
                return MVNETA_XDP_DROPPED;
 
-       tx_desc = mvneta_txq_next_desc_get(txq);
+       for (i = 0; i < num_frames; i++) {
+               struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+               skb_frag_t *frag = NULL;
+               int len = xdpf->len;
+               dma_addr_t dma_addr;
 
-       buf = &txq->buf[txq->txq_put_index];
-       if (dma_map) {
-               /* ndo_xdp_xmit */
-               dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data,
-                                         xdpf->len, DMA_TO_DEVICE);
-               if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) {
-                       mvneta_txq_desc_put(txq);
-                       return MVNETA_XDP_DROPPED;
+               if (unlikely(i)) { /* paged area */
+                       frag = &sinfo->frags[i - 1];
+                       len = skb_frag_size(frag);
                }
-               buf->type = MVNETA_TYPE_XDP_NDO;
-       } else {
-               struct page *page = virt_to_page(xdpf->data);
 
-               dma_addr = page_pool_get_dma_addr(page) +
-                          sizeof(*xdpf) + xdpf->headroom;
-               dma_sync_single_for_device(pp->dev->dev.parent, dma_addr,
-                                          xdpf->len, DMA_BIDIRECTIONAL);
-               buf->type = MVNETA_TYPE_XDP_TX;
+               tx_desc = mvneta_txq_next_desc_get(txq);
+               if (dma_map) {
+                       /* ndo_xdp_xmit */
+                       void *data;
+
+                       data = unlikely(frag) ? skb_frag_address(frag)
+                                             : xdpf->data;
+                       dma_addr = dma_map_single(dev, data, len,
+                                                 DMA_TO_DEVICE);
+                       if (dma_mapping_error(dev, dma_addr)) {
+                               mvneta_txq_desc_put(txq);
+                               goto unmap;
+                       }
+
+                       buf->type = MVNETA_TYPE_XDP_NDO;
+               } else {
+                       page = unlikely(frag) ? skb_frag_page(frag)
+                                             : virt_to_page(xdpf->data);
+                       dma_addr = page_pool_get_dma_addr(page);
+                       if (unlikely(frag))
+                               dma_addr += skb_frag_off(frag);
+                       else
+                               dma_addr += sizeof(*xdpf) + xdpf->headroom;
+                       dma_sync_single_for_device(dev, dma_addr, len,
+                                                  DMA_BIDIRECTIONAL);
+                       buf->type = MVNETA_TYPE_XDP_TX;
+               }
+               buf->xdpf = unlikely(i) ? NULL : xdpf;
+
+               tx_desc->command = unlikely(i) ? 0 : MVNETA_TXD_F_DESC;
+               tx_desc->buf_phys_addr = dma_addr;
+               tx_desc->data_size = len;
+               *nxmit_byte += len;
+
+               mvneta_txq_inc_put(txq);
        }
-       buf->xdpf = xdpf;
 
-       tx_desc->command = MVNETA_TXD_FLZ_DESC;
-       tx_desc->buf_phys_addr = dma_addr;
-       tx_desc->data_size = xdpf->len;
+       /*last descriptor */
+       if (likely(tx_desc))
+               tx_desc->command |= MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD;
 
-       mvneta_txq_inc_put(txq);
-       txq->pending++;
-       txq->count++;
+       txq->pending += num_frames;
+       txq->count += num_frames;
 
        return MVNETA_XDP_TX;
+
+unmap:
+       for (i--; i >= 0; i--) {
+               mvneta_txq_desc_put(txq);
+               tx_desc = txq->descs + txq->next_desc_to_proc;
+               dma_unmap_single(dev, tx_desc->buf_phys_addr,
+                                tx_desc->data_size,
+                                DMA_TO_DEVICE);
+       }
+
+       return MVNETA_XDP_DROPPED;
 }
 
 static int
@@ -2123,8 +2168,8 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
        struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
        struct mvneta_tx_queue *txq;
        struct netdev_queue *nq;
+       int cpu, nxmit_byte = 0;
        struct xdp_frame *xdpf;
-       int cpu;
        u32 ret;
 
        xdpf = xdp_convert_buff_to_frame(xdp);
@@ -2136,10 +2181,10 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
        nq = netdev_get_tx_queue(pp->dev, txq->id);
 
        __netif_tx_lock(nq, cpu);
-       ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false);
+       ret = mvneta_xdp_submit_frame(pp, txq, xdpf, &nxmit_byte, false);
        if (ret == MVNETA_XDP_TX) {
                u64_stats_update_begin(&stats->syncp);
-               stats->es.ps.tx_bytes += xdpf->len;
+               stats->es.ps.tx_bytes += nxmit_byte;
                stats->es.ps.tx_packets++;
                stats->es.ps.xdp_tx++;
                u64_stats_update_end(&stats->syncp);
@@ -2178,11 +2223,11 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame,
 
        __netif_tx_lock(nq, cpu);
        for (i = 0; i < num_frame; i++) {
-               ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true);
+               ret = mvneta_xdp_submit_frame(pp, txq, frames[i], &nxmit_byte,
+                                             true);
                if (ret != MVNETA_XDP_TX)
                        break;
 
-               nxmit_byte += frames[i]->len;
                nxmit++;
        }
 
@@ -2205,7 +2250,6 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
               struct bpf_prog *prog, struct xdp_buff *xdp,
               u32 frame_sz, struct mvneta_stats *stats)
 {
-       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
        unsigned int len, data_len, sync;
        u32 ret, act;
 
@@ -2226,7 +2270,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 
                err = xdp_do_redirect(pp->dev, xdp, prog);
                if (unlikely(err)) {
-                       mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+                       mvneta_xdp_put_buff(pp, rxq, xdp, sync);
                        ret = MVNETA_XDP_DROPPED;
                } else {
                        ret = MVNETA_XDP_REDIR;
@@ -2237,7 +2281,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
        case XDP_TX:
                ret = mvneta_xdp_xmit_back(pp, xdp);
                if (ret != MVNETA_XDP_TX)
-                       mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+                       mvneta_xdp_put_buff(pp, rxq, xdp, sync);
                break;
        default:
                bpf_warn_invalid_xdp_action(pp->dev, prog, act);
@@ -2246,7 +2290,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
                trace_xdp_exception(pp->dev, prog, act);
                fallthrough;
        case XDP_DROP:
-               mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+               mvneta_xdp_put_buff(pp, rxq, xdp, sync);
                ret = MVNETA_XDP_DROPPED;
                stats->xdp_drop++;
                break;
@@ -2269,7 +2313,6 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp,
        int data_len = -MVNETA_MH_SIZE, len;
        struct net_device *dev = pp->dev;
        enum dma_data_direction dma_dir;
-       struct skb_shared_info *sinfo;
 
        if (*size > MVNETA_MAX_RX_BUF_SIZE) {
                len = MVNETA_MAX_RX_BUF_SIZE;
@@ -2289,11 +2332,9 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp,
 
        /* Prefetch header */
        prefetch(data);
+       xdp_buff_clear_frags_flag(xdp);
        xdp_prepare_buff(xdp, data, pp->rx_offset_correction + MVNETA_MH_SIZE,
                         data_len, false);
-
-       sinfo = xdp_get_shared_info_from_buff(xdp);
-       sinfo->nr_frags = 0;
 }
 
 static void
@@ -2301,9 +2342,9 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
                            struct mvneta_rx_desc *rx_desc,
                            struct mvneta_rx_queue *rxq,
                            struct xdp_buff *xdp, int *size,
-                           struct skb_shared_info *xdp_sinfo,
                            struct page *page)
 {
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
        struct net_device *dev = pp->dev;
        enum dma_data_direction dma_dir;
        int data_len, len;
@@ -2321,25 +2362,25 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
                                len, dma_dir);
        rx_desc->buf_phys_addr = 0;
 
-       if (data_len > 0 && xdp_sinfo->nr_frags < MAX_SKB_FRAGS) {
-               skb_frag_t *frag = &xdp_sinfo->frags[xdp_sinfo->nr_frags++];
+       if (!xdp_buff_has_frags(xdp))
+               sinfo->nr_frags = 0;
+
+       if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) {
+               skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++];
 
                skb_frag_off_set(frag, pp->rx_offset_correction);
                skb_frag_size_set(frag, data_len);
                __skb_frag_set_page(frag, page);
+
+               if (!xdp_buff_has_frags(xdp)) {
+                       sinfo->xdp_frags_size = *size;
+                       xdp_buff_set_frags_flag(xdp);
+               }
+               if (page_is_pfmemalloc(page))
+                       xdp_buff_set_frag_pfmemalloc(xdp);
        } else {
                page_pool_put_full_page(rxq->page_pool, page, true);
        }
-
-       /* last fragment */
-       if (len == *size) {
-               struct skb_shared_info *sinfo;
-
-               sinfo = xdp_get_shared_info_from_buff(xdp);
-               sinfo->nr_frags = xdp_sinfo->nr_frags;
-               memcpy(sinfo->frags, xdp_sinfo->frags,
-                      sinfo->nr_frags * sizeof(skb_frag_t));
-       }
        *size -= len;
 }
 
@@ -2348,8 +2389,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
                      struct xdp_buff *xdp, u32 desc_status)
 {
        struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
-       int i, num_frags = sinfo->nr_frags;
        struct sk_buff *skb;
+       u8 num_frags;
+
+       if (unlikely(xdp_buff_has_frags(xdp)))
+               num_frags = sinfo->nr_frags;
 
        skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
        if (!skb)
@@ -2361,13 +2405,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
        skb_put(skb, xdp->data_end - xdp->data);
        skb->ip_summed = mvneta_rx_csum(pp, desc_status);
 
-       for (i = 0; i < num_frags; i++) {
-               skb_frag_t *frag = &sinfo->frags[i];
-
-               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-                               skb_frag_page(frag), skb_frag_off(frag),
-                               skb_frag_size(frag), PAGE_SIZE);
-       }
+       if (unlikely(xdp_buff_has_frags(xdp)))
+               xdp_update_skb_shared_info(skb, num_frags,
+                                          sinfo->xdp_frags_size,
+                                          num_frags * xdp->frame_sz,
+                                          xdp_buff_is_frag_pfmemalloc(xdp));
 
        return skb;
 }
@@ -2379,7 +2421,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
 {
        int rx_proc = 0, rx_todo, refill, size = 0;
        struct net_device *dev = pp->dev;
-       struct skb_shared_info sinfo;
        struct mvneta_stats ps = {};
        struct bpf_prog *xdp_prog;
        u32 desc_status, frame_sz;
@@ -2388,8 +2429,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
        xdp_init_buff(&xdp_buf, PAGE_SIZE, &rxq->xdp_rxq);
        xdp_buf.data_hard_start = NULL;
 
-       sinfo.nr_frags = 0;
-
        /* Get number of received packets */
        rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
 
@@ -2431,7 +2470,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                        }
 
                        mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf,
-                                                   &size, &sinfo, page);
+                                                   &size, page);
                } /* Middle or Last descriptor */
 
                if (!(rx_status & MVNETA_RXD_LAST_DESC))
@@ -2439,7 +2478,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                        continue;
 
                if (size) {
-                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
                        goto next;
                }
 
@@ -2451,7 +2490,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                if (IS_ERR(skb)) {
                        struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
 
-                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
 
                        u64_stats_update_begin(&stats->syncp);
                        stats->es.skb_alloc_error++;
@@ -2468,11 +2507,10 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                napi_gro_receive(napi, skb);
 next:
                xdp_buf.data_hard_start = NULL;
-               sinfo.nr_frags = 0;
        }
 
        if (xdp_buf.data_hard_start)
-               mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+               mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
 
        if (ps.xdp_redirect)
                xdp_do_flush_map();
@@ -3260,7 +3298,8 @@ static int mvneta_create_page_pool(struct mvneta_port *pp,
                return err;
        }
 
-       err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0);
+       err = __xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0,
+                                PAGE_SIZE);
        if (err < 0)
                goto err_free_pp;
 
@@ -3740,6 +3779,7 @@ static void mvneta_percpu_disable(void *arg)
 static int mvneta_change_mtu(struct net_device *dev, int mtu)
 {
        struct mvneta_port *pp = netdev_priv(dev);
+       struct bpf_prog *prog = pp->xdp_prog;
        int ret;
 
        if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) {
@@ -3748,8 +3788,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
                mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
        }
 
-       if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) {
-               netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu);
+       if (prog && !prog->aux->xdp_has_frags &&
+           mtu > MVNETA_MAX_RX_BUF_SIZE) {
+               netdev_info(dev, "Illegal MTU %d for XDP prog without frags\n",
+                           mtu);
+
                return -EINVAL;
        }
 
@@ -4490,8 +4533,9 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
        struct mvneta_port *pp = netdev_priv(dev);
        struct bpf_prog *old_prog;
 
-       if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
-               NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP");
+       if (prog && !prog->aux->xdp_has_frags &&
+           dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
+               NL_SET_ERR_MSG_MOD(extack, "prog does not support XDP frags");
                return -EOPNOTSUPP;
        }
 
index fa517ae604ad2f2d3603b4103d69ace80f3ac584..8c92c974bd12f8f53657e4bccc3cbed11cb41b22 100644 (file)
@@ -194,6 +194,17 @@ struct bpf_map {
        struct work_struct work;
        struct mutex freeze_mutex;
        atomic64_t writecnt;
+       /* 'Ownership' of program-containing map is claimed by the first program
+        * that is going to use this map or by the first program which FD is
+        * stored in the map to make sure that all callers and callees have the
+        * same prog type, JITed flag and xdp_has_frags flag.
+        */
+       struct {
+               spinlock_t lock;
+               enum bpf_prog_type type;
+               bool jited;
+               bool xdp_has_frags;
+       } owner;
 };
 
 static inline bool map_value_has_spin_lock(const struct bpf_map *map)
@@ -578,7 +589,6 @@ struct bpf_verifier_ops {
                                 const struct btf_type *t, int off, int size,
                                 enum bpf_access_type atype,
                                 u32 *next_btf_id);
-       bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner);
 };
 
 struct bpf_prog_offload_ops {
@@ -939,6 +949,7 @@ struct bpf_prog_aux {
        bool func_proto_unreliable;
        bool sleepable;
        bool tail_call_reachable;
+       bool xdp_has_frags;
        struct hlist_node tramp_hlist;
        /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
        const struct btf_type *attach_func_proto;
@@ -999,16 +1010,6 @@ struct bpf_prog_aux {
 };
 
 struct bpf_array_aux {
-       /* 'Ownership' of prog array is claimed by the first program that
-        * is going to use this map or by the first program which FD is
-        * stored in the map to make sure that all callers and callees have
-        * the same prog type and JITed flag.
-        */
-       struct {
-               spinlock_t lock;
-               enum bpf_prog_type type;
-               bool jited;
-       } owner;
        /* Programs with direct jumps into programs part of this array. */
        struct list_head poke_progs;
        struct bpf_map *map;
@@ -1183,7 +1184,14 @@ struct bpf_event_entry {
        struct rcu_head rcu;
 };
 
-bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
+static inline bool map_type_contains_progs(struct bpf_map *map)
+{
+       return map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
+              map->map_type == BPF_MAP_TYPE_DEVMAP ||
+              map->map_type == BPF_MAP_TYPE_CPUMAP;
+}
+
+bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp);
 int bpf_prog_calc_tag(struct bpf_prog *fp);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
@@ -1251,6 +1259,7 @@ struct bpf_run_ctx {};
 struct bpf_cg_run_ctx {
        struct bpf_run_ctx run_ctx;
        const struct bpf_prog_array_item *prog_item;
+       int retval;
 };
 
 struct bpf_trace_run_ctx {
@@ -1283,19 +1292,19 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
 
 typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
 
-static __always_inline u32
+static __always_inline int
 BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
                            const void *ctx, bpf_prog_run_fn run_prog,
-                           u32 *ret_flags)
+                           int retval, u32 *ret_flags)
 {
        const struct bpf_prog_array_item *item;
        const struct bpf_prog *prog;
        const struct bpf_prog_array *array;
        struct bpf_run_ctx *old_run_ctx;
        struct bpf_cg_run_ctx run_ctx;
-       u32 ret = 1;
        u32 func_ret;
 
+       run_ctx.retval = retval;
        migrate_disable();
        rcu_read_lock();
        array = rcu_dereference(array_rcu);
@@ -1304,27 +1313,29 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
        while ((prog = READ_ONCE(item->prog))) {
                run_ctx.prog_item = item;
                func_ret = run_prog(prog, ctx);
-               ret &= (func_ret & 1);
+               if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
+                       run_ctx.retval = -EPERM;
                *(ret_flags) |= (func_ret >> 1);
                item++;
        }
        bpf_reset_run_ctx(old_run_ctx);
        rcu_read_unlock();
        migrate_enable();
-       return ret;
+       return run_ctx.retval;
 }
 
-static __always_inline u32
+static __always_inline int
 BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
-                     const void *ctx, bpf_prog_run_fn run_prog)
+                     const void *ctx, bpf_prog_run_fn run_prog,
+                     int retval)
 {
        const struct bpf_prog_array_item *item;
        const struct bpf_prog *prog;
        const struct bpf_prog_array *array;
        struct bpf_run_ctx *old_run_ctx;
        struct bpf_cg_run_ctx run_ctx;
-       u32 ret = 1;
 
+       run_ctx.retval = retval;
        migrate_disable();
        rcu_read_lock();
        array = rcu_dereference(array_rcu);
@@ -1332,13 +1343,14 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
        old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
        while ((prog = READ_ONCE(item->prog))) {
                run_ctx.prog_item = item;
-               ret &= run_prog(prog, ctx);
+               if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
+                       run_ctx.retval = -EPERM;
                item++;
        }
        bpf_reset_run_ctx(old_run_ctx);
        rcu_read_unlock();
        migrate_enable();
-       return ret;
+       return run_ctx.retval;
 }
 
 static __always_inline u32
@@ -1391,19 +1403,21 @@ out:
  *   0: NET_XMIT_SUCCESS  skb should be transmitted
  *   1: NET_XMIT_DROP     skb should be dropped and cn
  *   2: NET_XMIT_CN       skb should be transmitted and cn
- *   3: -EPERM            skb should be dropped
+ *   3: -err              skb should be dropped
  */
 #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func)                \
        ({                                              \
                u32 _flags = 0;                         \
                bool _cn;                               \
                u32 _ret;                               \
-               _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
+               _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
                _cn = _flags & BPF_RET_SET_CN;          \
-               if (_ret)                               \
+               if (_ret && !IS_ERR_VALUE((long)_ret))  \
+                       _ret = -EFAULT;                 \
+               if (!_ret)                              \
                        _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);  \
                else                                    \
-                       _ret = (_cn ? NET_XMIT_DROP : -EPERM);          \
+                       _ret = (_cn ? NET_XMIT_DROP : _ret);            \
                _ret;                                   \
        })
 
@@ -1724,7 +1738,6 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
 int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
                                const union bpf_attr *kattr,
                                union bpf_attr __user *uattr);
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner);
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
                    const struct bpf_prog *prog,
                    struct bpf_insn_access_aux *info);
@@ -1976,12 +1989,6 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
        return -ENOTSUPP;
 }
 
-static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id,
-                                                 struct module *owner)
-{
-       return false;
-}
-
 static inline void bpf_map_put(struct bpf_map *map)
 {
 }
@@ -2076,6 +2083,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
 int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
 int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+                           union bpf_attr __user *uattr);
+
 void sock_map_unhash(struct sock *sk);
 void sock_map_close(struct sock *sk, long timeout);
 #else
@@ -2129,6 +2139,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
 {
        return -EOPNOTSUPP;
 }
+
+static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
+                                         union bpf_attr __user *uattr)
+{
+       return -EINVAL;
+}
 #endif /* CONFIG_BPF_SYSCALL */
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
index e9993172f892e8297487bafc8f54c3002fde4e87..7a7be8c057f2c2c2e971d02e248d9bfcfcb1f18f 100644 (file)
@@ -521,6 +521,8 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
 
 int check_ptr_off_reg(struct bpf_verifier_env *env,
                      const struct bpf_reg_state *reg, int regno);
+int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+                            u32 regno);
 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
                   u32 regno, u32 mem_size);
 
@@ -564,4 +566,9 @@ static inline u32 type_flag(u32 type)
        return type & ~BPF_BASE_TYPE_MASK;
 }
 
+static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
+{
+       return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
+}
+
 #endif /* _LINUX_BPF_VERIFIER_H */
index 0c74348cbc9dc23e04e5ca36e90f834b2fb2373e..b12cfe3b12bbe593e60e8ca95192fbe3e0599ea6 100644 (file)
 #define BTF_TYPE_EMIT(type) ((void)(type *)0)
 #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val)
 
+enum btf_kfunc_type {
+       BTF_KFUNC_TYPE_CHECK,
+       BTF_KFUNC_TYPE_ACQUIRE,
+       BTF_KFUNC_TYPE_RELEASE,
+       BTF_KFUNC_TYPE_RET_NULL,
+       BTF_KFUNC_TYPE_MAX,
+};
+
 struct btf;
 struct btf_member;
 struct btf_type;
 union bpf_attr;
 struct btf_show;
+struct btf_id_set;
+
+struct btf_kfunc_id_set {
+       struct module *owner;
+       union {
+               struct {
+                       struct btf_id_set *check_set;
+                       struct btf_id_set *acquire_set;
+                       struct btf_id_set *release_set;
+                       struct btf_id_set *ret_null_set;
+               };
+               struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX];
+       };
+};
 
 extern const struct file_operations btf_fops;
 
@@ -307,6 +329,11 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
 const char *btf_name_by_offset(const struct btf *btf, u32 offset);
 struct btf *btf_parse_vmlinux(void);
 struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
+bool btf_kfunc_id_set_contains(const struct btf *btf,
+                              enum bpf_prog_type prog_type,
+                              enum btf_kfunc_type type, u32 kfunc_btf_id);
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+                             const struct btf_kfunc_id_set *s);
 #else
 static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
                                                    u32 type_id)
@@ -318,50 +345,18 @@ static inline const char *btf_name_by_offset(const struct btf *btf,
 {
        return NULL;
 }
-#endif
-
-struct kfunc_btf_id_set {
-       struct list_head list;
-       struct btf_id_set *set;
-       struct module *owner;
-};
-
-struct kfunc_btf_id_list {
-       struct list_head list;
-       struct mutex mutex;
-};
-
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                              struct kfunc_btf_id_set *s);
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                struct kfunc_btf_id_set *s);
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
-                             struct module *owner);
-
-extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
-extern struct kfunc_btf_id_list prog_test_kfunc_list;
-#else
-static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                            struct kfunc_btf_id_set *s)
-{
-}
-static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                              struct kfunc_btf_id_set *s)
+static inline bool btf_kfunc_id_set_contains(const struct btf *btf,
+                                            enum bpf_prog_type prog_type,
+                                            enum btf_kfunc_type type,
+                                            u32 kfunc_btf_id)
 {
+       return false;
 }
-static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist,
-                                           u32 kfunc_id, struct module *owner)
+static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+                                           const struct btf_kfunc_id_set *s)
 {
-       return false;
+       return 0;
 }
-
-static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused;
-static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused;
 #endif
 
-#define DEFINE_KFUNC_BTF_ID_SET(set, name)                                     \
-       struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set),     \
-                                        THIS_MODULE }
-
 #endif
index 919c0fde1c515f1699aef52eeb5b2e170e965cf7..bc5d9cc34e4cb3799017a979b946a8a05a1b2668 100644 (file)
@@ -11,6 +11,7 @@ struct btf_id_set {
 #ifdef CONFIG_DEBUG_INFO_BTF
 
 #include <linux/compiler.h> /* for __PASTE */
+#include <linux/compiler_attributes.h> /* for __maybe_unused */
 
 /*
  * Following macros help to define lists of BTF IDs placed
@@ -146,14 +147,14 @@ extern struct btf_id_set name;
 
 #else
 
-#define BTF_ID_LIST(name) static u32 name[5];
+#define BTF_ID_LIST(name) static u32 __maybe_unused name[5];
 #define BTF_ID(prefix, name)
 #define BTF_ID_UNUSED
-#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n];
-#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
-#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1];
-#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
-#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
+#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n];
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1];
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1];
+#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 };
+#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 };
 #define BTF_SET_END(name)
 
 #endif /* CONFIG_DEBUG_INFO_BTF */
index 71fa57b88bfc0d42db2d812858e599c93185a785..d23e999dc0324b72dae804049fde2092b250a5d0 100644 (file)
@@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern {
        s32             level;
        s32             optname;
        s32             optlen;
-       s32             retval;
+       /* for retval in struct bpf_cg_run_ctx */
+       struct task_struct *current_task;
+       /* Temporary "register" for indirect stores to ppos. */
+       u64             tmp_reg;
 };
 
 int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
index bf11e1fbd69b65660ca217c5d22623a9b7046e9c..8131d0de7559c0c092c3623c2786ac33fbb924c2 100644 (file)
@@ -557,6 +557,7 @@ struct skb_shared_info {
         * Warning : all fields before dataref are cleared in __alloc_skb()
         */
        atomic_t        dataref;
+       unsigned int    xdp_frags_size;
 
        /* Intermediate layers must ensure that destructor_arg
         * remains valid until skb destructor */
diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h
new file mode 100644 (file)
index 0000000..a473b56
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NF_CONNTRACK_BPF_H
+#define _NF_CONNTRACK_BPF_H
+
+#include <linux/btf.h>
+#include <linux/kconfig.h>
+
+#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+    (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+
+extern int register_nf_conntrack_bpf(void);
+
+#else
+
+static inline int register_nf_conntrack_bpf(void)
+{
+       return 0;
+}
+
+#endif
+
+#endif /* _NF_CONNTRACK_BPF_H */
index 8f0812e4996da64743a2e7f50ca99f16cdf453ba..b7721c3e4d1fac5ed97a3503c7ce3939033c1780 100644 (file)
@@ -60,12 +60,20 @@ struct xdp_rxq_info {
        u32 reg_state;
        struct xdp_mem_info mem;
        unsigned int napi_id;
+       u32 frag_size;
 } ____cacheline_aligned; /* perf critical, avoid false-sharing */
 
 struct xdp_txq_info {
        struct net_device *dev;
 };
 
+enum xdp_buff_flags {
+       XDP_FLAGS_HAS_FRAGS             = BIT(0), /* non-linear xdp buff */
+       XDP_FLAGS_FRAGS_PF_MEMALLOC     = BIT(1), /* xdp paged memory is under
+                                                  * pressure
+                                                  */
+};
+
 struct xdp_buff {
        void *data;
        void *data_end;
@@ -74,13 +82,40 @@ struct xdp_buff {
        struct xdp_rxq_info *rxq;
        struct xdp_txq_info *txq;
        u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
+       u32 flags; /* supported values defined in xdp_buff_flags */
 };
 
+static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
+{
+       return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
+}
+
+static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp)
+{
+       xdp->flags |= XDP_FLAGS_HAS_FRAGS;
+}
+
+static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp)
+{
+       xdp->flags &= ~XDP_FLAGS_HAS_FRAGS;
+}
+
+static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+       return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+}
+
+static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+       xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
+}
+
 static __always_inline void
 xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
 {
        xdp->frame_sz = frame_sz;
        xdp->rxq = rxq;
+       xdp->flags = 0;
 }
 
 static __always_inline void
@@ -111,6 +146,20 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp)
        return (struct skb_shared_info *)xdp_data_hard_end(xdp);
 }
 
+static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp)
+{
+       unsigned int len = xdp->data_end - xdp->data;
+       struct skb_shared_info *sinfo;
+
+       if (likely(!xdp_buff_has_frags(xdp)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_buff(xdp);
+       len += sinfo->xdp_frags_size;
+out:
+       return len;
+}
+
 struct xdp_frame {
        void *data;
        u16 len;
@@ -122,8 +171,19 @@ struct xdp_frame {
         */
        struct xdp_mem_info mem;
        struct net_device *dev_rx; /* used by cpumap */
+       u32 flags; /* supported values defined in xdp_buff_flags */
 };
 
+static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame)
+{
+       return !!(frame->flags & XDP_FLAGS_HAS_FRAGS);
+}
+
+static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame)
+{
+       return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+}
+
 #define XDP_BULK_QUEUE_SIZE    16
 struct xdp_frame_bulk {
        int count;
@@ -159,6 +219,19 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame)
        frame->dev_rx = NULL;
 }
 
+static inline void
+xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
+                          unsigned int size, unsigned int truesize,
+                          bool pfmemalloc)
+{
+       skb_shinfo(skb)->nr_frags = nr_frags;
+
+       skb->len += size;
+       skb->data_len += size;
+       skb->truesize += truesize;
+       skb->pfmemalloc |= pfmemalloc;
+}
+
 /* Avoids inlining WARN macro in fast-path */
 void xdp_warn(const char *msg, const char *func, const int line);
 #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
@@ -180,6 +253,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
        xdp->data_end = frame->data + frame->len;
        xdp->data_meta = frame->data - frame->metasize;
        xdp->frame_sz = frame->frame_sz;
+       xdp->flags = frame->flags;
 }
 
 static inline
@@ -206,6 +280,7 @@ int xdp_update_frame_from_buff(struct xdp_buff *xdp,
        xdp_frame->headroom = headroom - sizeof(*xdp_frame);
        xdp_frame->metasize = metasize;
        xdp_frame->frame_sz = xdp->frame_sz;
+       xdp_frame->flags = xdp->flags;
 
        return 0;
 }
@@ -230,6 +305,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
        return xdp_frame;
 }
 
+void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+                 struct xdp_buff *xdp);
 void xdp_return_frame(struct xdp_frame *xdpf);
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
 void xdp_return_buff(struct xdp_buff *xdp);
@@ -246,14 +323,37 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
 static inline void xdp_release_frame(struct xdp_frame *xdpf)
 {
        struct xdp_mem_info *mem = &xdpf->mem;
+       struct skb_shared_info *sinfo;
+       int i;
 
        /* Curr only page_pool needs this */
-       if (mem->type == MEM_TYPE_PAGE_POOL)
-               __xdp_release_frame(xdpf->data, mem);
+       if (mem->type != MEM_TYPE_PAGE_POOL)
+               return;
+
+       if (likely(!xdp_frame_has_frags(xdpf)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_frame(xdpf);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_release_frame(page_address(page), mem);
+       }
+out:
+       __xdp_release_frame(xdpf->data, mem);
+}
+
+int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+                      struct net_device *dev, u32 queue_index,
+                      unsigned int napi_id, u32 frag_size);
+static inline int
+xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+                struct net_device *dev, u32 queue_index,
+                unsigned int napi_id)
+{
+       return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0);
 }
 
-int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
-                    struct net_device *dev, u32 queue_index, unsigned int napi_id);
 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
 void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
 bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
index b0383d371b9af53ea816766fb9fc95cbec277ebe..16a7574292a53a408f7cdaa0c9afb3e7d8710320 100644 (file)
@@ -330,6 +330,8 @@ union bpf_iter_link_info {
  *                     *ctx_out*, *data_in* and *data_out* must be NULL.
  *                     *repeat* must be zero.
  *
+ *             BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN.
+ *
  *     Return
  *             Returns zero on success. On error, -1 is returned and *errno*
  *             is set appropriately.
@@ -1111,6 +1113,11 @@ enum bpf_link_type {
  */
 #define BPF_F_SLEEPABLE                (1U << 4)
 
+/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program
+ * fully support xdp frags.
+ */
+#define BPF_F_XDP_HAS_FRAGS    (1U << 5)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * the following extensions:
  *
@@ -1775,6 +1782,8 @@ union bpf_attr {
  *             0 on success, or a negative error in case of failure.
  *
  * u64 bpf_get_current_pid_tgid(void)
+ *     Description
+ *             Get the current pid and tgid.
  *     Return
  *             A 64-bit integer containing the current tgid and pid, and
  *             created as such:
@@ -1782,6 +1791,8 @@ union bpf_attr {
  *             *current_task*\ **->pid**.
  *
  * u64 bpf_get_current_uid_gid(void)
+ *     Description
+ *             Get the current uid and gid.
  *     Return
  *             A 64-bit integer containing the current GID and UID, and
  *             created as such: *current_gid* **<< 32 \|** *current_uid*.
@@ -2256,6 +2267,8 @@ union bpf_attr {
  *             The 32-bit hash.
  *
  * u64 bpf_get_current_task(void)
+ *     Description
+ *             Get the current task.
  *     Return
  *             A pointer to the current task struct.
  *
@@ -2369,6 +2382,8 @@ union bpf_attr {
  *             indicate that the hash is outdated and to trigger a
  *             recalculation the next time the kernel tries to access this
  *             hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *     Return
+ *             void.
  *
  * long bpf_get_numa_node_id(void)
  *     Description
@@ -2466,6 +2481,8 @@ union bpf_attr {
  *             A 8-byte long unique number or 0 if *sk* is NULL.
  *
  * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ *     Description
+ *             Get the owner UID of the socked associated to *skb*.
  *     Return
  *             The owner UID of the socket associated to *skb*. If the socket
  *             is **NULL**, or if it is not a full socket (i.e. if it is a
@@ -3240,6 +3257,9 @@ union bpf_attr {
  *             The id is returned or 0 in case the id could not be retrieved.
  *
  * u64 bpf_get_current_cgroup_id(void)
+ *     Description
+ *             Get the current cgroup id based on the cgroup within which
+ *             the current task is running.
  *     Return
  *             A 64-bit integer containing the current cgroup id based
  *             on the cgroup within which the current task is running.
@@ -5018,6 +5038,44 @@ union bpf_attr {
  *
  *     Return
  *             The number of arguments of the traced function.
+ *
+ * int bpf_get_retval(void)
+ *     Description
+ *             Get the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             The syscall's return value.
+ *
+ * int bpf_set_retval(int retval)
+ *     Description
+ *             Set the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md)
+ *     Description
+ *             Get the total size of a given xdp buff (linear and paged area)
+ *     Return
+ *             The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             This helper is provided as an easy way to load data from a
+ *             xdp buffer. It can be used to load *len* bytes from *offset* from
+ *             the frame associated to *xdp_md*, into the buffer pointed by
+ *             *buf*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             Store *len* bytes from buffer *buf* into the frame
+ *             associated to *xdp_md*, at *offset*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -5206,6 +5264,11 @@ union bpf_attr {
        FN(get_func_arg),               \
        FN(get_func_ret),               \
        FN(get_func_arg_cnt),           \
+       FN(get_retval),                 \
+       FN(set_retval),                 \
+       FN(xdp_get_buff_len),           \
+       FN(xdp_load_bytes),             \
+       FN(xdp_store_bytes),            \
        /* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
index c7a5be3bf8bea34b75495ebdaf66e0bb4df77908..7f145aefbff87f6b984f894a747391d1a10c7d30 100644 (file)
@@ -837,13 +837,12 @@ static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
 static void *prog_fd_array_get_ptr(struct bpf_map *map,
                                   struct file *map_file, int fd)
 {
-       struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct bpf_prog *prog = bpf_prog_get(fd);
 
        if (IS_ERR(prog))
                return prog;
 
-       if (!bpf_prog_array_compatible(array, prog)) {
+       if (!bpf_prog_map_compatible(map, prog)) {
                bpf_prog_put(prog);
                return ERR_PTR(-EINVAL);
        }
@@ -1071,7 +1070,6 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
        INIT_WORK(&aux->work, prog_array_map_clear_deferred);
        INIT_LIST_HEAD(&aux->poke_progs);
        mutex_init(&aux->poke_mutex);
-       spin_lock_init(&aux->owner.lock);
 
        map = array_map_alloc(attr);
        if (IS_ERR(map)) {
index e16dafeb24504a307c40d1e37c9da6e6ede1a076..a1c44c17ea9c0113a5567878707fd0e87f726a96 100644 (file)
 DEFINE_IDR(btf_idr);
 DEFINE_SPINLOCK(btf_idr_lock);
 
+enum btf_kfunc_hook {
+       BTF_KFUNC_HOOK_XDP,
+       BTF_KFUNC_HOOK_TC,
+       BTF_KFUNC_HOOK_STRUCT_OPS,
+       BTF_KFUNC_HOOK_MAX,
+};
+
+enum {
+       BTF_KFUNC_SET_MAX_CNT = 32,
+};
+
+struct btf_kfunc_set_tab {
+       struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX];
+};
+
 struct btf {
        void *data;
        struct btf_type **types;
@@ -212,6 +227,7 @@ struct btf {
        refcount_t refcnt;
        u32 id;
        struct rcu_head rcu;
+       struct btf_kfunc_set_tab *kfunc_set_tab;
 
        /* split BTF support */
        struct btf *base_btf;
@@ -1531,8 +1547,30 @@ static void btf_free_id(struct btf *btf)
        spin_unlock_irqrestore(&btf_idr_lock, flags);
 }
 
+static void btf_free_kfunc_set_tab(struct btf *btf)
+{
+       struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab;
+       int hook, type;
+
+       if (!tab)
+               return;
+       /* For module BTF, we directly assign the sets being registered, so
+        * there is nothing to free except kfunc_set_tab.
+        */
+       if (btf_is_module(btf))
+               goto free_tab;
+       for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) {
+               for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++)
+                       kfree(tab->sets[hook][type]);
+       }
+free_tab:
+       kfree(tab);
+       btf->kfunc_set_tab = NULL;
+}
+
 static void btf_free(struct btf *btf)
 {
+       btf_free_kfunc_set_tab(btf);
        kvfree(btf->types);
        kvfree(btf->resolved_sizes);
        kvfree(btf->resolved_ids);
@@ -5616,17 +5654,45 @@ static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
        return true;
 }
 
+static bool is_kfunc_arg_mem_size(const struct btf *btf,
+                                 const struct btf_param *arg,
+                                 const struct bpf_reg_state *reg)
+{
+       int len, sfx_len = sizeof("__sz") - 1;
+       const struct btf_type *t;
+       const char *param_name;
+
+       t = btf_type_skip_modifiers(btf, arg->type, NULL);
+       if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
+               return false;
+
+       /* In the future, this can be ported to use BTF tagging */
+       param_name = btf_name_by_offset(btf, arg->name_off);
+       if (str_is_empty(param_name))
+               return false;
+       len = strlen(param_name);
+       if (len < sfx_len)
+               return false;
+       param_name += len - sfx_len;
+       if (strncmp(param_name, "__sz", sfx_len))
+               return false;
+
+       return true;
+}
+
 static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                                    const struct btf *btf, u32 func_id,
                                    struct bpf_reg_state *regs,
                                    bool ptr_to_mem_ok)
 {
        struct bpf_verifier_log *log = &env->log;
+       u32 i, nargs, ref_id, ref_obj_id = 0;
        bool is_kfunc = btf_is_kernel(btf);
        const char *func_name, *ref_tname;
        const struct btf_type *t, *ref_t;
        const struct btf_param *args;
-       u32 i, nargs, ref_id;
+       int ref_regno = 0;
+       bool rel = false;
 
        t = btf_type_by_id(btf, func_id);
        if (!t || !btf_type_is_func(t)) {
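
The "__sz" suffix convention checked by is_kfunc_arg_mem_size() can be made
concrete with a kernel-side kfunc prototype. The function below is
hypothetical and only illustrates the naming rule: a void * argument is
accepted when it is immediately followed by a scalar parameter whose name
ends in "__sz", and the verifier then validates the pair as memory plus
length:

/* Hypothetical kfunc, not part of this series */
noinline u32 bpf_kfunc_example_csum(void *data, u32 data__sz)
{
        u32 i, sum = 0;

        /* data is guaranteed by the verifier to be readable for data__sz bytes */
        for (i = 0; i < data__sz; i++)
                sum += ((u8 *)data)[i];
        return sum;
}
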
@@ -5704,6 +5770,16 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                        if (reg->type == PTR_TO_BTF_ID) {
                                reg_btf = reg->btf;
                                reg_ref_id = reg->btf_id;
+                               /* Ensure only one argument is referenced PTR_TO_BTF_ID */
+                               if (reg->ref_obj_id) {
+                                       if (ref_obj_id) {
+                                               bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+                                                       regno, reg->ref_obj_id, ref_obj_id);
+                                               return -EFAULT;
+                                       }
+                                       ref_regno = regno;
+                                       ref_obj_id = reg->ref_obj_id;
+                               }
                        } else {
                                reg_btf = btf_vmlinux;
                                reg_ref_id = *reg2btf_ids[reg->type];
@@ -5727,17 +5803,33 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                        u32 type_size;
 
                        if (is_kfunc) {
+                               bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]);
+
                                /* Permit pointer to mem, but only when argument
                                 * type is pointer to scalar, or struct composed
                                 * (recursively) of scalars.
+                                * When arg_mem_size is true, the pointer can be
+                                * void *.
                                 */
                                if (!btf_type_is_scalar(ref_t) &&
-                                   !__btf_type_is_scalar_struct(log, btf, ref_t, 0)) {
+                                   !__btf_type_is_scalar_struct(log, btf, ref_t, 0) &&
+                                   (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
                                        bpf_log(log,
-                                               "arg#%d pointer type %s %s must point to scalar or struct with scalar\n",
-                                               i, btf_type_str(ref_t), ref_tname);
+                                               "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
+                                               i, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
                                        return -EINVAL;
                                }
+
+                               /* Check for mem, len pair */
+                               if (arg_mem_size) {
+                                       if (check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1)) {
+                                               bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n",
+                                                       i, i + 1);
+                                               return -EINVAL;
+                                       }
+                                       i++;
+                                       continue;
+                               }
                        }
 
                        resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
@@ -5758,7 +5850,23 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                }
        }
 
-       return 0;
+       /* Either both are set, or neither */
+       WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno));
+       if (is_kfunc) {
+               rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog),
+                                               BTF_KFUNC_TYPE_RELEASE, func_id);
+               /* We already made sure ref_obj_id is set only for one argument */
+               if (rel && !ref_obj_id) {
+                       bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
+                               func_name);
+                       return -EINVAL;
+               }
+               /* Allow (!rel && ref_obj_id), so that passing such referenced PTR_TO_BTF_ID to
+                * other kfuncs works
+                */
+       }
+       /* returns argument register number > 0 in case of reference release kfunc */
+       return rel ? ref_regno : 0;
 }
 
 /* Compare BTF of a function with given bpf_reg_state.
@@ -6200,12 +6308,17 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
        return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
 }
 
+enum {
+       BTF_MODULE_F_LIVE = (1 << 0),
+};
+
 #ifdef CONFIG_DEBUG_INFO_BTF_MODULES
 struct btf_module {
        struct list_head list;
        struct module *module;
        struct btf *btf;
        struct bin_attribute *sysfs_attr;
+       int flags;
 };
 
 static LIST_HEAD(btf_modules);
@@ -6233,7 +6346,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
        int err = 0;
 
        if (mod->btf_data_size == 0 ||
-           (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
+           (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE &&
+            op != MODULE_STATE_GOING))
                goto out;
 
        switch (op) {
@@ -6291,6 +6405,17 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
                        btf_mod->sysfs_attr = attr;
                }
 
+               break;
+       case MODULE_STATE_LIVE:
+               mutex_lock(&btf_module_mutex);
+               list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+                       if (btf_mod->module != module)
+                               continue;
+
+                       btf_mod->flags |= BTF_MODULE_F_LIVE;
+                       break;
+               }
+               mutex_unlock(&btf_module_mutex);
                break;
        case MODULE_STATE_GOING:
                mutex_lock(&btf_module_mutex);
@@ -6338,7 +6463,12 @@ struct module *btf_try_get_module(const struct btf *btf)
                if (btf_mod->btf != btf)
                        continue;
 
-               if (try_module_get(btf_mod->module))
+               /* We must only consider modules whose __init routine has
+                * finished, hence we must check for the BTF_MODULE_F_LIVE flag,
+                * which is set from the notifier callback for
+                * MODULE_STATE_LIVE.
+                */
+               if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module))
                        res = btf_mod->module;
 
                break;
@@ -6349,6 +6479,36 @@ struct module *btf_try_get_module(const struct btf *btf)
        return res;
 }
 
+/* Returns struct btf corresponding to the struct module
+ *
+ * This function can return NULL or ERR_PTR. Note that caller must
+ * release reference for struct btf iff btf_is_module is true.
+ */
+static struct btf *btf_get_module_btf(const struct module *module)
+{
+       struct btf *btf = NULL;
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+       struct btf_module *btf_mod, *tmp;
+#endif
+
+       if (!module)
+               return bpf_get_btf_vmlinux();
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+       mutex_lock(&btf_module_mutex);
+       list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+               if (btf_mod->module != module)
+                       continue;
+
+               btf_get(btf_mod->btf);
+               btf = btf_mod->btf;
+               break;
+       }
+       mutex_unlock(&btf_module_mutex);
+#endif
+
+       return btf;
+}
+
 BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags)
 {
        struct btf *btf;
@@ -6416,53 +6576,181 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE)
 BTF_TRACING_TYPE_xxx
 #undef BTF_TRACING_TYPE
 
-/* BTF ID set registration API for modules */
-
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+/* Kernel Function (kfunc) BTF ID set registration API */
 
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                              struct kfunc_btf_id_set *s)
+static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+                                   enum btf_kfunc_type type,
+                                   struct btf_id_set *add_set, bool vmlinux_set)
 {
-       mutex_lock(&l->mutex);
-       list_add(&s->list, &l->list);
-       mutex_unlock(&l->mutex);
+       struct btf_kfunc_set_tab *tab;
+       struct btf_id_set *set;
+       u32 set_cnt;
+       int ret;
+
+       if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) {
+               ret = -EINVAL;
+               goto end;
+       }
+
+       if (!add_set->cnt)
+               return 0;
+
+       tab = btf->kfunc_set_tab;
+       if (!tab) {
+               tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN);
+               if (!tab)
+                       return -ENOMEM;
+               btf->kfunc_set_tab = tab;
+       }
+
+       set = tab->sets[hook][type];
+       /* Warn when register_btf_kfunc_id_set is called twice for the same hook
+        * for module sets.
+        */
+       if (WARN_ON_ONCE(set && !vmlinux_set)) {
+               ret = -EINVAL;
+               goto end;
+       }
+
+       /* We don't need to allocate, concatenate, and sort module sets, because
+        * only one is allowed per hook. Hence, we can directly assign the
+        * pointer and return.
+        */
+       if (!vmlinux_set) {
+               tab->sets[hook][type] = add_set;
+               return 0;
+       }
+
+       /* In case of vmlinux sets, there may be more than one set being
+        * registered per hook. To create a unified set, we allocate a new set
+        * and concatenate all individual sets being registered. While each set
+        * is individually sorted, they may become unsorted when concatenated,
+        * hence the final set must be sorted again so that binary searching
+        * it with the btf_id_set_contains() function works.
+        */
+       set_cnt = set ? set->cnt : 0;
+
+       if (set_cnt > U32_MAX - add_set->cnt) {
+               ret = -EOVERFLOW;
+               goto end;
+       }
+
+       if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) {
+               ret = -E2BIG;
+               goto end;
+       }
+
+       /* Grow set */
+       set = krealloc(tab->sets[hook][type],
+                      offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]),
+                      GFP_KERNEL | __GFP_NOWARN);
+       if (!set) {
+               ret = -ENOMEM;
+               goto end;
+       }
+
+       /* For newly allocated set, initialize set->cnt to 0 */
+       if (!tab->sets[hook][type])
+               set->cnt = 0;
+       tab->sets[hook][type] = set;
+
+       /* Concatenate the two sets */
+       memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0]));
+       set->cnt += add_set->cnt;
+
+       sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL);
+
+       return 0;
+end:
+       btf_free_kfunc_set_tab(btf);
+       return ret;
 }
-EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set);
 
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                struct kfunc_btf_id_set *s)
+static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+                                 const struct btf_kfunc_id_set *kset)
 {
-       mutex_lock(&l->mutex);
-       list_del_init(&s->list);
-       mutex_unlock(&l->mutex);
+       bool vmlinux_set = !btf_is_module(btf);
+       int type, ret;
+
+       for (type = 0; type < ARRAY_SIZE(kset->sets); type++) {
+               if (!kset->sets[type])
+                       continue;
+
+               ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set);
+               if (ret)
+                       break;
+       }
+       return ret;
 }
-EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set);
 
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
-                             struct module *owner)
+static bool __btf_kfunc_id_set_contains(const struct btf *btf,
+                                       enum btf_kfunc_hook hook,
+                                       enum btf_kfunc_type type,
+                                       u32 kfunc_btf_id)
 {
-       struct kfunc_btf_id_set *s;
+       struct btf_id_set *set;
 
-       mutex_lock(&klist->mutex);
-       list_for_each_entry(s, &klist->list, list) {
-               if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) {
-                       mutex_unlock(&klist->mutex);
-                       return true;
-               }
+       if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX)
+               return false;
+       if (!btf->kfunc_set_tab)
+               return false;
+       set = btf->kfunc_set_tab->sets[hook][type];
+       if (!set)
+               return false;
+       return btf_id_set_contains(set, kfunc_btf_id);
+}
+
+static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
+{
+       switch (prog_type) {
+       case BPF_PROG_TYPE_XDP:
+               return BTF_KFUNC_HOOK_XDP;
+       case BPF_PROG_TYPE_SCHED_CLS:
+               return BTF_KFUNC_HOOK_TC;
+       case BPF_PROG_TYPE_STRUCT_OPS:
+               return BTF_KFUNC_HOOK_STRUCT_OPS;
+       default:
+               return BTF_KFUNC_HOOK_MAX;
        }
-       mutex_unlock(&klist->mutex);
-       return false;
 }
 
-#define DEFINE_KFUNC_BTF_ID_LIST(name)                                         \
-       struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list),           \
-                                         __MUTEX_INITIALIZER(name.mutex) };   \
-       EXPORT_SYMBOL_GPL(name)
+/* Caution:
+ * Reference to the module (obtained using btf_try_get_module) corresponding to
+ * the struct btf *MUST* be held when calling this function from verifier
+ * context. This is usually true as we stash references in prog's kfunc_btf_tab;
+ * keeping the reference for the duration of the call provides the necessary
+ * protection for looking up a well-formed btf->kfunc_set_tab.
+ */
+bool btf_kfunc_id_set_contains(const struct btf *btf,
+                              enum bpf_prog_type prog_type,
+                              enum btf_kfunc_type type, u32 kfunc_btf_id)
+{
+       enum btf_kfunc_hook hook;
 
-DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
-DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list);
+       hook = bpf_prog_type_to_kfunc_hook(prog_type);
+       return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id);
+}
 
-#endif
+/* This function must be invoked only from initcalls/module init functions */
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+                             const struct btf_kfunc_id_set *kset)
+{
+       enum btf_kfunc_hook hook;
+       struct btf *btf;
+       int ret;
+
+       btf = btf_get_module_btf(kset->owner);
+       if (IS_ERR_OR_NULL(btf))
+               return btf ? PTR_ERR(btf) : -ENOENT;
+
+       hook = bpf_prog_type_to_kfunc_hook(prog_type);
+       ret = btf_populate_kfunc_set(btf, hook, kset);
+       /* reference is only taken for module BTF */
+       if (btf_is_module(btf))
+               btf_put(btf);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
 
 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
                              const struct btf *targ_btf, __u32 targ_id)
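
Putting the new registration API together, a module providing kfuncs for XDP
programs would do roughly the following at init time. The kfunc, set and
variable names are hypothetical, and the .check_set member name follows the
btf_kfunc_id_set layout added to include/linux/btf.h by this series:

BTF_SET_START(example_kfunc_ids)
BTF_ID(func, bpf_kfunc_example_csum)
BTF_SET_END(example_kfunc_ids)

static const struct btf_kfunc_id_set example_kfunc_set = {
        .owner     = THIS_MODULE,
        .check_set = &example_kfunc_ids,
};

static int __init example_kfunc_init(void)
{
        /* Must run from an initcall/module init function, see the comment
         * on register_btf_kfunc_id_set() above.
         */
        return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &example_kfunc_set);
}
module_init(example_kfunc_init);
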
index 514b4681a90ac0dff8ec0aa66594450fe25e1b2d..279ebbed75a56b0536e1be04c7420bb0b8614061 100644 (file)
@@ -1044,7 +1044,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
  *   NET_XMIT_DROP       (1)   - drop packet and notify TCP to call cwr
  *   NET_XMIT_CN         (2)   - continue with packet output and notify TCP
  *                               to call cwr
- *   -EPERM                    - drop packet
+ *   -err                      - drop packet
  *
  * For ingress packets, this function will return -EPERM if any
  * attached program was found and if it returned != 1 during execution.
@@ -1079,8 +1079,9 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
                        cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
        } else {
                ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
-                                           __bpf_prog_run_save_cb);
-               ret = (ret == 1 ? 0 : -EPERM);
+                                           __bpf_prog_run_save_cb, 0);
+               if (ret && !IS_ERR_VALUE((long)ret))
+                       ret = -EFAULT;
        }
        bpf_restore_data_end(skb, saved_data_end);
        __skb_pull(skb, offset);
@@ -1107,10 +1108,9 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
                               enum cgroup_bpf_attach_type atype)
 {
        struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       int ret;
 
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, bpf_prog_run);
-       return ret == 1 ? 0 : -EPERM;
+       return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
+                                    bpf_prog_run, 0);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 
@@ -1142,7 +1142,6 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
        };
        struct sockaddr_storage unspec;
        struct cgroup *cgrp;
-       int ret;
 
        /* Check socket family since not all sockets represent network
         * endpoint (e.g. AF_UNIX).
@@ -1156,10 +1155,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
        }
 
        cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
-                                         bpf_prog_run, flags);
-
-       return ret == 1 ? 0 : -EPERM;
+       return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
+                                          bpf_prog_run, 0, flags);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
 
@@ -1184,11 +1181,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
                                     enum cgroup_bpf_attach_type atype)
 {
        struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       int ret;
 
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
-                                   bpf_prog_run);
-       return ret == 1 ? 0 : -EPERM;
+       return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
+                                    bpf_prog_run, 0);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
 
@@ -1201,17 +1196,47 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
                .major = major,
                .minor = minor,
        };
-       int allow;
+       int ret;
 
        rcu_read_lock();
        cgrp = task_dfl_cgroup(current);
-       allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
-                                     bpf_prog_run);
+       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+                                   bpf_prog_run, 0);
        rcu_read_unlock();
 
-       return !allow;
+       return ret;
 }
 
+BPF_CALL_0(bpf_get_retval)
+{
+       struct bpf_cg_run_ctx *ctx =
+               container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+
+       return ctx->retval;
+}
+
+static const struct bpf_func_proto bpf_get_retval_proto = {
+       .func           = bpf_get_retval,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+};
+
+BPF_CALL_1(bpf_set_retval, int, retval)
+{
+       struct bpf_cg_run_ctx *ctx =
+               container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+
+       ctx->retval = retval;
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_set_retval_proto = {
+       .func           = bpf_set_retval,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1224,6 +1249,10 @@ cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_get_current_cgroup_id_proto;
        case BPF_FUNC_perf_event_output:
                return &bpf_event_output_data_proto;
+       case BPF_FUNC_get_retval:
+               return &bpf_get_retval_proto;
+       case BPF_FUNC_set_retval:
+               return &bpf_set_retval_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
@@ -1337,7 +1366,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 
        rcu_read_lock();
        cgrp = task_dfl_cgroup(current);
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run);
+       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+                                   bpf_prog_run, 0);
        rcu_read_unlock();
 
        kfree(ctx.cur_val);
@@ -1350,7 +1380,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
                kfree(ctx.new_val);
        }
 
-       return ret == 1 ? 0 : -EPERM;
+       return ret;
 }
 
 #ifdef CONFIG_NET
@@ -1452,13 +1482,11 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
 
        lock_sock(sk);
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
-                                   &ctx, bpf_prog_run);
+                                   &ctx, bpf_prog_run, 0);
        release_sock(sk);
 
-       if (!ret) {
-               ret = -EPERM;
+       if (ret)
                goto out;
-       }
 
        if (ctx.optlen == -1) {
                /* optlen set to -1, bypass kernel */
@@ -1518,7 +1546,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
                .sk = sk,
                .level = level,
                .optname = optname,
-               .retval = retval,
+               .current_task = current,
        };
        int ret;
 
@@ -1562,27 +1590,17 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 
        lock_sock(sk);
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
-                                   &ctx, bpf_prog_run);
+                                   &ctx, bpf_prog_run, retval);
        release_sock(sk);
 
-       if (!ret) {
-               ret = -EPERM;
+       if (ret < 0)
                goto out;
-       }
 
        if (ctx.optlen > max_optlen || ctx.optlen < 0) {
                ret = -EFAULT;
                goto out;
        }
 
-       /* BPF programs only allowed to set retval to 0, not some
-        * arbitrary value.
-        */
-       if (ctx.retval != 0 && ctx.retval != retval) {
-               ret = -EFAULT;
-               goto out;
-       }
-
        if (ctx.optlen != 0) {
                if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
                    put_user(ctx.optlen, optlen)) {
@@ -1591,8 +1609,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
                }
        }
 
-       ret = ctx.retval;
-
 out:
        sockopt_free_buf(&ctx, &buf);
        return ret;
@@ -1607,10 +1623,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
                .sk = sk,
                .level = level,
                .optname = optname,
-               .retval = retval,
                .optlen = *optlen,
                .optval = optval,
                .optval_end = optval + *optlen,
+               .current_task = current,
        };
        int ret;
 
@@ -1623,25 +1639,19 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
         */
 
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
-                                   &ctx, bpf_prog_run);
-       if (!ret)
-               return -EPERM;
+                                   &ctx, bpf_prog_run, retval);
+       if (ret < 0)
+               return ret;
 
        if (ctx.optlen > *optlen)
                return -EFAULT;
 
-       /* BPF programs only allowed to set retval to 0, not some
-        * arbitrary value.
-        */
-       if (ctx.retval != 0 && ctx.retval != retval)
-               return -EFAULT;
-
        /* BPF programs can shrink the buffer, export the modifications.
         */
        if (ctx.optlen != 0)
                *optlen = ctx.optlen;
 
-       return ctx.retval;
+       return ret;
 }
 #endif
 
@@ -2057,10 +2067,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
                        *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
                break;
        case offsetof(struct bpf_sockopt, retval):
-               if (type == BPF_WRITE)
-                       *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
-               else
-                       *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
+               BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
+
+               if (type == BPF_WRITE) {
+                       int treg = BPF_REG_9;
+
+                       if (si->src_reg == treg || si->dst_reg == treg)
+                               --treg;
+                       if (si->src_reg == treg || si->dst_reg == treg)
+                               --treg;
+                       *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
+                                             offsetof(struct bpf_sockopt_kern, tmp_reg));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+                                             treg, si->dst_reg,
+                                             offsetof(struct bpf_sockopt_kern, current_task));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+                                             treg, treg,
+                                             offsetof(struct task_struct, bpf_ctx));
+                       *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+                                             treg, si->src_reg,
+                                             offsetof(struct bpf_cg_run_ctx, retval));
+                       *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
+                                             offsetof(struct bpf_sockopt_kern, tmp_reg));
+               } else {
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+                                             si->dst_reg, si->src_reg,
+                                             offsetof(struct bpf_sockopt_kern, current_task));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+                                             si->dst_reg, si->dst_reg,
+                                             offsetof(struct task_struct, bpf_ctx));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+                                             si->dst_reg, si->dst_reg,
+                                             offsetof(struct bpf_cg_run_ctx, retval));
+               }
                break;
        case offsetof(struct bpf_sockopt, optval):
                *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
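
On the BPF side, the retval plumbing above lets a cgroup program choose the
errno seen by userspace instead of the historical blanket -EPERM. A minimal
sketch, with illustrative section and program names; the exact interplay
between the program's return value and the stored retval follows the run-time
changes in this file:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define EACCES 13 /* defined here only to keep the sketch self-contained */

SEC("cgroup/setsockopt")
int reject_with_custom_errno(struct bpf_sockopt *ctx)
{
        /* Export a specific errno to the calling process, then reject
         * the syscall by returning 0.
         */
        bpf_set_retval(-EACCES);
        return 0;
}

char _license[] SEC("license") = "GPL";
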
index de3e5bc6781fec61848fd623d37497fca3d8e4d2..0a1cfd8544b9cdd0cc1c8d5b87372e445537a987 100644 (file)
@@ -1829,28 +1829,30 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx,
 }
 #endif
 
-bool bpf_prog_array_compatible(struct bpf_array *array,
-                              const struct bpf_prog *fp)
+bool bpf_prog_map_compatible(struct bpf_map *map,
+                            const struct bpf_prog *fp)
 {
        bool ret;
 
        if (fp->kprobe_override)
                return false;
 
-       spin_lock(&array->aux->owner.lock);
-
-       if (!array->aux->owner.type) {
+       spin_lock(&map->owner.lock);
+       if (!map->owner.type) {
                /* There's no owner yet where we could check for
                 * compatibility.
                 */
-               array->aux->owner.type  = fp->type;
-               array->aux->owner.jited = fp->jited;
+               map->owner.type  = fp->type;
+               map->owner.jited = fp->jited;
+               map->owner.xdp_has_frags = fp->aux->xdp_has_frags;
                ret = true;
        } else {
-               ret = array->aux->owner.type  == fp->type &&
-                     array->aux->owner.jited == fp->jited;
+               ret = map->owner.type  == fp->type &&
+                     map->owner.jited == fp->jited &&
+                     map->owner.xdp_has_frags == fp->aux->xdp_has_frags;
        }
-       spin_unlock(&array->aux->owner.lock);
+       spin_unlock(&map->owner.lock);
+
        return ret;
 }
 
@@ -1862,13 +1864,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
        mutex_lock(&aux->used_maps_mutex);
        for (i = 0; i < aux->used_map_cnt; i++) {
                struct bpf_map *map = aux->used_maps[i];
-               struct bpf_array *array;
 
-               if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+               if (!map_type_contains_progs(map))
                        continue;
 
-               array = container_of(map, struct bpf_array, map);
-               if (!bpf_prog_array_compatible(array, fp)) {
+               if (!bpf_prog_map_compatible(map, fp)) {
                        ret = -EINVAL;
                        goto out;
                }
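
bpf_check_tail_call() now relies on a map_type_contains_progs() helper added
to include/linux/bpf.h in this series; a sketch of the intended check, where
the exact list of map types is an assumption of this note:

static inline bool map_type_contains_progs(struct bpf_map *map)
{
        /* Maps whose values are BPF programs and which therefore need the
         * owner-compatibility check performed by bpf_prog_map_compatible().
         */
        return map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
               map->map_type == BPF_MAP_TYPE_DEVMAP ||
               map->map_type == BPF_MAP_TYPE_CPUMAP;
}
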
index b3e6b94222380ca893e042b4b63cf6dd96ee4584..650e5d21f90d0981aba8a0c4184914a740880ac1 100644 (file)
@@ -397,7 +397,8 @@ static int cpu_map_kthread_run(void *data)
        return 0;
 }
 
-static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
+static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu,
+                                     struct bpf_map *map, int fd)
 {
        struct bpf_prog *prog;
 
@@ -405,7 +406,8 @@ static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
        if (IS_ERR(prog))
                return PTR_ERR(prog);
 
-       if (prog->expected_attach_type != BPF_XDP_CPUMAP) {
+       if (prog->expected_attach_type != BPF_XDP_CPUMAP ||
+           !bpf_prog_map_compatible(map, prog)) {
                bpf_prog_put(prog);
                return -EINVAL;
        }
@@ -457,7 +459,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
        rcpu->map_id = map->id;
        rcpu->value.qsize  = value->qsize;
 
-       if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
+       if (fd > 0 && __cpu_map_load_bpf_program(rcpu, map, fd))
                goto free_ptr_ring;
 
        /* Setup kthread */
index fe019dbdb3f0af18e3e04fbc9b15c949139749a0..038f6d7a83e43470b5e0517293ff9d5a64609d53 100644 (file)
@@ -858,7 +858,8 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
                                             BPF_PROG_TYPE_XDP, false);
                if (IS_ERR(prog))
                        goto err_put_dev;
-               if (prog->expected_attach_type != BPF_XDP_DEVMAP)
+               if (prog->expected_attach_type != BPF_XDP_DEVMAP ||
+                   !bpf_prog_map_compatible(&dtab->map, prog))
                        goto err_put_prog;
        }
 
index fa4505f9b6119bcb219ab9733847a98da65d1b21..72ce1edde950dc3ffcda611737402700d1817555 100644 (file)
@@ -556,16 +556,14 @@ static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
 
 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 {
-       const struct bpf_map *map = filp->private_data;
-       const struct bpf_array *array;
+       struct bpf_map *map = filp->private_data;
        u32 type = 0, jited = 0;
 
-       if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
-               array = container_of(map, struct bpf_array, map);
-               spin_lock(&array->aux->owner.lock);
-               type  = array->aux->owner.type;
-               jited = array->aux->owner.jited;
-               spin_unlock(&array->aux->owner.lock);
+       if (map_type_contains_progs(map)) {
+               spin_lock(&map->owner.lock);
+               type  = map->owner.type;
+               jited = map->owner.jited;
+               spin_unlock(&map->owner.lock);
        }
 
        seq_printf(m,
@@ -874,6 +872,7 @@ static int map_create(union bpf_attr *attr)
        atomic64_set(&map->refcnt, 1);
        atomic64_set(&map->usercnt, 1);
        mutex_init(&map->freeze_mutex);
+       spin_lock_init(&map->owner.lock);
 
        map->spin_lock_off = -EINVAL;
        map->timer_off = -EINVAL;
@@ -2217,7 +2216,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
                                 BPF_F_ANY_ALIGNMENT |
                                 BPF_F_TEST_STATE_FREQ |
                                 BPF_F_SLEEPABLE |
-                                BPF_F_TEST_RND_HI32))
+                                BPF_F_TEST_RND_HI32 |
+                                BPF_F_XDP_HAS_FRAGS))
                return -EINVAL;
 
        if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
@@ -2303,6 +2303,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
        prog->aux->dst_prog = dst_prog;
        prog->aux->offload_requested = !!attr->prog_ifindex;
        prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
+       prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
 
        err = security_bpf_prog_alloc(prog->aux);
        if (err)
@@ -3318,6 +3319,11 @@ static int bpf_prog_query(const union bpf_attr *attr,
        case BPF_FLOW_DISSECTOR:
        case BPF_SK_LOOKUP:
                return netns_bpf_prog_query(attr, uattr);
+       case BPF_SK_SKB_STREAM_PARSER:
+       case BPF_SK_SKB_STREAM_VERDICT:
+       case BPF_SK_MSG_VERDICT:
+       case BPF_SK_SKB_VERDICT:
+               return sock_map_bpf_prog_query(attr, uattr);
        default:
                return -EINVAL;
        }
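
From the loader's point of view, BPF_F_XDP_HAS_FRAGS is just another
prog_flags bit passed at BPF_PROG_LOAD time. A sketch using libbpf's
low-level bpf_prog_load(), assuming the libbpf 0.7 signature; insns and
insn_cnt stand in for a real XDP program:

#include <linux/bpf.h>
#include <bpf/bpf.h>

static int load_xdp_frags_prog(const struct bpf_insn *insns, size_t insn_cnt)
{
        LIBBPF_OPTS(bpf_prog_load_opts, opts,
                /* declare that the program can handle non-linear xdp_buffs */
                .prog_flags = BPF_F_XDP_HAS_FRAGS,
        );

        return bpf_prog_load(BPF_PROG_TYPE_XDP, "xdp_frags_prog", "GPL",
                             insns, insn_cnt, &opts);
}
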
index a39eedecc93a1ab110c5bfd3871f17ffcd74732d..dcf065ec2774b25be89b237b11c5bb3eb0ba60d7 100644 (file)
@@ -452,7 +452,8 @@ static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
 {
        return base_type(type) == PTR_TO_SOCKET ||
                base_type(type) == PTR_TO_TCP_SOCK ||
-               base_type(type) == PTR_TO_MEM;
+               base_type(type) == PTR_TO_MEM ||
+               base_type(type) == PTR_TO_BTF_ID;
 }
 
 static bool type_is_rdonly_mem(u32 type)
@@ -1743,7 +1744,7 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
 }
 
 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
-                                        s16 offset, struct module **btf_modp)
+                                        s16 offset)
 {
        struct bpf_kfunc_btf kf_btf = { .offset = offset };
        struct bpf_kfunc_btf_tab *tab;
@@ -1797,8 +1798,6 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
                sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
                     kfunc_btf_cmp_by_off, NULL);
        }
-       if (btf_modp)
-               *btf_modp = b->module;
        return b->btf;
 }
 
@@ -1815,8 +1814,7 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
 }
 
 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
-                                      u32 func_id, s16 offset,
-                                      struct module **btf_modp)
+                                      u32 func_id, s16 offset)
 {
        if (offset) {
                if (offset < 0) {
@@ -1827,7 +1825,7 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
                        return ERR_PTR(-EINVAL);
                }
 
-               return __find_kfunc_desc_btf(env, offset, btf_modp);
+               return __find_kfunc_desc_btf(env, offset);
        }
        return btf_vmlinux ?: ERR_PTR(-ENOENT);
 }
@@ -1890,7 +1888,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
                prog_aux->kfunc_btf_tab = btf_tab;
        }
 
-       desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL);
+       desc_btf = find_kfunc_desc_btf(env, func_id, offset);
        if (IS_ERR(desc_btf)) {
                verbose(env, "failed to find BTF for kernel function\n");
                return PTR_ERR(desc_btf);
@@ -2351,7 +2349,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
        if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
                return NULL;
 
-       desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL);
+       desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off);
        if (IS_ERR(desc_btf))
                return "<error>";
 
@@ -3498,11 +3496,6 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 
 #define MAX_PACKET_OFF 0xffff
 
-static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
-{
-       return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
-}
-
 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
                                       const struct bpf_call_arg_meta *meta,
                                       enum bpf_access_type t)
@@ -4877,6 +4870,62 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
        }
 }
 
+static int check_mem_size_reg(struct bpf_verifier_env *env,
+                             struct bpf_reg_state *reg, u32 regno,
+                             bool zero_size_allowed,
+                             struct bpf_call_arg_meta *meta)
+{
+       int err;
+
+       /* This is used to refine r0 return value bounds for helpers
+        * that enforce this value as an upper bound on return values.
+        * See do_refine_retval_range() for helpers that can refine
+        * the return value. C type of helper is u32 so we pull register
+        * bound from umax_value however, if negative verifier errors
+        * out. Only upper bounds can be learned because retval is an
+        * int type and negative retvals are allowed.
+        */
+       if (meta)
+               meta->msize_max_value = reg->umax_value;
+
+       /* The register is SCALAR_VALUE; the access check
+        * happens using its boundaries.
+        */
+       if (!tnum_is_const(reg->var_off))
+               /* For unprivileged variable accesses, disable raw
+                * mode so that the program is required to
+                * initialize all the memory that the helper could
+                * just partially fill up.
+                */
+               meta = NULL;
+
+       if (reg->smin_value < 0) {
+               verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
+                       regno);
+               return -EACCES;
+       }
+
+       if (reg->umin_value == 0) {
+               err = check_helper_mem_access(env, regno - 1, 0,
+                                             zero_size_allowed,
+                                             meta);
+               if (err)
+                       return err;
+       }
+
+       if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
+               verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
+                       regno);
+               return -EACCES;
+       }
+       err = check_helper_mem_access(env, regno - 1,
+                                     reg->umax_value,
+                                     zero_size_allowed, meta);
+       if (!err)
+               err = mark_chain_precision(env, regno);
+       return err;
+}
+
 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
                   u32 regno, u32 mem_size)
 {
@@ -4900,6 +4949,28 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
        return check_helper_mem_access(env, regno, mem_size, true, NULL);
 }
 
+int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+                            u32 regno)
+{
+       struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
+       bool may_be_null = type_may_be_null(mem_reg->type);
+       struct bpf_reg_state saved_reg;
+       int err;
+
+       WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
+
+       if (may_be_null) {
+               saved_reg = *mem_reg;
+               mark_ptr_not_null_reg(mem_reg);
+       }
+
+       err = check_mem_size_reg(env, reg, regno, true, NULL);
+
+       if (may_be_null)
+               *mem_reg = saved_reg;
+       return err;
+}
+
 /* Implementation details:
  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
  * Two bpf_map_lookups (even with the same key) will have different reg->id.
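
For the kfunc side of the mem-size check added above, the BPF program passes
a pointer plus its size and check_kfunc_mem_size_reg() validates the pair. A
sketch against the test kfunc defined later in this series; the "tc" section
name and prog type are assumptions:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) __ksym;

SEC("tc")
int kfunc_mem_len_example(struct __sk_buff *skb)
{
        char buf[16] = {};

        /* buf is readable for sizeof(buf) bytes; the verifier checks the
         * (mem, mem__sz) pair before allowing the call.
         */
        bpf_kfunc_call_test_mem_len_pass1(buf, sizeof(buf));
        return 0;
}

char _license[] SEC("license") = "GPL";
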
@@ -5439,51 +5510,7 @@ skip_type_check:
        } else if (arg_type_is_mem_size(arg_type)) {
                bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
 
-               /* This is used to refine r0 return value bounds for helpers
-                * that enforce this value as an upper bound on return values.
-                * See do_refine_retval_range() for helpers that can refine
-                * the return value. C type of helper is u32 so we pull register
-                * bound from umax_value however, if negative verifier errors
-                * out. Only upper bounds can be learned because retval is an
-                * int type and negative retvals are allowed.
-                */
-               meta->msize_max_value = reg->umax_value;
-
-               /* The register is SCALAR_VALUE; the access check
-                * happens using its boundaries.
-                */
-               if (!tnum_is_const(reg->var_off))
-                       /* For unprivileged variable accesses, disable raw
-                        * mode so that the program is required to
-                        * initialize all the memory that the helper could
-                        * just partially fill up.
-                        */
-                       meta = NULL;
-
-               if (reg->smin_value < 0) {
-                       verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
-                               regno);
-                       return -EACCES;
-               }
-
-               if (reg->umin_value == 0) {
-                       err = check_helper_mem_access(env, regno - 1, 0,
-                                                     zero_size_allowed,
-                                                     meta);
-                       if (err)
-                               return err;
-               }
-
-               if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
-                       verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
-                               regno);
-                       return -EACCES;
-               }
-               err = check_helper_mem_access(env, regno - 1,
-                                             reg->umax_value,
-                                             zero_size_allowed, meta);
-               if (!err)
-                       err = mark_chain_precision(env, regno);
+               err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
        } else if (arg_type_is_alloc_size(arg_type)) {
                if (!tnum_is_const(reg->var_off)) {
                        verbose(env, "R%d is not a known constant'\n",
@@ -6842,22 +6869,23 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
        }
 }
 
-static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
+static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+                           int *insn_idx_p)
 {
        const struct btf_type *t, *func, *func_proto, *ptr_type;
        struct bpf_reg_state *regs = cur_regs(env);
        const char *func_name, *ptr_type_name;
        u32 i, nargs, func_id, ptr_type_id;
-       struct module *btf_mod = NULL;
+       int err, insn_idx = *insn_idx_p;
        const struct btf_param *args;
        struct btf *desc_btf;
-       int err;
+       bool acq;
 
        /* skip for now, but return error when we find this in fixup_kfunc_call */
        if (!insn->imm)
                return 0;
 
-       desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod);
+       desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off);
        if (IS_ERR(desc_btf))
                return PTR_ERR(desc_btf);
 
@@ -6866,23 +6894,43 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
        func_name = btf_name_by_offset(desc_btf, func->name_off);
        func_proto = btf_type_by_id(desc_btf, func->type);
 
-       if (!env->ops->check_kfunc_call ||
-           !env->ops->check_kfunc_call(func_id, btf_mod)) {
+       if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+                                     BTF_KFUNC_TYPE_CHECK, func_id)) {
                verbose(env, "calling kernel function %s is not allowed\n",
                        func_name);
                return -EACCES;
        }
 
+       acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+                                       BTF_KFUNC_TYPE_ACQUIRE, func_id);
+
        /* Check the arguments */
        err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
-       if (err)
+       if (err < 0)
                return err;
+       /* In case of release function, we get register number of refcounted
+        * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, so do the release now
+        */
+       if (err) {
+               err = release_reference(env, regs[err].ref_obj_id);
+               if (err) {
+                       verbose(env, "kfunc %s#%d reference has not been acquired before\n",
+                               func_name, func_id);
+                       return err;
+               }
+       }
 
        for (i = 0; i < CALLER_SAVED_REGS; i++)
                mark_reg_not_init(env, regs, caller_saved[i]);
 
        /* Check return type */
        t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
+
+       if (acq && !btf_type_is_ptr(t)) {
+               verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
+               return -EINVAL;
+       }
+
        if (btf_type_is_scalar(t)) {
                mark_reg_unknown(env, regs, BPF_REG_0);
                mark_btf_func_reg_size(env, BPF_REG_0, t->size);
@@ -6901,7 +6949,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
                regs[BPF_REG_0].btf = desc_btf;
                regs[BPF_REG_0].type = PTR_TO_BTF_ID;
                regs[BPF_REG_0].btf_id = ptr_type_id;
+               if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+                                             BTF_KFUNC_TYPE_RET_NULL, func_id)) {
+                       regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
+                       /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
+                       regs[BPF_REG_0].id = ++env->id_gen;
+               }
                mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
+               if (acq) {
+                       int id = acquire_reference_state(env, insn_idx);
+
+                       if (id < 0)
+                               return id;
+                       regs[BPF_REG_0].id = id;
+                       regs[BPF_REG_0].ref_obj_id = id;
+               }
        } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
 
        nargs = btf_type_vlen(func_proto);
@@ -11549,7 +11611,7 @@ static int do_check(struct bpf_verifier_env *env)
                                if (insn->src_reg == BPF_PSEUDO_CALL)
                                        err = check_func_call(env, insn, &env->insn_idx);
                                else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
-                                       err = check_kfunc_call(env, insn);
+                                       err = check_kfunc_call(env, insn, &env->insn_idx);
                                else
                                        err = check_helper_call(env, insn, &env->insn_idx);
                                if (err)
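
The acquire/release handling added to check_kfunc_call() can be exercised
with the test kfuncs defined later in this series. A BPF-side sketch; the
"tc" attach point matches the prog type these test kfuncs are expected to be
registered for, which is an assumption here:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct prog_test_ref_kfunc;

extern struct prog_test_ref_kfunc *
bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym;
extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;

SEC("tc")
int kfunc_acquire_release(struct __sk_buff *skb)
{
        unsigned long sp = 0;
        struct prog_test_ref_kfunc *p;

        p = bpf_kfunc_call_test_acquire(&sp);
        /* RET_NULL kfunc: the verifier enforces this NULL check */
        if (!p)
                return 0;

        /* ... use p ... */

        /* release kfunc: the reference must be dropped on every path */
        bpf_kfunc_call_test_release(p);
        return 0;
}

char _license[] SEC("license") = "GPL";
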
index 21aa30644219e74be06b4b0f2d9d79c345dfff6b..06a9e220069ea3d1a620261ea73b6a5e8281fee1 100644 (file)
@@ -1562,6 +1562,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
 
 extern const struct bpf_func_proto bpf_skb_output_proto;
 extern const struct bpf_func_proto bpf_xdp_output_proto;
+extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
 
 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
           struct bpf_map *, map, u64, flags)
@@ -1661,6 +1662,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_sock_from_file_proto;
        case BPF_FUNC_get_socket_cookie:
                return &bpf_get_socket_ptr_cookie_proto;
+       case BPF_FUNC_xdp_get_buff_len:
+               return &bpf_xdp_get_buff_len_trace_proto;
 #endif
        case BPF_FUNC_seq_printf:
                return prog->expected_attach_type == BPF_TRACE_ITER ?
index 46dd95755967242133910bd5764e04bac35d5398..65b52b4bd6e13c7b25944a70114480159228e4dc 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/btf.h>
 #include <linux/btf_ids.h>
 #include <linux/slab.h>
+#include <linux/init.h>
 #include <linux/vmalloc.h>
 #include <linux/etherdevice.h>
 #include <linux/filter.h>
@@ -130,7 +131,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 
 static int bpf_test_finish(const union bpf_attr *kattr,
                           union bpf_attr __user *uattr, const void *data,
-                          u32 size, u32 retval, u32 duration)
+                          struct skb_shared_info *sinfo, u32 size,
+                          u32 retval, u32 duration)
 {
        void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
        int err = -EFAULT;
@@ -145,8 +147,36 @@ static int bpf_test_finish(const union bpf_attr *kattr,
                err = -ENOSPC;
        }
 
-       if (data_out && copy_to_user(data_out, data, copy_size))
-               goto out;
+       if (data_out) {
+               int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
+
+               if (copy_to_user(data_out, data, len))
+                       goto out;
+
+               if (sinfo) {
+                       int i, offset = len, data_len;
+
+                       for (i = 0; i < sinfo->nr_frags; i++) {
+                               skb_frag_t *frag = &sinfo->frags[i];
+
+                               if (offset >= copy_size) {
+                                       err = -ENOSPC;
+                                       break;
+                               }
+
+                               data_len = min_t(int, copy_size - offset,
+                                                skb_frag_size(frag));
+
+                               if (copy_to_user(data_out + offset,
+                                                skb_frag_address(frag),
+                                                data_len))
+                                       goto out;
+
+                               offset += data_len;
+                       }
+               }
+       }
+
        if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
                goto out;
        if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
@@ -171,6 +201,8 @@ int noinline bpf_fentry_test1(int a)
 {
        return a + 1;
 }
+EXPORT_SYMBOL_GPL(bpf_fentry_test1);
+ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);
 
 int noinline bpf_fentry_test2(int a, u64 b)
 {
@@ -232,28 +264,142 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
        return sk;
 }
 
+struct prog_test_ref_kfunc {
+       int a;
+       int b;
+       struct prog_test_ref_kfunc *next;
+};
+
+static struct prog_test_ref_kfunc prog_test_struct = {
+       .a = 42,
+       .b = 108,
+       .next = &prog_test_struct,
+};
+
+noinline struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
+{
+       /* randomly return NULL */
+       if (get_jiffies_64() % 2)
+               return NULL;
+       return &prog_test_struct;
+}
+
+noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
+{
+}
+
+struct prog_test_pass1 {
+       int x0;
+       struct {
+               int x1;
+               struct {
+                       int x2;
+                       struct {
+                               int x3;
+                       };
+               };
+       };
+};
+
+struct prog_test_pass2 {
+       int len;
+       short arr1[4];
+       struct {
+               char arr2[4];
+               unsigned long arr3[8];
+       } x;
+};
+
+struct prog_test_fail1 {
+       void *p;
+       int x;
+};
+
+struct prog_test_fail2 {
+       int x8;
+       struct prog_test_pass1 x;
+};
+
+struct prog_test_fail3 {
+       int len;
+       char arr1[2];
+       char arr2[];
+};
+
+noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+{
+}
+
 __diag_pop();
 
 ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
 
-BTF_SET_START(test_sk_kfunc_ids)
+BTF_SET_START(test_sk_check_kfunc_ids)
 BTF_ID(func, bpf_kfunc_call_test1)
 BTF_ID(func, bpf_kfunc_call_test2)
 BTF_ID(func, bpf_kfunc_call_test3)
-BTF_SET_END(test_sk_kfunc_ids)
-
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner)
-{
-       if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id))
-               return true;
-       return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner);
-}
-
-static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
-                          u32 headroom, u32 tailroom)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
+BTF_ID(func, bpf_kfunc_call_test_pass1)
+BTF_ID(func, bpf_kfunc_call_test_pass2)
+BTF_ID(func, bpf_kfunc_call_test_fail1)
+BTF_ID(func, bpf_kfunc_call_test_fail2)
+BTF_ID(func, bpf_kfunc_call_test_fail3)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2)
+BTF_SET_END(test_sk_check_kfunc_ids)
+
+BTF_SET_START(test_sk_acquire_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_SET_END(test_sk_acquire_kfunc_ids)
+
+BTF_SET_START(test_sk_release_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_SET_END(test_sk_release_kfunc_ids)
+
+BTF_SET_START(test_sk_ret_null_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_SET_END(test_sk_ret_null_kfunc_ids)
+
+static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
+                          u32 size, u32 headroom, u32 tailroom)
 {
        void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
-       u32 user_size = kattr->test.data_size_in;
        void *data;
 
        if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
@@ -581,7 +727,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
        if (kattr->test.flags || kattr->test.cpu)
                return -EINVAL;
 
-       data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
+       data = bpf_test_init(kattr, kattr->test.data_size_in,
+                            size, NET_SKB_PAD + NET_IP_ALIGN,
                             SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
        if (IS_ERR(data))
                return PTR_ERR(data);
@@ -683,7 +830,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
        /* bpf program can never convert linear skb to non-linear */
        if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
                size = skb_headlen(skb);
-       ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
+       ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
+                             duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, ctx,
                                     sizeof(struct __sk_buff));
@@ -758,16 +906,16 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
                          union bpf_attr __user *uattr)
 {
        u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-       u32 headroom = XDP_PACKET_HEADROOM;
        u32 size = kattr->test.data_size_in;
+       u32 headroom = XDP_PACKET_HEADROOM;
+       u32 retval, duration, max_data_sz;
        u32 repeat = kattr->test.repeat;
        struct netdev_rx_queue *rxqueue;
+       struct skb_shared_info *sinfo;
        struct xdp_buff xdp = {};
-       u32 retval, duration;
+       int i, ret = -EINVAL;
        struct xdp_md *ctx;
-       u32 max_data_sz;
        void *data;
-       int ret = -EINVAL;
 
        if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
            prog->expected_attach_type == BPF_XDP_CPUMAP)
@@ -787,26 +935,60 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
                headroom -= ctx->data;
        }
 
-       /* XDP have extra tailroom as (most) drivers use full page */
        max_data_sz = 4096 - headroom - tailroom;
+       size = min_t(u32, size, max_data_sz);
 
-       data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
+       data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
        if (IS_ERR(data)) {
                ret = PTR_ERR(data);
                goto free_ctx;
        }
 
        rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
-       xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
-                     &rxqueue->xdp_rxq);
+       rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom;
+       xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
        xdp_prepare_buff(&xdp, data, headroom, size, true);
+       sinfo = xdp_get_shared_info_from_buff(&xdp);
 
        ret = xdp_convert_md_to_buff(ctx, &xdp);
        if (ret)
                goto free_data;
 
+       if (unlikely(kattr->test.data_size_in > size)) {
+               void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+
+               while (size < kattr->test.data_size_in) {
+                       struct page *page;
+                       skb_frag_t *frag;
+                       int data_len;
+
+                       page = alloc_page(GFP_KERNEL);
+                       if (!page) {
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+
+                       frag = &sinfo->frags[sinfo->nr_frags++];
+                       __skb_frag_set_page(frag, page);
+
+                       data_len = min_t(int, kattr->test.data_size_in - size,
+                                        PAGE_SIZE);
+                       skb_frag_size_set(frag, data_len);
+
+                       if (copy_from_user(page_address(page), data_in + size,
+                                          data_len)) {
+                               ret = -EFAULT;
+                               goto out;
+                       }
+                       sinfo->xdp_frags_size += data_len;
+                       size += data_len;
+               }
+               xdp_buff_set_frags_flag(&xdp);
+       }
+
        if (repeat > 1)
                bpf_prog_change_xdp(NULL, prog);
+
        ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
        /* We convert the xdp_buff back to an xdp_md before checking the return
         * code so the reference count of any held netdevice will be decremented
@@ -816,12 +998,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
        if (ret)
                goto out;
 
-       if (xdp.data_meta != data + headroom ||
-           xdp.data_end != xdp.data_meta + size)
-               size = xdp.data_end - xdp.data_meta;
-
-       ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval,
-                             duration);
+       size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
+       ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
+                             retval, duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, ctx,
                                     sizeof(struct xdp_md));
@@ -830,6 +1009,8 @@ out:
        if (repeat > 1)
                bpf_prog_change_xdp(prog, NULL);
 free_data:
+       for (i = 0; i < sinfo->nr_frags; i++)
+               __free_page(skb_frag_page(&sinfo->frags[i]));
        kfree(data);
 free_ctx:
        kfree(ctx);
@@ -876,7 +1057,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
        if (size < ETH_HLEN)
                return -EINVAL;
 
-       data = bpf_test_init(kattr, size, 0, 0);
+       data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0);
        if (IS_ERR(data))
                return PTR_ERR(data);
 
@@ -911,8 +1092,8 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
        if (ret < 0)
                goto out;
 
-       ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
-                             retval, duration);
+       ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
+                             sizeof(flow_keys), retval, duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, user_ctx,
                                     sizeof(struct bpf_flow_keys));
@@ -1016,7 +1197,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
                user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
        }
 
-       ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
+       ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
 
@@ -1067,3 +1248,17 @@ out:
        kfree(ctx);
        return err;
 }
+
+static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
+       .owner        = THIS_MODULE,
+       .check_set    = &test_sk_check_kfunc_ids,
+       .acquire_set  = &test_sk_acquire_kfunc_ids,
+       .release_set  = &test_sk_release_kfunc_ids,
+       .ret_null_set = &test_sk_ret_null_kfunc_ids,
+};
+
+static int __init bpf_prog_test_run_init(void)
+{
+       return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
+}
+late_initcall(bpf_prog_test_run_init);
index 4603b7cd3cd17e5f39d9d0e047529d07af98bc25..a06931c27eebe3f0d9a0232c4d9bf1ce9a69f092 100644 (file)
@@ -3783,6 +3783,28 @@ static const struct bpf_func_proto sk_skb_change_head_proto = {
        .arg2_type      = ARG_ANYTHING,
        .arg3_type      = ARG_ANYTHING,
 };
+
+BPF_CALL_1(bpf_xdp_get_buff_len, struct  xdp_buff*, xdp)
+{
+       return xdp_get_buff_len(xdp);
+}
+
+static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
+       .func           = bpf_xdp_get_buff_len,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
+
+BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
+
+const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
+       .func           = bpf_xdp_get_buff_len,
+       .gpl_only       = false,
+       .arg1_type      = ARG_PTR_TO_BTF_ID,
+       .arg1_btf_id    = &bpf_xdp_get_buff_len_bpf_ids[0],
+};
+
 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
 {
        return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -3817,11 +3839,208 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
+static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
+                            void *buf, unsigned long len, bool flush)
+{
+       unsigned long ptr_len, ptr_off = 0;
+       skb_frag_t *next_frag, *end_frag;
+       struct skb_shared_info *sinfo;
+       void *src, *dst;
+       u8 *ptr_buf;
+
+       if (likely(xdp->data_end - xdp->data >= off + len)) {
+               src = flush ? buf : xdp->data + off;
+               dst = flush ? xdp->data + off : buf;
+               memcpy(dst, src, len);
+               return;
+       }
+
+       sinfo = xdp_get_shared_info_from_buff(xdp);
+       end_frag = &sinfo->frags[sinfo->nr_frags];
+       next_frag = &sinfo->frags[0];
+
+       ptr_len = xdp->data_end - xdp->data;
+       ptr_buf = xdp->data;
+
+       while (true) {
+               if (off < ptr_off + ptr_len) {
+                       unsigned long copy_off = off - ptr_off;
+                       unsigned long copy_len = min(len, ptr_len - copy_off);
+
+                       src = flush ? buf : ptr_buf + copy_off;
+                       dst = flush ? ptr_buf + copy_off : buf;
+                       memcpy(dst, src, copy_len);
+
+                       off += copy_len;
+                       len -= copy_len;
+                       buf += copy_len;
+               }
+
+               if (!len || next_frag == end_frag)
+                       break;
+
+               ptr_off += ptr_len;
+               ptr_buf = skb_frag_address(next_frag);
+               ptr_len = skb_frag_size(next_frag);
+               next_frag++;
+       }
+}
+
+static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+{
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+       u32 size = xdp->data_end - xdp->data;
+       void *addr = xdp->data;
+       int i;
+
+       if (unlikely(offset > 0xffff || len > 0xffff))
+               return ERR_PTR(-EFAULT);
+
+       if (offset + len > xdp_get_buff_len(xdp))
+               return ERR_PTR(-EINVAL);
+
+       if (offset < size) /* linear area */
+               goto out;
+
+       offset -= size;
+       for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */
+               u32 frag_size = skb_frag_size(&sinfo->frags[i]);
+
+               if  (offset < frag_size) {
+                       addr = skb_frag_address(&sinfo->frags[i]);
+                       size = frag_size;
+                       break;
+               }
+               offset -= frag_size;
+       }
+out:
+       return offset + len < size ? addr + offset : NULL;
+}
+
+BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
+          void *, buf, u32, len)
+{
+       void *ptr;
+
+       ptr = bpf_xdp_pointer(xdp, offset, len);
+       if (IS_ERR(ptr))
+               return PTR_ERR(ptr);
+
+       if (!ptr)
+               bpf_xdp_copy_buf(xdp, offset, buf, len, false);
+       else
+               memcpy(buf, ptr, len);
+
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
+       .func           = bpf_xdp_load_bytes,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg4_type      = ARG_CONST_SIZE,
+};
+
+BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
+          void *, buf, u32, len)
+{
+       void *ptr;
+
+       ptr = bpf_xdp_pointer(xdp, offset, len);
+       if (IS_ERR(ptr))
+               return PTR_ERR(ptr);
+
+       if (!ptr)
+               bpf_xdp_copy_buf(xdp, offset, buf, len, true);
+       else
+               memcpy(ptr, buf, len);
+
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
+       .func           = bpf_xdp_store_bytes,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg4_type      = ARG_CONST_SIZE,
+};
+
+static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
+{
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+       skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
+       struct xdp_rxq_info *rxq = xdp->rxq;
+       unsigned int tailroom;
+
+       if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
+               return -EOPNOTSUPP;
+
+       tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
+       if (unlikely(offset > tailroom))
+               return -EINVAL;
+
+       memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
+       skb_frag_size_add(frag, offset);
+       sinfo->xdp_frags_size += offset;
+
+       return 0;
+}
+
+static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
+{
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+       int i, n_frags_free = 0, len_free = 0;
+
+       if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN))
+               return -EINVAL;
+
+       for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) {
+               skb_frag_t *frag = &sinfo->frags[i];
+               int shrink = min_t(int, offset, skb_frag_size(frag));
+
+               len_free += shrink;
+               offset -= shrink;
+
+               if (skb_frag_size(frag) == shrink) {
+                       struct page *page = skb_frag_page(frag);
+
+                       __xdp_return(page_address(page), &xdp->rxq->mem,
+                                    false, NULL);
+                       n_frags_free++;
+               } else {
+                       skb_frag_size_sub(frag, shrink);
+                       break;
+               }
+       }
+       sinfo->nr_frags -= n_frags_free;
+       sinfo->xdp_frags_size -= len_free;
+
+       if (unlikely(!sinfo->nr_frags)) {
+               xdp_buff_clear_frags_flag(xdp);
+               xdp->data_end -= offset;
+       }
+
+       return 0;
+}
+
 BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
 {
        void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
        void *data_end = xdp->data_end + offset;
 
+       if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */
+               if (offset < 0)
+                       return bpf_xdp_frags_shrink_tail(xdp, -offset);
+
+               return bpf_xdp_frags_increase_tail(xdp, offset);
+       }
+
        /* Notice that xdp_data_hard_end have reserved some tailroom */
        if (unlikely(data_end > data_hard_end))
                return -EINVAL;
@@ -4047,6 +4266,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
        enum bpf_map_type map_type = ri->map_type;
 
+       /* XDP_REDIRECT is not fully supported yet for xdp frags since
+        * not all XDP capable drivers can map non-linear xdp_frame in
+        * ndo_xdp_xmit.
+        */
+       if (unlikely(xdp_buff_has_frags(xdp) &&
+                    map_type != BPF_MAP_TYPE_CPUMAP))
+               return -EOPNOTSUPP;
+
        if (map_type == BPF_MAP_TYPE_XSKMAP)
                return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
 
@@ -4590,10 +4817,12 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
 };
 #endif
 
-static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+static unsigned long bpf_xdp_copy(void *dst, const void *ctx,
                                  unsigned long off, unsigned long len)
 {
-       memcpy(dst_buff, src_buff + off, len);
+       struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+
+       bpf_xdp_copy_buf(xdp, off, dst, len, false);
        return 0;
 }
 
@@ -4604,11 +4833,11 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
 
        if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
                return -EINVAL;
-       if (unlikely(!xdp ||
-                    xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+
+       if (unlikely(!xdp || xdp_size > xdp_get_buff_len(xdp)))
                return -EFAULT;
 
-       return bpf_event_output(map, flags, meta, meta_size, xdp->data,
+       return bpf_event_output(map, flags, meta, meta_size, xdp,
                                xdp_size, bpf_xdp_copy);
 }
 
@@ -7533,6 +7762,12 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_xdp_redirect_map_proto;
        case BPF_FUNC_xdp_adjust_tail:
                return &bpf_xdp_adjust_tail_proto;
+       case BPF_FUNC_xdp_get_buff_len:
+               return &bpf_xdp_get_buff_len_proto;
+       case BPF_FUNC_xdp_load_bytes:
+               return &bpf_xdp_load_bytes_proto;
+       case BPF_FUNC_xdp_store_bytes:
+               return &bpf_xdp_store_bytes_proto;
        case BPF_FUNC_fib_lookup:
                return &bpf_xdp_fib_lookup_proto;
        case BPF_FUNC_check_mtu:
@@ -10062,7 +10297,6 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
        .convert_ctx_access     = tc_cls_act_convert_ctx_access,
        .gen_prologue           = tc_cls_act_prologue,
        .gen_ld_abs             = bpf_gen_ld_abs,
-       .check_kfunc_call       = bpf_prog_test_check_kfunc_call,
 };
 
 const struct bpf_prog_ops tc_cls_act_prog_ops = {
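For illustration only (not part of the merge): a minimal BPF-C sketch exercising the three XDP helpers wired into xdp_func_proto() above. It assumes libbpf headers regenerated from this series' UAPI; a program that wants to see non-linear buffers must additionally be loaded with the BPF_F_XDP_HAS_FRAGS prog flag (or the frags-specific section name added by the libbpf patch in this series).

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_frags_sketch(struct xdp_md *ctx)
{
	__u8 buf[8] = {};
	/* total length: linear area plus all fragments */
	__u64 len = bpf_xdp_get_buff_len(ctx);

	if (len < sizeof(struct ethhdr) + sizeof(buf))
		return XDP_PASS;

	/* copy bytes that may sit in a fragment, flip one, write them back */
	if (bpf_xdp_load_bytes(ctx, sizeof(struct ethhdr), buf, sizeof(buf)))
		return XDP_PASS;
	buf[0] ^= 0xff;
	bpf_xdp_store_bytes(ctx, sizeof(struct ethhdr), buf, sizeof(buf));

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";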
index a5b5bb99c64462dac2513f7b3e28cb0763844c21..c53d9aab38ab6cb5f0d24a7427bed01c2151aa1d 100644 (file)
@@ -301,6 +301,7 @@ struct net *get_net_ns_by_id(const struct net *net, int id)
 
        return peer;
 }
+EXPORT_SYMBOL_GPL(get_net_ns_by_id);
 
 /*
  * setup_net runs the initializers for the network namespace object.
index 1827669eedd6f1240a318b3e81aa1c040da8aef0..2d213c4011dbd03b972d52afd307832190197bab 100644 (file)
@@ -1416,38 +1416,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
        return NULL;
 }
 
-static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
-                               struct bpf_prog *old, u32 which)
+static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+                               u32 which)
 {
        struct sk_psock_progs *progs = sock_map_progs(map);
-       struct bpf_prog **pprog;
 
        if (!progs)
                return -EOPNOTSUPP;
 
        switch (which) {
        case BPF_SK_MSG_VERDICT:
-               pprog = &progs->msg_parser;
+               *pprog = &progs->msg_parser;
                break;
 #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
        case BPF_SK_SKB_STREAM_PARSER:
-               pprog = &progs->stream_parser;
+               *pprog = &progs->stream_parser;
                break;
 #endif
        case BPF_SK_SKB_STREAM_VERDICT:
                if (progs->skb_verdict)
                        return -EBUSY;
-               pprog = &progs->stream_verdict;
+               *pprog = &progs->stream_verdict;
                break;
        case BPF_SK_SKB_VERDICT:
                if (progs->stream_verdict)
                        return -EBUSY;
-               pprog = &progs->skb_verdict;
+               *pprog = &progs->skb_verdict;
                break;
        default:
                return -EOPNOTSUPP;
        }
 
+       return 0;
+}
+
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+                               struct bpf_prog *old, u32 which)
+{
+       struct bpf_prog **pprog;
+       int ret;
+
+       ret = sock_map_prog_lookup(map, &pprog, which);
+       if (ret)
+               return ret;
+
        if (old)
                return psock_replace_prog(pprog, prog, old);
 
@@ -1455,6 +1467,57 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
        return 0;
 }
 
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+                           union bpf_attr __user *uattr)
+{
+       __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+       u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
+       struct bpf_prog **pprog;
+       struct bpf_prog *prog;
+       struct bpf_map *map;
+       struct fd f;
+       u32 id = 0;
+       int ret;
+
+       if (attr->query.query_flags)
+               return -EINVAL;
+
+       f = fdget(ufd);
+       map = __bpf_map_get(f);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
+
+       rcu_read_lock();
+
+       ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+       if (ret)
+               goto end;
+
+       prog = *pprog;
+       prog_cnt = !prog ? 0 : 1;
+
+       if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+               goto end;
+
+       /* we do not hold the refcnt, the bpf prog may be released
+        * asynchronously and the id would be set to 0.
+        */
+       id = data_race(prog->aux->id);
+       if (id == 0)
+               prog_cnt = 0;
+
+end:
+       rcu_read_unlock();
+
+       if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
+           (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
+           copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+               ret = -EFAULT;
+
+       fdput(f);
+       return ret;
+}
+
 static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
 {
        switch (link->map->map_type) {
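For illustration only (not part of the merge): a userspace sketch of the new BPF_PROG_QUERY support for sockmap, using libbpf's bpf_prog_query() against an already-created sockmap fd; map_fd and the chosen attach type are placeholders.

#include <stdio.h>
#include <bpf/bpf.h>

static void query_stream_verdict(int map_fd)
{
	__u32 prog_ids[1] = {}, prog_cnt = 1, attach_flags = 0;

	if (bpf_prog_query(map_fd, BPF_SK_SKB_STREAM_VERDICT, 0,
			   &attach_flags, prog_ids, &prog_cnt))
		return;

	if (prog_cnt)
		printf("stream verdict prog id: %u\n", prog_ids[0]);
}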
index 7aba355049862d79e1a31d2990b76fe1c1ca318f..361df312ee7f57fdd3bff6e883ab4c4efa6ae66d 100644 (file)
@@ -162,8 +162,9 @@ static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
 }
 
 /* Returns 0 on success, negative on failure */
-int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
-                    struct net_device *dev, u32 queue_index, unsigned int napi_id)
+int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+                      struct net_device *dev, u32 queue_index,
+                      unsigned int napi_id, u32 frag_size)
 {
        if (!dev) {
                WARN(1, "Missing net_device from driver");
@@ -185,11 +186,12 @@ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
        xdp_rxq->dev = dev;
        xdp_rxq->queue_index = queue_index;
        xdp_rxq->napi_id = napi_id;
+       xdp_rxq->frag_size = frag_size;
 
        xdp_rxq->reg_state = REG_STATE_REGISTERED;
        return 0;
 }
-EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
+EXPORT_SYMBOL_GPL(__xdp_rxq_info_reg);
 
 void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
 {
@@ -369,8 +371,8 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
  * is used for those calls sites.  Thus, allowing for faster recycling
  * of xdp_frames/pages in those cases.
  */
-static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
-                        struct xdp_buff *xdp)
+void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+                 struct xdp_buff *xdp)
 {
        struct xdp_mem_allocator *xa;
        struct page *page;
@@ -406,12 +408,38 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
 
 void xdp_return_frame(struct xdp_frame *xdpf)
 {
+       struct skb_shared_info *sinfo;
+       int i;
+
+       if (likely(!xdp_frame_has_frags(xdpf)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_frame(xdpf);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_return(page_address(page), &xdpf->mem, false, NULL);
+       }
+out:
        __xdp_return(xdpf->data, &xdpf->mem, false, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame);
 
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
 {
+       struct skb_shared_info *sinfo;
+       int i;
+
+       if (likely(!xdp_frame_has_frags(xdpf)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_frame(xdpf);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_return(page_address(page), &xdpf->mem, true, NULL);
+       }
+out:
        __xdp_return(xdpf->data, &xdpf->mem, true, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
@@ -447,7 +475,7 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf,
        struct xdp_mem_allocator *xa;
 
        if (mem->type != MEM_TYPE_PAGE_POOL) {
-               __xdp_return(xdpf->data, &xdpf->mem, false, NULL);
+               xdp_return_frame(xdpf);
                return;
        }
 
@@ -466,12 +494,38 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf,
                bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
        }
 
+       if (unlikely(xdp_frame_has_frags(xdpf))) {
+               struct skb_shared_info *sinfo;
+               int i;
+
+               sinfo = xdp_get_shared_info_from_frame(xdpf);
+               for (i = 0; i < sinfo->nr_frags; i++) {
+                       skb_frag_t *frag = &sinfo->frags[i];
+
+                       bq->q[bq->count++] = skb_frag_address(frag);
+                       if (bq->count == XDP_BULK_QUEUE_SIZE)
+                               xdp_flush_frame_bulk(bq);
+               }
+       }
        bq->q[bq->count++] = xdpf->data;
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
 
 void xdp_return_buff(struct xdp_buff *xdp)
 {
+       struct skb_shared_info *sinfo;
+       int i;
+
+       if (likely(!xdp_buff_has_frags(xdp)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_buff(xdp);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_return(page_address(page), &xdp->rxq->mem, true, xdp);
+       }
+out:
        __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
 }
 
@@ -561,8 +615,14 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
                                           struct sk_buff *skb,
                                           struct net_device *dev)
 {
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
        unsigned int headroom, frame_size;
        void *hard_start;
+       u8 nr_frags;
+
+       /* xdp frags frame */
+       if (unlikely(xdp_frame_has_frags(xdpf)))
+               nr_frags = sinfo->nr_frags;
 
        /* Part of headroom was reserved to xdpf */
        headroom = sizeof(*xdpf) + xdpf->headroom;
@@ -582,6 +642,12 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
        if (xdpf->metasize)
                skb_metadata_set(skb, xdpf->metasize);
 
+       if (unlikely(xdp_frame_has_frags(xdpf)))
+               xdp_update_skb_shared_info(skb, nr_frags,
+                                          sinfo->xdp_frags_size,
+                                          nr_frags * xdpf->frame_sz,
+                                          xdp_frame_is_frag_pfmemalloc(xdpf));
+
        /* Essential SKB info: protocol and skb->dev */
        skb->protocol = eth_type_trans(skb, dev);
 
index de610cb83694f685d1c7d3d408ad8c4a0d0b0d81..b60c9fd7147e6df772aba45d18f036f208d49283 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook  */
 
+#include <linux/init.h>
 #include <linux/types.h>
 #include <linux/bpf_verifier.h>
 #include <linux/bpf.h>
@@ -212,26 +213,23 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
        }
 }
 
-BTF_SET_START(bpf_tcp_ca_kfunc_ids)
+BTF_SET_START(bpf_tcp_ca_check_kfunc_ids)
 BTF_ID(func, tcp_reno_ssthresh)
 BTF_ID(func, tcp_reno_cong_avoid)
 BTF_ID(func, tcp_reno_undo_cwnd)
 BTF_ID(func, tcp_slow_start)
 BTF_ID(func, tcp_cong_avoid_ai)
-BTF_SET_END(bpf_tcp_ca_kfunc_ids)
+BTF_SET_END(bpf_tcp_ca_check_kfunc_ids)
 
-static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner)
-{
-       if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id))
-               return true;
-       return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner);
-}
+static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &bpf_tcp_ca_check_kfunc_ids,
+};
 
 static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
        .get_func_proto         = bpf_tcp_ca_get_func_proto,
        .is_valid_access        = bpf_tcp_ca_is_valid_access,
        .btf_struct_access      = bpf_tcp_ca_btf_struct_access,
-       .check_kfunc_call       = bpf_tcp_ca_check_kfunc_call,
 };
 
 static int bpf_tcp_ca_init_member(const struct btf_type *t,
@@ -300,3 +298,9 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
        .init = bpf_tcp_ca_init,
        .name = "tcp_congestion_ops",
 };
+
+static int __init bpf_tcp_ca_kfunc_init(void)
+{
+       return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set);
+}
+late_initcall(bpf_tcp_ca_kfunc_init);
index ec5550089b4d29c900b073d34fe8fb365750a3a5..02e8626ccb278705b8c611332e47fe5b7c732cf5 100644 (file)
@@ -1154,7 +1154,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
        .set_state      = bbr_set_state,
 };
 
-BTF_SET_START(tcp_bbr_kfunc_ids)
+BTF_SET_START(tcp_bbr_check_kfunc_ids)
 #ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 BTF_ID(func, bbr_init)
@@ -1167,25 +1167,27 @@ BTF_ID(func, bbr_min_tso_segs)
 BTF_ID(func, bbr_set_state)
 #endif
 #endif
-BTF_SET_END(tcp_bbr_kfunc_ids)
+BTF_SET_END(tcp_bbr_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &tcp_bbr_check_kfunc_ids,
+};
 
 static int __init bbr_register(void)
 {
        int ret;
 
        BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
-       ret = tcp_register_congestion_control(&tcp_bbr_cong_ops);
-       if (ret)
+
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_bbr_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
-       return 0;
+       return tcp_register_congestion_control(&tcp_bbr_cong_ops);
 }
 
 static void __exit bbr_unregister(void)
 {
-       unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
        tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
 }
 
index e07837e23b3fd2435c87320945528abdee9817cc..24d562dd62254d6e50dd08236f8967400d81e1ea 100644 (file)
@@ -485,7 +485,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
        .name           = "cubic",
 };
 
-BTF_SET_START(tcp_cubic_kfunc_ids)
+BTF_SET_START(tcp_cubic_check_kfunc_ids)
 #ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 BTF_ID(func, cubictcp_init)
@@ -496,9 +496,12 @@ BTF_ID(func, cubictcp_cwnd_event)
 BTF_ID(func, cubictcp_acked)
 #endif
 #endif
-BTF_SET_END(tcp_cubic_kfunc_ids)
+BTF_SET_END(tcp_cubic_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &tcp_cubic_check_kfunc_ids,
+};
 
 static int __init cubictcp_register(void)
 {
@@ -534,16 +537,14 @@ static int __init cubictcp_register(void)
        /* divide by bic_scale and by constant Srtt (100ms) */
        do_div(cube_factor, bic_scale * 10);
 
-       ret = tcp_register_congestion_control(&cubictcp);
-       if (ret)
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
-       return 0;
+       return tcp_register_congestion_control(&cubictcp);
 }
 
 static void __exit cubictcp_unregister(void)
 {
-       unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
        tcp_unregister_congestion_control(&cubictcp);
 }
 
index 0d7ab3cc7b614c1754f4940b721c4dd89ce19b9c..1943a6630341c67f98a43dc1052321fe9edebfaa 100644 (file)
@@ -238,7 +238,7 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
        .name           = "dctcp-reno",
 };
 
-BTF_SET_START(tcp_dctcp_kfunc_ids)
+BTF_SET_START(tcp_dctcp_check_kfunc_ids)
 #ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 BTF_ID(func, dctcp_init)
@@ -249,25 +249,27 @@ BTF_ID(func, dctcp_cwnd_undo)
 BTF_ID(func, dctcp_state)
 #endif
 #endif
-BTF_SET_END(tcp_dctcp_kfunc_ids)
+BTF_SET_END(tcp_dctcp_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &tcp_dctcp_check_kfunc_ids,
+};
 
 static int __init dctcp_register(void)
 {
        int ret;
 
        BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE);
-       ret = tcp_register_congestion_control(&dctcp);
-       if (ret)
+
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_dctcp_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
-       return 0;
+       return tcp_register_congestion_control(&dctcp);
 }
 
 static void __exit dctcp_unregister(void)
 {
-       unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
        tcp_unregister_congestion_control(&dctcp);
 }
 
index a135b1a46014c24a0d611a6dadc7c20b8f7a8cd9..238b6a620e888911e9a0034cdb42909079eab056 100644 (file)
@@ -14,6 +14,11 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
+ifeq ($(CONFIG_NF_CONNTRACK),m)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o
+else ifeq ($(CONFIG_NF_CONNTRACK),y)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o
+endif
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
 
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
new file mode 100644 (file)
index 0000000..8ad3f52
--- /dev/null
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable Conntrack Helpers for XDP and TC-BPF hook
+ *
+ * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface they
+ * are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/types.h>
+#include <linux/btf_ids.h>
+#include <linux/net_namespace.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+/* bpf_ct_opts - Options for CT lookup helpers
+ *
+ * Members:
+ * @netns_id   - Specify the network namespace for lookup
+ *              Values:
+ *                BPF_F_CURRENT_NETNS (-1)
+ *                  Use namespace associated with ctx (xdp_md, __sk_buff)
+ *                [0, S32_MAX]
+ *                  Network Namespace ID
+ * @error      - Out parameter, set for any errors encountered
+ *              Values:
+ *                -EINVAL - Passed NULL for bpf_tuple pointer
+ *                -EINVAL - opts->reserved is not 0
+ *                -EINVAL - netns_id is less than -1
+ *                -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
+ *                -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
+ *                -ENONET - No network namespace found for netns_id
+ *                -ENOENT - Conntrack lookup could not find entry for tuple
+ *                -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
+ *                                or sizeof(tuple->ipv6)
+ * @l4proto    - Layer 4 protocol
+ *              Values:
+ *                IPPROTO_TCP, IPPROTO_UDP
+ * @reserved   - Reserved member, will be reused for more options in future
+ *              Values:
+ *                0
+ */
+struct bpf_ct_opts {
+       s32 netns_id;
+       s32 error;
+       u8 l4proto;
+       u8 reserved[3];
+};
+
+enum {
+       NF_BPF_CT_OPTS_SZ = 12,
+};
+
+static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
+                                         struct bpf_sock_tuple *bpf_tuple,
+                                         u32 tuple_len, u8 protonum,
+                                         s32 netns_id)
+{
+       struct nf_conntrack_tuple_hash *hash;
+       struct nf_conntrack_tuple tuple;
+
+       if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
+               return ERR_PTR(-EPROTO);
+       if (unlikely(netns_id < BPF_F_CURRENT_NETNS))
+               return ERR_PTR(-EINVAL);
+
+       memset(&tuple, 0, sizeof(tuple));
+       switch (tuple_len) {
+       case sizeof(bpf_tuple->ipv4):
+               tuple.src.l3num = AF_INET;
+               tuple.src.u3.ip = bpf_tuple->ipv4.saddr;
+               tuple.src.u.tcp.port = bpf_tuple->ipv4.sport;
+               tuple.dst.u3.ip = bpf_tuple->ipv4.daddr;
+               tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport;
+               break;
+       case sizeof(bpf_tuple->ipv6):
+               tuple.src.l3num = AF_INET6;
+               memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
+               tuple.src.u.tcp.port = bpf_tuple->ipv6.sport;
+               memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
+               tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport;
+               break;
+       default:
+               return ERR_PTR(-EAFNOSUPPORT);
+       }
+
+       tuple.dst.protonum = protonum;
+
+       if (netns_id >= 0) {
+               net = get_net_ns_by_id(net, netns_id);
+               if (unlikely(!net))
+                       return ERR_PTR(-ENONET);
+       }
+
+       hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+       if (netns_id >= 0)
+               put_net(net);
+       if (!hash)
+               return ERR_PTR(-ENOENT);
+       return nf_ct_tuplehash_to_ctrack(hash);
+}
+
+__diag_push();
+__diag_ignore(GCC, 8, "-Wmissing-prototypes",
+             "Global functions as their definitions will be in nf_conntrack BTF");
+
+/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ *                    reference to it
+ *
+ * Parameters:
+ * @xdp_ctx    - Pointer to ctx (xdp_md) in XDP program
+ *                 Cannot be NULL
+ * @bpf_tuple  - Pointer to memory representing the tuple to look up
+ *                 Cannot be NULL
+ * @tuple__sz  - Length of the tuple structure
+ *                 Must be one of sizeof(bpf_tuple->ipv4) or
+ *                 sizeof(bpf_tuple->ipv6)
+ * @opts       - Additional options for lookup (documented above)
+ *                 Cannot be NULL
+ * @opts__sz   - Length of the bpf_ct_opts structure
+ *                 Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
+                 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+       struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
+       struct net *caller_net;
+       struct nf_conn *nfct;
+
+       BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
+
+       if (!opts)
+               return NULL;
+       if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+           opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+               opts->error = -EINVAL;
+               return NULL;
+       }
+       caller_net = dev_net(ctx->rxq->dev);
+       nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
+                                 opts->netns_id);
+       if (IS_ERR(nfct)) {
+               opts->error = PTR_ERR(nfct);
+               return NULL;
+       }
+       return nfct;
+}
+
+/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ *                    reference to it
+ *
+ * Parameters:
+ * @skb_ctx    - Pointer to ctx (__sk_buff) in TC program
+ *                 Cannot be NULL
+ * @bpf_tuple  - Pointer to memory representing the tuple to look up
+ *                 Cannot be NULL
+ * @tuple__sz  - Length of the tuple structure
+ *                 Must be one of sizeof(bpf_tuple->ipv4) or
+ *                 sizeof(bpf_tuple->ipv6)
+ * @opts       - Additional options for lookup (documented above)
+ *                 Cannot be NULL
+ * @opts__sz   - Length of the bpf_ct_opts structure
+ *                 Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
+                 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+       struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+       struct net *caller_net;
+       struct nf_conn *nfct;
+
+       BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
+
+       if (!opts)
+               return NULL;
+       if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+           opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+               opts->error = -EINVAL;
+               return NULL;
+       }
+       caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+       nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
+                                 opts->netns_id);
+       if (IS_ERR(nfct)) {
+               opts->error = PTR_ERR(nfct);
+               return NULL;
+       }
+       return nfct;
+}
+
+/* bpf_ct_release - Release acquired nf_conn object
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
+ * the program if any reference remains unreleased at exit in any of the
+ * explored states.
+ *
+ * Parameters:
+ * @nf_conn     - Pointer to referenced nf_conn object, obtained using
+ *                bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
+ */
+void bpf_ct_release(struct nf_conn *nfct)
+{
+       if (!nfct)
+               return;
+       nf_ct_put(nfct);
+}
+
+__diag_pop()
+
+BTF_SET_START(nf_ct_xdp_check_kfunc_ids)
+BTF_ID(func, bpf_xdp_ct_lookup)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_xdp_check_kfunc_ids)
+
+BTF_SET_START(nf_ct_tc_check_kfunc_ids)
+BTF_ID(func, bpf_skb_ct_lookup)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_tc_check_kfunc_ids)
+
+BTF_SET_START(nf_ct_acquire_kfunc_ids)
+BTF_ID(func, bpf_xdp_ct_lookup)
+BTF_ID(func, bpf_skb_ct_lookup)
+BTF_SET_END(nf_ct_acquire_kfunc_ids)
+
+BTF_SET_START(nf_ct_release_kfunc_ids)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_release_kfunc_ids)
+
+/* Both sets are identical */
+#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids
+
+static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = {
+       .owner        = THIS_MODULE,
+       .check_set    = &nf_ct_xdp_check_kfunc_ids,
+       .acquire_set  = &nf_ct_acquire_kfunc_ids,
+       .release_set  = &nf_ct_release_kfunc_ids,
+       .ret_null_set = &nf_ct_ret_null_kfunc_ids,
+};
+
+static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = {
+       .owner        = THIS_MODULE,
+       .check_set    = &nf_ct_tc_check_kfunc_ids,
+       .acquire_set  = &nf_ct_acquire_kfunc_ids,
+       .release_set  = &nf_ct_release_kfunc_ids,
+       .ret_null_set = &nf_ct_ret_null_kfunc_ids,
+};
+
+int register_nf_conntrack_bpf(void)
+{
+       int ret;
+
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set);
+       return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set);
+}
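For illustration only (not part of the merge): a rough BPF-C sketch of calling the unstable conntrack kfuncs from XDP, patterned on the selftests added in this series. It assumes a vmlinux.h generated from a kernel built with the options above; bpf_ct_opts___local is a hand-copied mirror of the in-kernel struct, and the tuple would normally be filled from the parsed packet.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct bpf_ct_opts___local {
	s32 netns_id;
	s32 error;
	u8 l4proto;
	u8 reserved[3];
};

extern struct nf_conn *
bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
		  u32 tuple__sz, struct bpf_ct_opts___local *opts,
		  u32 opts__sz) __ksym;
extern void bpf_ct_release(struct nf_conn *ct) __ksym;

SEC("xdp")
int xdp_ct_sketch(struct xdp_md *ctx)
{
	struct bpf_ct_opts___local opts = {
		.netns_id = -1,			/* BPF_F_CURRENT_NETNS */
		.l4proto = 6,			/* IPPROTO_TCP */
	};
	struct bpf_sock_tuple tup = {};		/* fill from the packet */
	struct nf_conn *ct;

	ct = bpf_xdp_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
	if (ct)
		bpf_ct_release(ct);		/* verifier enforces the release */

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";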
index d6aa5b47031ebcb4a35abbd01f48fc46b746c8ab..d38d689de23c8b688cbdb588ad608b96ed76dc9f 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/rculist_nulls.h>
 
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
@@ -2750,8 +2751,15 @@ int nf_conntrack_init_start(void)
        conntrack_gc_work_init(&conntrack_gc_work);
        queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
 
+       ret = register_nf_conntrack_bpf();
+       if (ret < 0)
+               goto err_kfunc;
+
        return 0;
 
+err_kfunc:
+       cancel_delayed_work_sync(&conntrack_gc_work.dwork);
+       nf_conntrack_proto_fini();
 err_proto:
        nf_conntrack_seqadj_fini();
 err_seqadj:
index c19569819866eed2dc4c2c3fb90931fcf407acab..3e0d6281fd1efcdf204fe74d09ebcfed5269be48 100644 (file)
@@ -3240,49 +3240,58 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
        return sk;
 }
 
-static struct sock *unix_next_socket(struct seq_file *seq,
-                                    struct sock *sk,
-                                    loff_t *pos)
+static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
 {
        unsigned long bucket = get_bucket(*pos);
+       struct sock *sk;
 
-       while (sk > (struct sock *)SEQ_START_TOKEN) {
-               sk = sk_next(sk);
-               if (!sk)
-                       goto next_bucket;
-               if (sock_net(sk) == seq_file_net(seq))
-                       return sk;
-       }
-
-       do {
+       while (bucket < ARRAY_SIZE(unix_socket_table)) {
                spin_lock(&unix_table_locks[bucket]);
+
                sk = unix_from_bucket(seq, pos);
                if (sk)
                        return sk;
 
-next_bucket:
-               spin_unlock(&unix_table_locks[bucket++]);
-               *pos = set_bucket_offset(bucket, 1);
-       } while (bucket < ARRAY_SIZE(unix_socket_table));
+               spin_unlock(&unix_table_locks[bucket]);
+
+               *pos = set_bucket_offset(++bucket, 1);
+       }
 
        return NULL;
 }
 
+static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
+                                 loff_t *pos)
+{
+       unsigned long bucket = get_bucket(*pos);
+
+       for (sk = sk_next(sk); sk; sk = sk_next(sk))
+               if (sock_net(sk) == seq_file_net(seq))
+                       return sk;
+
+       spin_unlock(&unix_table_locks[bucket]);
+
+       *pos = set_bucket_offset(++bucket, 1);
+
+       return unix_get_first(seq, pos);
+}
+
 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
 {
        if (!*pos)
                return SEQ_START_TOKEN;
 
-       if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
-               return NULL;
-
-       return unix_next_socket(seq, NULL, pos);
+       return unix_get_first(seq, pos);
 }
 
 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
        ++*pos;
-       return unix_next_socket(seq, v, pos);
+
+       if (v == SEQ_START_TOKEN)
+               return unix_get_first(seq, pos);
+
+       return unix_get_next(seq, v, pos);
 }
 
 static void unix_seq_stop(struct seq_file *seq, void *v)
@@ -3347,6 +3356,15 @@ static const struct seq_operations unix_seq_ops = {
 };
 
 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_unix_iter_state {
+       struct seq_net_private p;
+       unsigned int cur_sk;
+       unsigned int end_sk;
+       unsigned int max_sk;
+       struct sock **batch;
+       bool st_bucket_done;
+};
+
 struct bpf_iter__unix {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct unix_sock *, unix_sk);
@@ -3365,24 +3383,156 @@ static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
        return bpf_iter_run_prog(prog, &ctx);
 }
 
+static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
+
+{
+       struct bpf_unix_iter_state *iter = seq->private;
+       unsigned int expected = 1;
+       struct sock *sk;
+
+       sock_hold(start_sk);
+       iter->batch[iter->end_sk++] = start_sk;
+
+       for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
+               if (sock_net(sk) != seq_file_net(seq))
+                       continue;
+
+               if (iter->end_sk < iter->max_sk) {
+                       sock_hold(sk);
+                       iter->batch[iter->end_sk++] = sk;
+               }
+
+               expected++;
+       }
+
+       spin_unlock(&unix_table_locks[start_sk->sk_hash]);
+
+       return expected;
+}
+
+static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
+{
+       while (iter->cur_sk < iter->end_sk)
+               sock_put(iter->batch[iter->cur_sk++]);
+}
+
+static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
+                                      unsigned int new_batch_sz)
+{
+       struct sock **new_batch;
+
+       new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
+                            GFP_USER | __GFP_NOWARN);
+       if (!new_batch)
+               return -ENOMEM;
+
+       bpf_iter_unix_put_batch(iter);
+       kvfree(iter->batch);
+       iter->batch = new_batch;
+       iter->max_sk = new_batch_sz;
+
+       return 0;
+}
+
+static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
+                                       loff_t *pos)
+{
+       struct bpf_unix_iter_state *iter = seq->private;
+       unsigned int expected;
+       bool resized = false;
+       struct sock *sk;
+
+       if (iter->st_bucket_done)
+               *pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
+
+again:
+       /* Get a new batch */
+       iter->cur_sk = 0;
+       iter->end_sk = 0;
+
+       sk = unix_get_first(seq, pos);
+       if (!sk)
+               return NULL; /* Done */
+
+       expected = bpf_iter_unix_hold_batch(seq, sk);
+
+       if (iter->end_sk == expected) {
+               iter->st_bucket_done = true;
+               return sk;
+       }
+
+       if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
+               resized = true;
+               goto again;
+       }
+
+       return sk;
+}
+
+static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       if (!*pos)
+               return SEQ_START_TOKEN;
+
+       /* bpf iter does not support lseek, so it always
+        * continues from where it was stop()-ped.
+        */
+       return bpf_iter_unix_batch(seq, pos);
+}
+
+static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       struct bpf_unix_iter_state *iter = seq->private;
+       struct sock *sk;
+
+       /* Whenever seq_next() is called, the iter->cur_sk is
+        * done with seq_show(), so advance to the next sk in
+        * the batch.
+        */
+       if (iter->cur_sk < iter->end_sk)
+               sock_put(iter->batch[iter->cur_sk++]);
+
+       ++*pos;
+
+       if (iter->cur_sk < iter->end_sk)
+               sk = iter->batch[iter->cur_sk];
+       else
+               sk = bpf_iter_unix_batch(seq, pos);
+
+       return sk;
+}
+
 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
 {
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
        struct sock *sk = v;
        uid_t uid;
+       bool slow;
+       int ret;
 
        if (v == SEQ_START_TOKEN)
                return 0;
 
+       slow = lock_sock_fast(sk);
+
+       if (unlikely(sk_unhashed(sk))) {
+               ret = SEQ_SKIP;
+               goto unlock;
+       }
+
        uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, false);
-       return unix_prog_seq_show(prog, &meta, v, uid);
+       ret = unix_prog_seq_show(prog, &meta, v, uid);
+unlock:
+       unlock_sock_fast(sk, slow);
+       return ret;
 }
 
 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
 {
+       struct bpf_unix_iter_state *iter = seq->private;
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
 
@@ -3393,12 +3543,13 @@ static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
                        (void)unix_prog_seq_show(prog, &meta, v, 0);
        }
 
-       unix_seq_stop(seq, v);
+       if (iter->cur_sk < iter->end_sk)
+               bpf_iter_unix_put_batch(iter);
 }
 
 static const struct seq_operations bpf_iter_unix_seq_ops = {
-       .start  = unix_seq_start,
-       .next   = unix_seq_next,
+       .start  = bpf_iter_unix_seq_start,
+       .next   = bpf_iter_unix_seq_next,
        .stop   = bpf_iter_unix_seq_stop,
        .show   = bpf_iter_unix_seq_show,
 };
@@ -3447,13 +3598,55 @@ static struct pernet_operations unix_net_ops = {
 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
                     struct unix_sock *unix_sk, uid_t uid)
 
+#define INIT_BATCH_SZ 16
+
+static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
+{
+       struct bpf_unix_iter_state *iter = priv_data;
+       int err;
+
+       err = bpf_iter_init_seq_net(priv_data, aux);
+       if (err)
+               return err;
+
+       err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
+       if (err) {
+               bpf_iter_fini_seq_net(priv_data);
+               return err;
+       }
+
+       return 0;
+}
+
+static void bpf_iter_fini_unix(void *priv_data)
+{
+       struct bpf_unix_iter_state *iter = priv_data;
+
+       bpf_iter_fini_seq_net(priv_data);
+       kvfree(iter->batch);
+}
+
 static const struct bpf_iter_seq_info unix_seq_info = {
        .seq_ops                = &bpf_iter_unix_seq_ops,
-       .init_seq_private       = bpf_iter_init_seq_net,
-       .fini_seq_private       = bpf_iter_fini_seq_net,
-       .seq_priv_size          = sizeof(struct seq_net_private),
+       .init_seq_private       = bpf_iter_init_unix,
+       .fini_seq_private       = bpf_iter_fini_unix,
+       .seq_priv_size          = sizeof(struct bpf_unix_iter_state),
 };
 
+static const struct bpf_func_proto *
+bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
+                            const struct bpf_prog *prog)
+{
+       switch (func_id) {
+       case BPF_FUNC_setsockopt:
+               return &bpf_sk_setsockopt_proto;
+       case BPF_FUNC_getsockopt:
+               return &bpf_sk_getsockopt_proto;
+       default:
+               return NULL;
+       }
+}
+
 static struct bpf_iter_reg unix_reg_info = {
        .target                 = "unix",
        .ctx_arg_info_size      = 1,
@@ -3461,6 +3654,7 @@ static struct bpf_iter_reg unix_reg_info = {
                { offsetof(struct bpf_iter__unix, unix_sk),
                  PTR_TO_BTF_ID_OR_NULL },
        },
+       .get_func_proto         = bpf_iter_unix_get_func_proto,
        .seq_info               = &unix_seq_info,
 };
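The get_func_proto hook added above is what allows a unix socket iterator program to call bpf_setsockopt()/bpf_getsockopt() on the sockets it visits, now that bpf_iter_unix_seq_show() holds each batched socket and locks it with lock_sock_fast(). A minimal sketch of such a program follows; it is not taken from this series' selftests, and SOL_SOCKET/SO_SNDBUF are redefined by hand because vmlinux.h carries no UAPI defines.

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    #define SOL_SOCKET 1
    #define SO_SNDBUF  7

    char _license[] SEC("license") = "GPL";

    SEC("iter/unix")
    int bump_unix_sndbuf(struct bpf_iter__unix *ctx)
    {
            struct unix_sock *unix_sk = ctx->unix_sk;
            int sndbuf = 1 << 20;

            if (!unix_sk)
                    return 0;

            /* safe here: the socket is locked by bpf_iter_unix_seq_show() */
            bpf_setsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
                           &sndbuf, sizeof(sndbuf));
            return 0;
    }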
 
index 8675fa5273df8831cd7415f1ce9514f176c1fe55..3ec8ad9c175088abcfbef5c65106753c0c26213c 100644 (file)
@@ -26,12 +26,12 @@ static void int_exit(int sig)
 {
        __u32 curr_prog_id = 0;
 
-       if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-               printf("bpf_get_link_xdp_id failed\n");
+       if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+               printf("bpf_xdp_query_id failed\n");
                exit(1);
        }
        if (prog_id == curr_prog_id)
-               bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+               bpf_xdp_detach(ifindex, xdp_flags, NULL);
        else if (!curr_prog_id)
                printf("couldn't find a prog id on a given interface\n");
        else
@@ -143,7 +143,7 @@ int main(int argc, char **argv)
        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                printf("link set xdp fd failed\n");
                return 1;
        }
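Every sample conversion in this pull follows the same pattern: bpf_set_link_xdp_fd() and bpf_get_link_xdp_id() become bpf_xdp_attach(), bpf_xdp_detach() and bpf_xdp_query_id(), which take the XDP_FLAGS_* value before an optional opts pointer. A condensed sketch of the new calling convention, assuming ifindex and prog_fd are already set up by the caller:

    #include <bpf/libbpf.h>
    #include <linux/if_link.h>
    #include <stdio.h>

    static int xdp_attach_example(int ifindex, int prog_fd)
    {
            __u32 flags = XDP_FLAGS_SKB_MODE;   /* generic XDP, for illustration */
            __u32 prog_id = 0;
            int err;

            err = bpf_xdp_attach(ifindex, prog_fd, flags, NULL); /* opts may be NULL */
            if (err < 0)
                    return err;

            if (!bpf_xdp_query_id(ifindex, flags, &prog_id))
                    printf("XDP program %u attached\n", prog_id);

            return bpf_xdp_detach(ifindex, flags, NULL);
    }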
index a70b094c8ec57e0937def9965a1c3140e169fd4c..6c61d5f570fbfbc15aec06c9d4b6fa03d3f6fd09 100644 (file)
@@ -34,12 +34,12 @@ static void int_exit(int sig)
        __u32 curr_prog_id = 0;
 
        if (ifindex > -1) {
-               if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
+               if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+                       printf("bpf_xdp_query_id failed\n");
                        exit(1);
                }
                if (prog_id == curr_prog_id)
-                       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+                       bpf_xdp_detach(ifindex, xdp_flags, NULL);
                else if (!curr_prog_id)
                        printf("couldn't find a prog id on a given iface\n");
                else
@@ -173,7 +173,7 @@ int main(int argc, char **argv)
        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                printf("link set xdp fd failed\n");
                return 1;
        }
index 4ad896782f77cade74585d929e0443051a68552a..79ccd9891924c77238cf31039dac6a573d6693b6 100644 (file)
@@ -33,7 +33,7 @@ static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
 {
        int err;
 
-       err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags);
+       err = bpf_xdp_attach(idx, prog_fd, xdp_flags, NULL);
        if (err < 0) {
                printf("ERROR: failed to attach program to %s\n", name);
                return err;
@@ -51,7 +51,7 @@ static int do_detach(int idx, const char *name)
 {
        int err;
 
-       err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
+       err = bpf_xdp_detach(idx, xdp_flags, NULL);
        if (err < 0)
                printf("ERROR: failed to detach program from %s\n", name);
 
index cfaf7e50e4316981595a2b5333bbda04498c45ea..2d565ba54b8caec908e12cccfb684dd5859820b0 100644 (file)
@@ -43,13 +43,13 @@ static void int_exit(int sig)
        int i = 0;
 
        for (i = 0; i < total_ifindex; i++) {
-               if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) {
-                       printf("bpf_get_link_xdp_id on iface %d failed\n",
+               if (bpf_xdp_query_id(ifindex_list[i], flags, &prog_id)) {
+                       printf("bpf_xdp_query_id on iface %d failed\n",
                               ifindex_list[i]);
                        exit(1);
                }
                if (prog_id_list[i] == prog_id)
-                       bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
+                       bpf_xdp_detach(ifindex_list[i], flags, NULL);
                else if (!prog_id)
                        printf("couldn't find a prog id on iface %d\n",
                               ifindex_list[i]);
@@ -716,12 +716,12 @@ int main(int ac, char **argv)
        }
        prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *));
        for (i = 0; i < total_ifindex; i++) {
-               if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) {
+               if (bpf_xdp_attach(ifindex_list[i], prog_fd, flags, NULL) < 0) {
                        printf("link set xdp fd failed\n");
                        int recovery_index = i;
 
                        for (i = 0; i < recovery_index; i++)
-                               bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
+                               bpf_xdp_detach(ifindex_list[i], flags, NULL);
 
                        return 1;
                }
index 74a2926eba08d81ea89fc2041e40d3d34b56df0e..fb2532d13aac0da9c96d6a9965052b4e734a04a9 100644 (file)
@@ -62,15 +62,15 @@ static void int_exit(int sig)
        __u32 curr_prog_id = 0;
 
        if (ifindex > -1) {
-               if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
+               if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+                       printf("bpf_xdp_query_id failed\n");
                        exit(EXIT_FAIL);
                }
                if (prog_id == curr_prog_id) {
                        fprintf(stderr,
                                "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
                                ifindex, ifname);
-                       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+                       bpf_xdp_detach(ifindex, xdp_flags, NULL);
                } else if (!curr_prog_id) {
                        printf("couldn't find a prog id on a given iface\n");
                } else {
@@ -209,7 +209,7 @@ static struct datarec *alloc_record_per_cpu(void)
 
 static struct record *alloc_record_per_rxq(void)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        struct record *array;
 
        array = calloc(nr_rxqs, sizeof(struct record));
@@ -222,7 +222,7 @@ static struct record *alloc_record_per_rxq(void)
 
 static struct stats_record *alloc_stats_record(void)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        struct stats_record *rec;
        int i;
 
@@ -241,7 +241,7 @@ static struct stats_record *alloc_stats_record(void)
 
 static void free_stats_record(struct stats_record *r)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        int i;
 
        for (i = 0; i < nr_rxqs; i++)
@@ -289,7 +289,7 @@ static void stats_collect(struct stats_record *rec)
        map_collect_percpu(fd, 0, &rec->stats);
 
        fd = bpf_map__fd(rx_queue_index_map);
-       max_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       max_rxqs = bpf_map__max_entries(rx_queue_index_map);
        for (i = 0; i < max_rxqs; i++)
                map_collect_percpu(fd, i, &rec->rxq[i]);
 }
@@ -335,7 +335,7 @@ static void stats_print(struct stats_record *stats_rec,
                        struct stats_record *stats_prev,
                        int action, __u32 cfg_opt)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        unsigned int nr_cpus = bpf_num_possible_cpus();
        double pps = 0, err = 0;
        struct record *rec, *prev;
@@ -582,7 +582,7 @@ int main(int argc, char **argv)
        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                fprintf(stderr, "link set xdp fd failed\n");
                return EXIT_FAIL_XDP;
        }
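The bpf_map__def(map)->max_entries reads in this sample are switched to bpf_map__max_entries(); every field of the deprecated struct bpf_map_def has an equivalent getter. A short sketch of the getter-based style (the printout is purely illustrative):

    #include <bpf/libbpf.h>
    #include <stdio.h>

    static void show_map_params(const struct bpf_map *map)
    {
            printf("%s: type=%u key=%u value=%u max_entries=%u flags=0x%x\n",
                   bpf_map__name(map),
                   bpf_map__type(map),
                   bpf_map__key_size(map),
                   bpf_map__value_size(map),
                   bpf_map__max_entries(map),
                   bpf_map__map_flags(map));
    }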
index 587eacb491037c099cebdea512fe9f57dd55e7ed..0a2b3e997aed06206982b0084ec251db3bb37480 100644 (file)
@@ -30,7 +30,7 @@ static int do_attach(int idx, int fd, const char *name)
        __u32 info_len = sizeof(info);
        int err;
 
-       err = bpf_set_link_xdp_fd(idx, fd, xdp_flags);
+       err = bpf_xdp_attach(idx, fd, xdp_flags, NULL);
        if (err < 0) {
                printf("ERROR: failed to attach program to %s\n", name);
                return err;
@@ -51,13 +51,13 @@ static int do_detach(int idx, const char *name)
        __u32 curr_prog_id = 0;
        int err = 0;
 
-       err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags);
+       err = bpf_xdp_query_id(idx, xdp_flags, &curr_prog_id);
        if (err) {
-               printf("bpf_get_link_xdp_id failed\n");
+               printf("bpf_xdp_query_id failed\n");
                return err;
        }
        if (prog_id == curr_prog_id) {
-               err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
+               err = bpf_xdp_detach(idx, xdp_flags, NULL);
                if (err < 0)
                        printf("ERROR: failed to detach prog from %s\n", name);
        } else if (!curr_prog_id) {
index 8740838e7767929d0a9bcea961246113ba583914..ae70a7943d85f0e76014999eca5d5d31f123eef9 100644 (file)
@@ -1265,7 +1265,7 @@ static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags)
        int ret;
 
        if (prog_id) {
-               ret = bpf_get_link_xdp_id(ifindex, &cur_prog_id, xdp_flags);
+               ret = bpf_xdp_query_id(ifindex, xdp_flags, &cur_prog_id);
                if (ret < 0)
                        return -errno;
 
@@ -1278,7 +1278,7 @@ static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags)
                }
        }
 
-       return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+       return bpf_xdp_detach(ifindex, xdp_flags, NULL);
 }
 
 int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
@@ -1295,8 +1295,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
 
        xdp_flags |= !force ? XDP_FLAGS_UPDATE_IF_NOEXIST : 0;
        xdp_flags |= generic ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
-       ret = bpf_set_link_xdp_fd(ifindex, bpf_program__fd(xdp_prog),
-                                 xdp_flags);
+       ret = bpf_xdp_attach(ifindex, bpf_program__fd(xdp_prog), xdp_flags, NULL);
        if (ret < 0) {
                ret = -errno;
                fprintf(stderr,
@@ -1308,7 +1307,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
                return ret;
        }
 
-       ret = bpf_get_link_xdp_id(ifindex, &prog_id, xdp_flags);
+       ret = bpf_xdp_query_id(ifindex, xdp_flags, &prog_id);
        if (ret < 0) {
                ret = -errno;
                fprintf(stderr,
index 1d4f305d02aadc9d508313ba75586ed2df82b016..7370c03c96fc0bd0d9aaeb96584c60aaeefaaa34 100644 (file)
@@ -32,12 +32,12 @@ static void int_exit(int sig)
        __u32 curr_prog_id = 0;
 
        if (ifindex > -1) {
-               if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
+               if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+                       printf("bpf_xdp_query_id failed\n");
                        exit(1);
                }
                if (prog_id == curr_prog_id)
-                       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+                       bpf_xdp_detach(ifindex, xdp_flags, NULL);
                else if (!curr_prog_id)
                        printf("couldn't find a prog id on a given iface\n");
                else
@@ -288,7 +288,7 @@ int main(int argc, char **argv)
                }
        }
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                printf("link set xdp fd failed\n");
                return 1;
        }
@@ -302,7 +302,7 @@ int main(int argc, char **argv)
 
        poll_stats(kill_after_s);
 
-       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+       bpf_xdp_detach(ifindex, xdp_flags, NULL);
 
        return 0;
 }
index cc4408797ab702a22cf546e1d6dd1bcac8caabc1..28b5f2a9fa080179a62d44c3407f6e095b3fe9c5 100644 (file)
@@ -173,7 +173,7 @@ main(int argc, char **argv)
        unlink(SOCKET_NAME);
 
        /* Unset fd for given ifindex */
-       err = bpf_set_link_xdp_fd(ifindex, -1, 0);
+       err = bpf_xdp_detach(ifindex, 0, NULL);
        if (err) {
                fprintf(stderr, "Error when unsetting bpf prog_fd for ifindex(%d)\n", ifindex);
                return err;
index aa50864e4415a508d96ca82aa1bd14dcbc066bf9..19288a2bbc756d3f17deca5a193743467680973e 100644 (file)
@@ -571,13 +571,13 @@ static void remove_xdp_program(void)
 {
        u32 curr_prog_id = 0;
 
-       if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
-               printf("bpf_get_link_xdp_id failed\n");
+       if (bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &curr_prog_id)) {
+               printf("bpf_xdp_query_id failed\n");
                exit(EXIT_FAILURE);
        }
 
        if (prog_id == curr_prog_id)
-               bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
+               bpf_xdp_detach(opt_ifindex, opt_xdp_flags, NULL);
        else if (!curr_prog_id)
                printf("couldn't find a prog id on a given interface\n");
        else
@@ -1027,7 +1027,7 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
        if (ret)
                exit_with_error(-ret);
 
-       ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
+       ret = bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &prog_id);
        if (ret)
                exit_with_error(-ret);
 
@@ -1760,7 +1760,7 @@ static void load_xdp_program(char **argv, struct bpf_object **obj)
                exit(EXIT_FAILURE);
        }
 
-       if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
+       if (bpf_xdp_attach(opt_ifindex, prog_fd, opt_xdp_flags, NULL) < 0) {
                fprintf(stderr, "ERROR: link set xdp fd failed\n");
                exit(EXIT_FAILURE);
        }
index 52e7c4ffd2282e7515fbefd6d519702a344113df..2220509588a0b15ae58b15f7e1a5def964cea48d 100644 (file)
@@ -974,8 +974,8 @@ static void remove_xdp_program(void)
        int i;
 
        for (i = 0 ; i < n_ports; i++)
-               bpf_set_link_xdp_fd(if_nametoindex(port_params[i].iface), -1,
-                                   port_params[i].xsk_cfg.xdp_flags);
+               bpf_xdp_detach(if_nametoindex(port_params[i].iface),
+                              port_params[i].xsk_cfg.xdp_flags, NULL);
 }
 
 int main(int argc, char **argv)
index a6403ddf5de721d40f0066a6c359f033af9f3858..096625242475471746ceb74eeab6b7bc7316b377 100755 (executable)
@@ -87,21 +87,25 @@ class HeaderParser(object):
         self.line = ''
         self.helpers = []
         self.commands = []
+        self.desc_unique_helpers = set()
+        self.define_unique_helpers = []
+        self.desc_syscalls = []
+        self.enum_syscalls = []
 
     def parse_element(self):
         proto    = self.parse_symbol()
-        desc     = self.parse_desc()
-        ret      = self.parse_ret()
+        desc     = self.parse_desc(proto)
+        ret      = self.parse_ret(proto)
         return APIElement(proto=proto, desc=desc, ret=ret)
 
     def parse_helper(self):
         proto    = self.parse_proto()
-        desc     = self.parse_desc()
-        ret      = self.parse_ret()
+        desc     = self.parse_desc(proto)
+        ret      = self.parse_ret(proto)
         return Helper(proto=proto, desc=desc, ret=ret)
 
     def parse_symbol(self):
-        p = re.compile(' \* ?(.+)$')
+        p = re.compile(' \* ?(BPF\w+)$')
         capture = p.match(self.line)
         if not capture:
             raise NoSyscallCommandFound
@@ -127,16 +131,15 @@ class HeaderParser(object):
         self.line = self.reader.readline()
         return capture.group(1)
 
-    def parse_desc(self):
+    def parse_desc(self, proto):
         p = re.compile(' \* ?(?:\t| {5,8})Description$')
         capture = p.match(self.line)
         if not capture:
-            # Helper can have empty description and we might be parsing another
-            # attribute: return but do not consume.
-            return ''
+            raise Exception("No description section found for " + proto)
         # Description can be several lines, some of them possibly empty, and it
         # stops when another subsection title is met.
         desc = ''
+        desc_present = False
         while True:
             self.line = self.reader.readline()
             if self.line == ' *\n':
@@ -145,21 +148,24 @@ class HeaderParser(object):
                 p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                 capture = p.match(self.line)
                 if capture:
+                    desc_present = True
                     desc += capture.group(1) + '\n'
                 else:
                     break
+
+        if not desc_present:
+            raise Exception("No description found for " + proto)
         return desc
 
-    def parse_ret(self):
+    def parse_ret(self, proto):
         p = re.compile(' \* ?(?:\t| {5,8})Return$')
         capture = p.match(self.line)
         if not capture:
-            # Helper can have empty retval and we might be parsing another
-            # attribute: return but do not consume.
-            return ''
+            raise Exception("No return section found for " + proto)
         # Return value description can be several lines, some of them possibly
         # empty, and it stops when another subsection title is met.
         ret = ''
+        ret_present = False
         while True:
             self.line = self.reader.readline()
             if self.line == ' *\n':
@@ -168,44 +174,101 @@ class HeaderParser(object):
                 p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                 capture = p.match(self.line)
                 if capture:
+                    ret_present = True
                     ret += capture.group(1) + '\n'
                 else:
                     break
+
+        if not ret_present:
+            raise Exception("No return found for " + proto)
         return ret
 
-    def seek_to(self, target, help_message):
+    def seek_to(self, target, help_message, discard_lines = 1):
         self.reader.seek(0)
         offset = self.reader.read().find(target)
         if offset == -1:
             raise Exception(help_message)
         self.reader.seek(offset)
         self.reader.readline()
-        self.reader.readline()
+        for _ in range(discard_lines):
+            self.reader.readline()
         self.line = self.reader.readline()
 
-    def parse_syscall(self):
+    def parse_desc_syscall(self):
         self.seek_to('* DOC: eBPF Syscall Commands',
                      'Could not find start of eBPF syscall descriptions list')
         while True:
             try:
                 command = self.parse_element()
                 self.commands.append(command)
+                self.desc_syscalls.append(command.proto)
+
             except NoSyscallCommandFound:
                 break
 
-    def parse_helpers(self):
+    def parse_enum_syscall(self):
+        self.seek_to('enum bpf_cmd {',
+                     'Could not find start of bpf_cmd enum', 0)
+        # Searches for either one or more BPF\w+ enums
+        bpf_p = re.compile('\s*(BPF\w+)+')
+        # Searches for an enum entry assigned to another entry,
+        # e.g. BPF_PROG_RUN = BPF_PROG_TEST_RUN, which is not
+        # documented and hence should be skipped when checking
+        # that the right number of syscalls are documented
+        assign_p = re.compile('\s*(BPF\w+)\s*=\s*(BPF\w+)')
+        bpf_cmd_str = ''
+        while True:
+            capture = assign_p.match(self.line)
+            if capture:
+                # Skip line if an enum entry is assigned to another entry
+                self.line = self.reader.readline()
+                continue
+            capture = bpf_p.match(self.line)
+            if capture:
+                bpf_cmd_str += self.line
+            else:
+                break
+            self.line = self.reader.readline()
+        # Find the number of occurrences of BPF\w+
+        self.enum_syscalls = re.findall('(BPF\w+)+', bpf_cmd_str)
+
+    def parse_desc_helpers(self):
         self.seek_to('* Start of BPF helper function descriptions:',
                      'Could not find start of eBPF helper descriptions list')
         while True:
             try:
                 helper = self.parse_helper()
                 self.helpers.append(helper)
+                proto = helper.proto_break_down()
+                self.desc_unique_helpers.add(proto['name'])
             except NoHelperFound:
                 break
 
+    def parse_define_helpers(self):
+        # Parse the number of FN(...) in #define __BPF_FUNC_MAPPER to compare
+        # later with the number of unique function names present in description.
+        # Note: seek_to(..) discards the first line below the target search text,
+        # resulting in FN(unspec) being skipped and not added to self.define_unique_helpers.
+        self.seek_to('#define __BPF_FUNC_MAPPER(FN)',
+                     'Could not find start of eBPF helper definition list')
+        # Searches for either one or more FN(\w+) defines or a backslash for newline
+        p = re.compile('\s*(FN\(\w+\))+|\\\\')
+        fn_defines_str = ''
+        while True:
+            capture = p.match(self.line)
+            if capture:
+                fn_defines_str += self.line
+            else:
+                break
+            self.line = self.reader.readline()
+        # Find the number of occurrences of FN(\w+)
+        self.define_unique_helpers = re.findall('FN\(\w+\)', fn_defines_str)
+
     def run(self):
-        self.parse_syscall()
-        self.parse_helpers()
+        self.parse_desc_syscall()
+        self.parse_enum_syscall()
+        self.parse_desc_helpers()
+        self.parse_define_helpers()
         self.reader.close()
 
 ###############################################################################
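With the stricter parser above, bpf_doc.py refuses to run unless every helper in include/uapi/linux/bpf.h carries both a Description and a Return subsection. A sketch of the layout the regular expressions accept is shown below; bpf_example_helper is a made-up name, the subsection titles are indented with one tab and their bodies with two:

     * long bpf_example_helper(void *ctx, u64 flags)
     *	Description
     *		What the helper does, possibly spanning several lines.
     *	Return
     *		0 on success, or a negative error in case of failure.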
@@ -235,6 +298,25 @@ class Printer(object):
             self.print_one(elem)
         self.print_footer()
 
+    def elem_number_check(self, desc_unique_elem, define_unique_elem, type, instance):
+        """
+        Checks the number of helpers/syscalls documented within the header file
+        description with those defined as part of enum/macro and raise an
+        Exception if they don't match.
+        """
+        nr_desc_unique_elem = len(desc_unique_elem)
+        nr_define_unique_elem = len(define_unique_elem)
+        if nr_desc_unique_elem != nr_define_unique_elem:
+            exception_msg = '''
+The number of unique %s in description (%d) doesn\'t match the number of unique %s defined in %s (%d)
+''' % (type, nr_desc_unique_elem, type, instance, nr_define_unique_elem)
+            if nr_desc_unique_elem < nr_define_unique_elem:
+                # Function description is parsed until no helper is found (which can be due to
+                # misformatting). Hence, only print the first missing/misformatted helper/enum.
+                exception_msg += '''
+The description for %s is not present or formatted correctly.
+''' % (define_unique_elem[nr_desc_unique_elem])
+            raise Exception(exception_msg)
 
 class PrinterRST(Printer):
     """
@@ -295,7 +377,6 @@ class PrinterRST(Printer):
 
         print('')
 
-
 class PrinterHelpersRST(PrinterRST):
     """
     A printer for dumping collected information about helpers as a ReStructured
@@ -305,6 +386,7 @@ class PrinterHelpersRST(PrinterRST):
     """
     def __init__(self, parser):
         self.elements = parser.helpers
+        self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER')
 
     def print_header(self):
         header = '''\
@@ -478,6 +560,7 @@ class PrinterSyscallRST(PrinterRST):
     """
     def __init__(self, parser):
         self.elements = parser.commands
+        self.elem_number_check(parser.desc_syscalls, parser.enum_syscalls, 'syscall', 'bpf_cmd')
 
     def print_header(self):
         header = '''\
@@ -509,6 +592,7 @@ class PrinterHelpers(Printer):
     """
     def __init__(self, parser):
         self.elements = parser.helpers
+        self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER')
 
     type_fwds = [
             'struct bpf_fib_lookup',
index 842889f3dcb7065cb0e7b1217cf77f8bc162781c..a9f8c63a96d1a2034de1e50cd7f215bac82e332a 100644 (file)
@@ -838,7 +838,7 @@ int devcgroup_check_permission(short type, u32 major, u32 minor, short access)
        int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access);
 
        if (rc)
-               return -EPERM;
+               return rc;
 
        #ifdef CONFIG_CGROUP_DEVICE
        return devcgroup_legacy_check_permission(type, major, minor, access);
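devcgroup_check_permission() now propagates the BPF program's verdict instead of forcing -EPERM, which pairs with the new bpf_get_retval()/bpf_set_retval() helpers documented in the uapi hunk further below. A loosely hedged sketch of a device cgroup program reporting a custom errno; it assumes helper definitions regenerated from this series, and ENOSPC is written as a literal because vmlinux.h has no errno defines:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    char _license[] SEC("license") = "GPL";

    SEC("cgroup/dev")
    int deny_mknod_enospc(struct bpf_cgroup_dev_ctx *ctx)
    {
            short access = ctx->access_type >> 16;  /* low 16 bits hold the device type */

            if (access & BPF_DEVCG_ACC_MKNOD) {
                    bpf_set_retval(-28 /* -ENOSPC */);
                    return 0;   /* deny; userspace now sees ENOSPC instead of EPERM */
            }
            return 1;           /* allow everything else */
    }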
index 59833125ac0a13a55f8c54e5a52b3082424000f8..a2c665beda87c2862404a347c2ddffa71e4a48fe 100644 (file)
@@ -902,7 +902,7 @@ static int do_show(int argc, char **argv)
                                      equal_fn_for_key_as_id, NULL);
        btf_map_table = hashmap__new(hash_fn_for_key_as_id,
                                     equal_fn_for_key_as_id, NULL);
-       if (!btf_prog_table || !btf_map_table) {
+       if (IS_ERR(btf_prog_table) || IS_ERR(btf_map_table)) {
                hashmap__free(btf_prog_table);
                hashmap__free(btf_map_table);
                if (fd >= 0)
index 3571a281c43f81329b5e5bccf8add79233b5633e..effe136119d7203acaade217bd98fe564304c8cc 100644 (file)
@@ -50,6 +50,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                         const char *attach_flags_str,
                         int level)
 {
+       char prog_name[MAX_PROG_FULL_NAME];
        struct bpf_prog_info info = {};
        __u32 info_len = sizeof(info);
        int prog_fd;
@@ -63,6 +64,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                return -1;
        }
 
+       get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name));
        if (json_output) {
                jsonw_start_object(json_wtr);
                jsonw_uint_field(json_wtr, "id", info.id);
@@ -73,7 +75,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                        jsonw_uint_field(json_wtr, "attach_type", attach_type);
                jsonw_string_field(json_wtr, "attach_flags",
                                   attach_flags_str);
-               jsonw_string_field(json_wtr, "name", info.name);
+               jsonw_string_field(json_wtr, "name", prog_name);
                jsonw_end_object(json_wtr);
        } else {
                printf("%s%-8u ", level ? "    " : "", info.id);
@@ -81,7 +83,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                        printf("%-15s", attach_type_name[attach_type]);
                else
                        printf("type %-10u", attach_type);
-               printf(" %-15s %-15s\n", attach_flags_str, info.name);
+               printf(" %-15s %-15s\n", attach_flags_str, prog_name);
        }
 
        close(prog_fd);
index fa8eb813434496f358c025d1c88180f8ab464c07..111dff809c7bee72e40d4396facb043d1e5716b8 100644 (file)
@@ -24,6 +24,7 @@
 #include <bpf/bpf.h>
 #include <bpf/hashmap.h>
 #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
+#include <bpf/btf.h>
 
 #include "main.h"
 
@@ -304,6 +305,49 @@ const char *get_fd_type_name(enum bpf_obj_type type)
        return names[type];
 }
 
+void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd,
+                       char *name_buff, size_t buff_len)
+{
+       const char *prog_name = prog_info->name;
+       const struct btf_type *func_type;
+       const struct bpf_func_info finfo;
+       struct bpf_prog_info info = {};
+       __u32 info_len = sizeof(info);
+       struct btf *prog_btf = NULL;
+
+       if (buff_len <= BPF_OBJ_NAME_LEN ||
+           strlen(prog_info->name) < BPF_OBJ_NAME_LEN - 1)
+               goto copy_name;
+
+       if (!prog_info->btf_id || prog_info->nr_func_info == 0)
+               goto copy_name;
+
+       info.nr_func_info = 1;
+       info.func_info_rec_size = prog_info->func_info_rec_size;
+       if (info.func_info_rec_size > sizeof(finfo))
+               info.func_info_rec_size = sizeof(finfo);
+       info.func_info = ptr_to_u64(&finfo);
+
+       if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len))
+               goto copy_name;
+
+       prog_btf = btf__load_from_kernel_by_id(info.btf_id);
+       if (!prog_btf)
+               goto copy_name;
+
+       func_type = btf__type_by_id(prog_btf, finfo.type_id);
+       if (!func_type || !btf_is_func(func_type))
+               goto copy_name;
+
+       prog_name = btf__name_by_offset(prog_btf, func_type->name_off);
+
+copy_name:
+       snprintf(name_buff, buff_len, "%s", prog_name);
+
+       if (prog_btf)
+               btf__free(prog_btf);
+}
+
 int get_fd_type(int fd)
 {
        char path[PATH_MAX];
index b4695df2ea3d7db8a94f606f41ee884a5e76357f..43e3f8700ecc93950256f43a328e96c36906313b 100644 (file)
@@ -227,7 +227,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
                /* only generate definitions for memory-mapped internal maps */
                if (!bpf_map__is_internal(map))
                        continue;
-               if (!(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+               if (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        continue;
 
                if (!get_map_ident(map, map_ident, sizeof(map_ident)))
@@ -468,7 +468,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
                if (!get_map_ident(map, ident, sizeof(ident)))
                        continue;
                if (bpf_map__is_internal(map) &&
-                   (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+                   (bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        printf("\tmunmap(skel->%1$s, %2$zd);\n",
                               ident, bpf_map_mmap_sz(map));
                codegen("\
@@ -536,7 +536,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
                        continue;
 
                if (!bpf_map__is_internal(map) ||
-                   !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+                   !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        continue;
 
                codegen("\
@@ -600,10 +600,10 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
                        continue;
 
                if (!bpf_map__is_internal(map) ||
-                   !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+                   !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        continue;
 
-               if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG)
+               if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG)
                        mmap_flags = "PROT_READ";
                else
                        mmap_flags = "PROT_READ | PROT_WRITE";
@@ -927,7 +927,6 @@ static int do_skeleton(int argc, char **argv)
                        s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
                        if (!s)                                             \n\
                                goto err;                                   \n\
-                       obj->skeleton = s;                                  \n\
                                                                            \n\
                        s->sz = sizeof(*s);                                 \n\
                        s->name = \"%1$s\";                                 \n\
@@ -962,7 +961,7 @@ static int do_skeleton(int argc, char **argv)
                                i, bpf_map__name(map), i, ident);
                        /* memory-mapped internal maps */
                        if (bpf_map__is_internal(map) &&
-                           (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) {
+                           (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) {
                                printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n",
                                       i, ident);
                        }
@@ -1000,6 +999,7 @@ static int do_skeleton(int argc, char **argv)
                                                                            \n\
                        s->data = (void *)%2$s__elf_bytes(&s->data_sz);     \n\
                                                                            \n\
+                       obj->skeleton = s;                                  \n\
                        return 0;                                           \n\
                err:                                                        \n\
                        bpf_object__destroy_skeleton(s);                    \n\
index 2c258db0d3521a1224c6f2e479d302eaf8fcfeef..97dec81950e5dead2f5eef76a514f7beb18f7ee0 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (C) 2020 Facebook */
 
 #include <errno.h>
+#include <linux/err.h>
 #include <net/if.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -306,7 +307,7 @@ static int do_show(int argc, char **argv)
        if (show_pinned) {
                link_table = hashmap__new(hash_fn_for_key_as_id,
                                          equal_fn_for_key_as_id, NULL);
-               if (!link_table) {
+               if (IS_ERR(link_table)) {
                        p_err("failed to create hashmap for pinned paths");
                        return -1;
                }
index 020e91a542d513255bf6a41f045ab75a1f4fb530..9d01fa9de03376628fb2ec73cfe883f53d1cb858 100644 (file)
@@ -478,7 +478,14 @@ int main(int argc, char **argv)
        }
 
        if (!legacy_libbpf) {
-               ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+               enum libbpf_strict_mode mode;
+
+               /* Allow legacy map definitions for skeleton generation.
+                * It will still be rejected if users use LIBBPF_STRICT_ALL
+                * mode for loading generated skeleton.
+                */
+               mode = (__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS;
+               ret = libbpf_set_strict_mode(mode);
                if (ret)
                        p_err("failed to enable libbpf strict mode: %d", ret);
        }
index 8d76d937a62b7097b5e7c865f2f8be5bfaf3c59c..0c3840596b5a72285002883f1dcbc33f1857296f 100644 (file)
@@ -140,6 +140,10 @@ struct cmd {
 int cmd_select(const struct cmd *cmds, int argc, char **argv,
               int (*help)(int argc, char **argv));
 
+#define MAX_PROG_FULL_NAME 128
+void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd,
+                       char *name_buff, size_t buff_len);
+
 int get_fd_type(int fd);
 const char *get_fd_type_name(enum bpf_obj_type type);
 char *get_fdinfo(int fd, const char *key);
index cc530a2298124bf6b588484bcb951830c7e54f05..c66a3c979b7a7cf43ef6d1348fe9d2db9996997f 100644 (file)
@@ -699,7 +699,7 @@ static int do_show(int argc, char **argv)
        if (show_pinned) {
                map_table = hashmap__new(hash_fn_for_key_as_id,
                                         equal_fn_for_key_as_id, NULL);
-               if (!map_table) {
+               if (IS_ERR(map_table)) {
                        p_err("failed to create hashmap for pinned paths");
                        return -1;
                }
index 649053704bd7126db4955326b3ec83f457b431ec..526a332c48e6ebaaa2c9738d0e4fb05324ff19e3 100644 (file)
@@ -551,7 +551,7 @@ static int do_attach_detach_xdp(int progfd, enum net_attach_type attach_type,
        if (attach_type == NET_ATTACH_TYPE_XDP_OFFLOAD)
                flags |= XDP_FLAGS_HW_MODE;
 
-       return bpf_set_link_xdp_fd(ifindex, progfd, flags);
+       return bpf_xdp_attach(ifindex, progfd, flags, NULL);
 }
 
 static int do_attach(int argc, char **argv)
index 56b598eee043a94d89d31344e4c2113ffc430d4a..7c384d10e95f83d5dd27e2170867c98717ece58c 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 /* Copyright (C) 2020 Facebook */
 #include <errno.h>
+#include <linux/err.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -101,7 +102,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type)
        libbpf_print_fn_t default_print;
 
        *map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL);
-       if (!*map) {
+       if (IS_ERR(*map)) {
                p_err("failed to create hashmap for PID references");
                return -1;
        }
index 2a21d50516bc46ae06947ef6c35e04194685d7c6..cf935c63e6f560076fb2ac0d8b03b18d0b7660e4 100644 (file)
@@ -424,8 +424,10 @@ out_free:
        free(value);
 }
 
-static void print_prog_header_json(struct bpf_prog_info *info)
+static void print_prog_header_json(struct bpf_prog_info *info, int fd)
 {
+       char prog_name[MAX_PROG_FULL_NAME];
+
        jsonw_uint_field(json_wtr, "id", info->id);
        if (info->type < ARRAY_SIZE(prog_type_name))
                jsonw_string_field(json_wtr, "type",
@@ -433,8 +435,10 @@ static void print_prog_header_json(struct bpf_prog_info *info)
        else
                jsonw_uint_field(json_wtr, "type", info->type);
 
-       if (*info->name)
-               jsonw_string_field(json_wtr, "name", info->name);
+       if (*info->name) {
+               get_prog_full_name(info, fd, prog_name, sizeof(prog_name));
+               jsonw_string_field(json_wtr, "name", prog_name);
+       }
 
        jsonw_name(json_wtr, "tag");
        jsonw_printf(json_wtr, "\"" BPF_TAG_FMT "\"",
@@ -455,7 +459,7 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
        char *memlock;
 
        jsonw_start_object(json_wtr);
-       print_prog_header_json(info);
+       print_prog_header_json(info, fd);
        print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
 
        if (info->load_time) {
@@ -507,16 +511,20 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
        jsonw_end_object(json_wtr);
 }
 
-static void print_prog_header_plain(struct bpf_prog_info *info)
+static void print_prog_header_plain(struct bpf_prog_info *info, int fd)
 {
+       char prog_name[MAX_PROG_FULL_NAME];
+
        printf("%u: ", info->id);
        if (info->type < ARRAY_SIZE(prog_type_name))
                printf("%s  ", prog_type_name[info->type]);
        else
                printf("type %u  ", info->type);
 
-       if (*info->name)
-               printf("name %s  ", info->name);
+       if (*info->name) {
+               get_prog_full_name(info, fd, prog_name, sizeof(prog_name));
+               printf("name %s  ", prog_name);
+       }
 
        printf("tag ");
        fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
@@ -534,7 +542,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
 {
        char *memlock;
 
-       print_prog_header_plain(info);
+       print_prog_header_plain(info, fd);
 
        if (info->load_time) {
                char buf[32];
@@ -641,7 +649,7 @@ static int do_show(int argc, char **argv)
        if (show_pinned) {
                prog_table = hashmap__new(hash_fn_for_key_as_id,
                                          equal_fn_for_key_as_id, NULL);
-               if (!prog_table) {
+               if (IS_ERR(prog_table)) {
                        p_err("failed to create hashmap for pinned paths");
                        return -1;
                }
@@ -972,10 +980,10 @@ static int do_dump(int argc, char **argv)
 
                if (json_output && nb_fds > 1) {
                        jsonw_start_object(json_wtr);   /* prog object */
-                       print_prog_header_json(&info);
+                       print_prog_header_json(&info, fds[i]);
                        jsonw_name(json_wtr, "insns");
                } else if (nb_fds > 1) {
-                       print_prog_header_plain(&info);
+                       print_prog_header_plain(&info, fds[i]);
                }
 
                err = prog_dump(&info, mode, filepath, opcodes, visual, linum);
index 2f693b082bdbe5e9b1734f7472cd5a9c88e785ee..e08a6ff2866c36be9db62e75f5ea8059c94b438d 100644 (file)
@@ -480,7 +480,6 @@ static int do_unregister(int argc, char **argv)
 static int do_register(int argc, char **argv)
 {
        LIBBPF_OPTS(bpf_object_open_opts, open_opts);
-       const struct bpf_map_def *def;
        struct bpf_map_info info = {};
        __u32 info_len = sizeof(info);
        int nr_errs = 0, nr_maps = 0;
@@ -510,8 +509,7 @@ static int do_register(int argc, char **argv)
        }
 
        bpf_object__for_each_map(map, obj) {
-               def = bpf_map__def(map);
-               if (def->type != BPF_MAP_TYPE_STRUCT_OPS)
+               if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS)
                        continue;
 
                link = bpf_map__attach_struct_ops(map);
index 9ddeca947635427323b5480f67baeb39c862eaef..a7f87cdf11da34788f6a1d173811aa3a57b83713 100644 (file)
@@ -20,6 +20,8 @@ LD       = $(HOSTLD)
 ARCH     = $(HOSTARCH)
 RM      ?= rm
 CROSS_COMPILE =
+CFLAGS  := $(KBUILD_HOSTCFLAGS)
+LDFLAGS := $(KBUILD_HOSTLDFLAGS)
 
 OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/
 
@@ -47,10 +49,10 @@ $(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd
 
 $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUT)
        $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT)    \
-                   DESTDIR=$(LIBBPF_DESTDIR) prefix=                          \
+                   DESTDIR=$(LIBBPF_DESTDIR) prefix= EXTRA_CFLAGS="$(CFLAGS)" \
                    $(abspath $@) install_headers
 
-CFLAGS := -g \
+CFLAGS += -g \
           -I$(srctree)/tools/include \
           -I$(srctree)/tools/include/uapi \
           -I$(LIBBPF_INCLUDE) \
index b0383d371b9af53ea816766fb9fc95cbec277ebe..16a7574292a53a408f7cdaa0c9afb3e7d8710320 100644 (file)
@@ -330,6 +330,8 @@ union bpf_iter_link_info {
  *                     *ctx_out*, *data_in* and *data_out* must be NULL.
  *                     *repeat* must be zero.
  *
+ *             BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN.
+ *
  *     Return
  *             Returns zero on success. On error, -1 is returned and *errno*
  *             is set appropriately.
@@ -1111,6 +1113,11 @@ enum bpf_link_type {
  */
 #define BPF_F_SLEEPABLE                (1U << 4)
 
+/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program
+ * fully supports xdp frags.
+ */
+#define BPF_F_XDP_HAS_FRAGS    (1U << 5)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * the following extensions:
  *
@@ -1775,6 +1782,8 @@ union bpf_attr {
  *             0 on success, or a negative error in case of failure.
  *
  * u64 bpf_get_current_pid_tgid(void)
+ *     Description
+ *             Get the current pid and tgid.
  *     Return
  *             A 64-bit integer containing the current tgid and pid, and
  *             created as such:
@@ -1782,6 +1791,8 @@ union bpf_attr {
  *             *current_task*\ **->pid**.
  *
  * u64 bpf_get_current_uid_gid(void)
+ *     Description
+ *             Get the current uid and gid.
  *     Return
  *             A 64-bit integer containing the current GID and UID, and
  *             created as such: *current_gid* **<< 32 \|** *current_uid*.
@@ -2256,6 +2267,8 @@ union bpf_attr {
  *             The 32-bit hash.
  *
  * u64 bpf_get_current_task(void)
+ *     Description
+ *             Get the current task.
  *     Return
  *             A pointer to the current task struct.
  *
@@ -2369,6 +2382,8 @@ union bpf_attr {
  *             indicate that the hash is outdated and to trigger a
  *             recalculation the next time the kernel tries to access this
  *             hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *     Return
+ *             void.
  *
  * long bpf_get_numa_node_id(void)
  *     Description
@@ -2466,6 +2481,8 @@ union bpf_attr {
  *             A 8-byte long unique number or 0 if *sk* is NULL.
  *
  * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ *     Description
+ *             Get the owner UID of the socket associated to *skb*.
  *     Return
  *             The owner UID of the socket associated to *skb*. If the socket
  *             is **NULL**, or if it is not a full socket (i.e. if it is a
@@ -3240,6 +3257,9 @@ union bpf_attr {
  *             The id is returned or 0 in case the id could not be retrieved.
  *
  * u64 bpf_get_current_cgroup_id(void)
+ *     Description
+ *             Get the current cgroup id based on the cgroup within which
+ *             the current task is running.
  *     Return
  *             A 64-bit integer containing the current cgroup id based
  *             on the cgroup within which the current task is running.
@@ -5018,6 +5038,44 @@ union bpf_attr {
  *
  *     Return
  *             The number of arguments of the traced function.
+ *
+ * int bpf_get_retval(void)
+ *     Description
+ *             Get the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             The syscall's return value.
+ *
+ * int bpf_set_retval(int retval)
+ *     Description
+ *             Set the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md)
+ *     Description
+ *             Get the total size of a given xdp buff (linear and paged area)
+ *     Return
+ *             The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             This helper is provided as an easy way to load data from a
+ *             This helper is provided as an easy way to load data from an
+ *             the frame associated to *xdp_md*, into the buffer pointed by
+ *             *buf*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             Store *len* bytes from buffer *buf* into the frame
+ *             associated to *xdp_md*, at *offset*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -5206,6 +5264,11 @@ union bpf_attr {
        FN(get_func_arg),               \
        FN(get_func_ret),               \
        FN(get_func_arg_cnt),           \
+       FN(get_retval),                 \
+       FN(set_retval),                 \
+       FN(xdp_get_buff_len),           \
+       FN(xdp_load_bytes),             \
+       FN(xdp_store_bytes),            \
        /* */
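Combined with the SEC("xdp.frags") handling libbpf gains later in this pull (which sets BPF_F_XDP_HAS_FRAGS at load time), the three xdp helpers added to the FN() list let a program measure and touch a multi-buffer packet beyond its linear area. A rough sketch, assuming helper definitions regenerated from this header; the offset and length are arbitrary:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    char _license[] SEC("license") = "GPL";

    SEC("xdp.frags")
    int xdp_frags_example(struct xdp_md *ctx)
    {
            __u8 bytes[16];
            __u64 len = bpf_xdp_get_buff_len(ctx);  /* linear + paged length */

            if (len < 2048 + sizeof(bytes))
                    return XDP_PASS;

            /* load/store work across fragments, unlike direct ctx->data access */
            if (bpf_xdp_load_bytes(ctx, 2048, bytes, sizeof(bytes)))
                    return XDP_PASS;

            bytes[0] ^= 0xff;
            bpf_xdp_store_bytes(ctx, 2048, bytes, sizeof(bytes));

            return XDP_PASS;
    }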
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
index 550b4cbb6c994267b1ff1401b744655878886b71..418b259166f80548ead6146294848947d1d9c75c 100644 (file)
@@ -754,10 +754,10 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
                .flags = flags,
        );
 
-       return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
+       return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts);
 }
 
-int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+int bpf_prog_attach_opts(int prog_fd, int target_fd,
                          enum bpf_attach_type type,
                          const struct bpf_prog_attach_opts *opts)
 {
@@ -778,6 +778,11 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
        return libbpf_err_errno(ret);
 }
 
+__attribute__((alias("bpf_prog_attach_opts")))
+int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+                         enum bpf_attach_type type,
+                         const struct bpf_prog_attach_opts *opts);
+
 int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
 {
        union bpf_attr attr;
index 14e0d97ad2cf6f6498245daa25b37eea469fb3a3..c2e8327010f9bed330c2ce0d496f2156eb7e1198 100644 (file)
@@ -391,6 +391,10 @@ struct bpf_prog_attach_opts {
 
 LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
                               enum bpf_attach_type type, unsigned int flags);
+LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd,
+                                    enum bpf_attach_type type,
+                                    const struct bpf_prog_attach_opts *opts);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead")
 LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
                                     enum bpf_attach_type type,
                                     const struct bpf_prog_attach_opts *opts);
index 963b1060d9441f5600bbb13ceaccedaf49f18c8e..44df982d2a5c7685a89b50b89bcd667451210f6e 100644 (file)
@@ -133,7 +133,7 @@ struct bpf_map_def {
        unsigned int value_size;
        unsigned int max_entries;
        unsigned int map_flags;
-};
+} __attribute__((deprecated("use BTF-defined maps in .maps section")));
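For reference, the BTF-defined form that replaces a legacy bpf_map_def placed in SEC("maps") looks like the sketch below; the map name, sizes and types are illustrative:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_HASH);
            __uint(max_entries, 1024);
            __type(key, __u32);
            __type(value, __u64);
    } counts SEC(".maps");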
 
 enum libbpf_pin_type {
        LIBBPF_PIN_NONE,
index 9aa19c89f75832d10985adad2e6c79a5be962389..1383e26c5d1f131e44480425f5fbcec26e77f029 100644 (file)
@@ -1620,20 +1620,37 @@ static int btf_commit_type(struct btf *btf, int data_sz)
 struct btf_pipe {
        const struct btf *src;
        struct btf *dst;
+       struct hashmap *str_off_map; /* map string offsets from src to dst */
 };
 
 static int btf_rewrite_str(__u32 *str_off, void *ctx)
 {
        struct btf_pipe *p = ctx;
-       int off;
+       void *mapped_off;
+       int off, err;
 
        if (!*str_off) /* nothing to do for empty strings */
                return 0;
 
+       if (p->str_off_map &&
+           hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) {
+               *str_off = (__u32)(long)mapped_off;
+               return 0;
+       }
+
        off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off));
        if (off < 0)
                return off;
 
+       /* Remember string mapping from src to dst.  It avoids
+        * performing expensive string comparisons.
+        */
+       if (p->str_off_map) {
+               err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off);
+               if (err)
+                       return err;
+       }
+
        *str_off = off;
        return 0;
 }
@@ -1680,6 +1697,9 @@ static int btf_rewrite_type_ids(__u32 *type_id, void *ctx)
        return 0;
 }
 
+static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx);
+static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx);
+
 int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 {
        struct btf_pipe p = { .src = src_btf, .dst = btf };
@@ -1713,6 +1733,11 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
        if (!off)
                return libbpf_err(-ENOMEM);
 
+       /* Map the string offsets from src_btf to the offsets from btf to improve performance */
+       p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
+       if (IS_ERR(p.str_off_map))
+               return libbpf_err(-ENOMEM);
+
        /* bulk copy types data for all types from src_btf */
        memcpy(t, src_btf->types_data, data_sz);
 
@@ -1754,6 +1779,8 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
        btf->hdr->str_off += data_sz;
        btf->nr_types += cnt;
 
+       hashmap__free(p.str_off_map);
+
        /* return type ID of the first added BTF type */
        return btf->start_id + btf->nr_types - cnt;
 err_out:
@@ -1767,6 +1794,8 @@ err_out:
         * wasn't modified, so doesn't need restoring, see big comment above */
        btf->hdr->str_len = old_strs_len;
 
+       hashmap__free(p.str_off_map);
+
        return libbpf_err(err);
 }
 
index 061839f0452558c0f9bf80ec56a86e5f0cef26de..51862fdee850b2f437ceab48b3e677af4d50fc26 100644 (file)
@@ -375,8 +375,28 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
                         const struct btf_dump_type_data_opts *opts);
 
 /*
- * A set of helpers for easier BTF types handling
+ * A set of helpers for easier BTF types handling.
+ *
+ * The inline functions below rely on constants from the kernel headers which
+ * may not be available for applications including this header file. To avoid
+ * compilation errors, we define all the constants here that were added after
+ * the initial introduction of the BTF_KIND* constants.
  */
+#ifndef BTF_KIND_FUNC
+#define BTF_KIND_FUNC          12      /* Function     */
+#define BTF_KIND_FUNC_PROTO    13      /* Function Proto       */
+#endif
+#ifndef BTF_KIND_VAR
+#define BTF_KIND_VAR           14      /* Variable     */
+#define BTF_KIND_DATASEC       15      /* Section      */
+#endif
+#ifndef BTF_KIND_FLOAT
+#define BTF_KIND_FLOAT         16      /* Floating point       */
+#endif
+/* The kernel header switched to enums, so these two were never #defined */
+#define BTF_KIND_DECL_TAG      17      /* Decl Tag */
+#define BTF_KIND_TYPE_TAG      18      /* Type Tag */
+
 static inline __u16 btf_kind(const struct btf_type *t)
 {
        return BTF_INFO_KIND(t->info);
index 3c20b126d60d869aa136b24d4b01c93d6302938e..aeb09c2887162d169c4c8f0d4718a59df8051678 100644 (file)
@@ -75,7 +75,7 @@ void hashmap__clear(struct hashmap *map)
 
 void hashmap__free(struct hashmap *map)
 {
-       if (!map)
+       if (IS_ERR_OR_NULL(map))
                return;
 
        hashmap__clear(map);
@@ -238,4 +238,3 @@ bool hashmap__delete(struct hashmap *map, const void *key,
 
        return true;
 }
-
index 7f10dd501a52bbb03e95c95f0991df2390e5eca5..a8c750373ad56133ec1b353984c83e7681663312 100644 (file)
@@ -235,6 +235,8 @@ enum sec_def_flags {
        SEC_SLEEPABLE = 8,
        /* allow non-strict prefix matching */
        SEC_SLOPPY_PFX = 16,
+       /* BPF program supports non-linear XDP buffers */
+       SEC_XDP_FRAGS = 32,
 };
 
 struct bpf_sec_def {
@@ -1937,6 +1939,11 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
        if (obj->efile.maps_shndx < 0)
                return 0;
 
+       if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) {
+               pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n");
+               return -EOPNOTSUPP;
+       }
+
        if (!symbols)
                return -EINVAL;
 
@@ -1999,6 +2006,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
                        return -LIBBPF_ERRNO__FORMAT;
                }
 
+               pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name);
+
                if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
                        pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
                        return -ENOTSUP;
@@ -4190,6 +4199,7 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
                return 0;
 
        if (!bpf_map__is_internal(map)) {
+               pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n");
                ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
                                           def->value_size, &key_type_id,
                                           &value_type_id);
@@ -6562,6 +6572,9 @@ static int libbpf_preload_prog(struct bpf_program *prog,
        if (def & SEC_SLEEPABLE)
                opts->prog_flags |= BPF_F_SLEEPABLE;
 
+       if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
+               opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
+
        if ((prog->type == BPF_PROG_TYPE_TRACING ||
             prog->type == BPF_PROG_TYPE_LSM ||
             prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
@@ -8600,8 +8613,11 @@ static const struct bpf_sec_def section_defs[] = {
        SEC_DEF("lsm.s/",               LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
        SEC_DEF("iter/",                TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
        SEC_DEF("syscall",              SYSCALL, 0, SEC_SLEEPABLE),
+       SEC_DEF("xdp.frags/devmap",     XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
        SEC_DEF("xdp_devmap/",          XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
+       SEC_DEF("xdp.frags/cpumap",     XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
        SEC_DEF("xdp_cpumap/",          XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
+       SEC_DEF("xdp.frags",            XDP, BPF_XDP, SEC_XDP_FRAGS),
        SEC_DEF("xdp",                  XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
        SEC_DEF("perf_event",           PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX),
        SEC_DEF("lwt_in",               LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX),
@@ -11795,6 +11811,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
 
 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
 {
+       if (!s)
+               return;
+
        if (s->progs)
                bpf_object__detach_skeleton(s);
        if (s->obj)
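
The new "xdp.frags" section names above make libbpf set BPF_F_XDP_HAS_FRAGS in prog_flags
at load time. A minimal BPF-side sketch assuming that behavior (the program is illustrative,
not taken from this series):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	/* Marked frags-aware purely via the section name. */
	SEC("xdp.frags")
	int xdp_frags_pass(struct xdp_md *ctx)
	{
		return XDP_PASS;
	}

	char LICENSE[] SEC("license") = "GPL";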
index 8b9bc5e90c2b8b32556147cd34c9b37710d63ef1..94670066de629164c82319aaa284e97f8645c24e 100644 (file)
@@ -706,7 +706,8 @@ bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map);
 LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
 LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
 /* get map definition */
-LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use appropriate getters or setters instead")
+const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
 /* get map name */
 LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
 /* get/set map type */
@@ -832,13 +833,42 @@ struct bpf_xdp_set_link_opts {
 };
 #define bpf_xdp_set_link_opts__last_field old_fd
 
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
 LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
 LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
                                        const struct bpf_xdp_set_link_opts *opts);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query_id() instead")
 LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query() instead")
 LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
                                     size_t info_size, __u32 flags);
 
+struct bpf_xdp_attach_opts {
+       size_t sz;
+       int old_prog_fd;
+       size_t :0;
+};
+#define bpf_xdp_attach_opts__last_field old_prog_fd
+
+struct bpf_xdp_query_opts {
+       size_t sz;
+       __u32 prog_id;          /* output */
+       __u32 drv_prog_id;      /* output */
+       __u32 hw_prog_id;       /* output */
+       __u32 skb_prog_id;      /* output */
+       __u8 attach_mode;       /* output */
+       size_t :0;
+};
+#define bpf_xdp_query_opts__last_field attach_mode
+
+LIBBPF_API int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags,
+                             const struct bpf_xdp_attach_opts *opts);
+LIBBPF_API int bpf_xdp_detach(int ifindex, __u32 flags,
+                             const struct bpf_xdp_attach_opts *opts);
+LIBBPF_API int bpf_xdp_query(int ifindex, int flags, struct bpf_xdp_query_opts *opts);
+LIBBPF_API int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id);
+
 /* TC related API */
 enum bpf_tc_attach_point {
        BPF_TC_INGRESS = 1 << 0,
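
The deprecated bpf_set_link_xdp_fd()/bpf_get_link_xdp_id() calls map onto the new
bpf_xdp_attach()/bpf_xdp_query_id() APIs declared above. A hedged usage sketch (ifindex
and prog_fd are assumed to come from elsewhere):

	#include <linux/if_link.h>	/* XDP_FLAGS_DRV_MODE */
	#include <bpf/libbpf.h>

	static int attach_query_detach(int ifindex, int prog_fd)
	{
		__u32 prog_id = 0;
		int err;

		err = bpf_xdp_attach(ifindex, prog_fd, XDP_FLAGS_DRV_MODE, NULL);
		if (err)
			return err;

		/* prog_id now reflects the driver-mode program just attached. */
		err = bpf_xdp_query_id(ifindex, XDP_FLAGS_DRV_MODE, &prog_id);
		if (err)
			return err;

		return bpf_xdp_detach(ifindex, XDP_FLAGS_DRV_MODE, NULL);
	}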
index 5297839677930bab5a596c1b911f375064d44b9c..e10f0822845af2133585e090dde8d91e21c9a60f 100644 (file)
@@ -247,6 +247,7 @@ LIBBPF_0.0.8 {
                bpf_link_create;
                bpf_link_update;
                bpf_map__set_initial_value;
+               bpf_prog_attach_opts;
                bpf_program__attach_cgroup;
                bpf_program__attach_lsm;
                bpf_program__is_lsm;
@@ -427,6 +428,10 @@ LIBBPF_0.7.0 {
                bpf_program__log_level;
                bpf_program__set_log_buf;
                bpf_program__set_log_level;
+               bpf_xdp_attach;
+               bpf_xdp_detach;
+               bpf_xdp_query;
+               bpf_xdp_query_id;
                libbpf_probe_bpf_helper;
                libbpf_probe_bpf_map_type;
                libbpf_probe_bpf_prog_type;
index 79131f761a27cca1cbcd0c8fb5b17da56458fdd6..3c2b281c2bc3ec299302711463b52a3589a69bc3 100644 (file)
@@ -73,6 +73,11 @@ enum libbpf_strict_mode {
         * operation.
         */
        LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10,
+       /*
+        * Error out on any SEC("maps") map definitions, which are deprecated
+        * in favor of BTF-defined map definitions in SEC(".maps").
+        */
+       LIBBPF_STRICT_MAP_DEFINITIONS = 0x20,
 
        __LIBBPF_STRICT_LAST,
 };
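
With LIBBPF_STRICT_MAP_DEFINITIONS enabled, any legacy SEC("maps") definition is rejected
at object open time; BTF-defined maps in SEC(".maps") are the replacement. A minimal sketch
of the BTF-defined form (map name and sizes are illustrative):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	/* BTF-defined map; replaces a legacy struct bpf_map_def in SEC("maps"). */
	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(max_entries, 16);
		__type(key, __u32);
		__type(value, __u64);
	} counters SEC(".maps");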
index 39f25e09b51e2cab0a314ec78fa699351b1fd2b9..c39c37f99d5c4949021656dc2c11e1c694e0b41d 100644 (file)
@@ -217,6 +217,28 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
        return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
 }
 
+int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts)
+{
+       int old_prog_fd, err;
+
+       if (!OPTS_VALID(opts, bpf_xdp_attach_opts))
+               return libbpf_err(-EINVAL);
+
+       old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+       if (old_prog_fd)
+               flags |= XDP_FLAGS_REPLACE;
+       else
+               old_prog_fd = -1;
+
+       err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags);
+       return libbpf_err(err);
+}
+
+int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts)
+{
+       return bpf_xdp_attach(ifindex, -1, flags, opts);
+}
+
 int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
                             const struct bpf_xdp_set_link_opts *opts)
 {
@@ -303,69 +325,98 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
        return 0;
 }
 
-int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
-                         size_t info_size, __u32 flags)
+int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
 {
-       struct xdp_id_md xdp_id = {};
-       __u32 mask;
-       int ret;
        struct libbpf_nla_req req = {
                .nh.nlmsg_len      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
                .nh.nlmsg_type     = RTM_GETLINK,
                .nh.nlmsg_flags    = NLM_F_DUMP | NLM_F_REQUEST,
                .ifinfo.ifi_family = AF_PACKET,
        };
+       struct xdp_id_md xdp_id = {};
+       int err;
 
-       if (flags & ~XDP_FLAGS_MASK || !info_size)
+       if (!OPTS_VALID(opts, bpf_xdp_query_opts))
+               return libbpf_err(-EINVAL);
+
+       if (xdp_flags & ~XDP_FLAGS_MASK)
                return libbpf_err(-EINVAL);
 
        /* Check whether the single {HW,DRV,SKB} mode is set */
-       flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
-       mask = flags - 1;
-       if (flags && flags & mask)
+       xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE;
+       if (xdp_flags & (xdp_flags - 1))
                return libbpf_err(-EINVAL);
 
        xdp_id.ifindex = ifindex;
-       xdp_id.flags = flags;
+       xdp_id.flags = xdp_flags;
 
-       ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
+       err = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
                                       get_xdp_info, &xdp_id);
-       if (!ret) {
-               size_t sz = min(info_size, sizeof(xdp_id.info));
+       if (err)
+               return libbpf_err(err);
 
-               memcpy(info, &xdp_id.info, sz);
-               memset((void *) info + sz, 0, info_size - sz);
-       }
+       OPTS_SET(opts, prog_id, xdp_id.info.prog_id);
+       OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id);
+       OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id);
+       OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id);
+       OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode);
 
-       return libbpf_err(ret);
+       return 0;
 }
 
-static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
+int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
+                         size_t info_size, __u32 flags)
 {
-       flags &= XDP_FLAGS_MODES;
+       LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+       size_t sz;
+       int err;
+
+       if (!info_size)
+               return libbpf_err(-EINVAL);
 
-       if (info->attach_mode != XDP_ATTACHED_MULTI && !flags)
-               return info->prog_id;
-       if (flags & XDP_FLAGS_DRV_MODE)
-               return info->drv_prog_id;
-       if (flags & XDP_FLAGS_HW_MODE)
-               return info->hw_prog_id;
-       if (flags & XDP_FLAGS_SKB_MODE)
-               return info->skb_prog_id;
+       err = bpf_xdp_query(ifindex, flags, &opts);
+       if (err)
+               return libbpf_err(err);
+
+       /* struct xdp_link_info field layout matches struct bpf_xdp_query_opts
+        * layout after sz field
+        */
+       sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode));
+       memcpy(info, &opts.prog_id, sz);
+       memset((void *)info + sz, 0, info_size - sz);
 
        return 0;
 }
 
-int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
 {
-       struct xdp_link_info info;
+       LIBBPF_OPTS(bpf_xdp_query_opts, opts);
        int ret;
 
-       ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags);
-       if (!ret)
-               *prog_id = get_xdp_id(&info, flags);
+       ret = bpf_xdp_query(ifindex, flags, &opts);
+       if (ret)
+               return libbpf_err(ret);
+
+       flags &= XDP_FLAGS_MODES;
 
-       return libbpf_err(ret);
+       if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags)
+               *prog_id = opts.prog_id;
+       else if (flags & XDP_FLAGS_DRV_MODE)
+               *prog_id = opts.drv_prog_id;
+       else if (flags & XDP_FLAGS_HW_MODE)
+               *prog_id = opts.hw_prog_id;
+       else if (flags & XDP_FLAGS_SKB_MODE)
+               *prog_id = opts.skb_prog_id;
+       else
+               *prog_id = 0;
+
+       return 0;
+}
+
+int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+{
+       return bpf_xdp_query_id(ifindex, flags, prog_id);
 }
 
 typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
index 7ecfaac7536a1b1b562f0d3cbec5abeee29a6871..ef2832b4d5ccdd8bc19306ce032b60833f45f567 100644 (file)
@@ -1005,24 +1005,22 @@ __bpf_map__config_value(struct bpf_map *map,
 {
        struct bpf_map_op *op;
        const char *map_name = bpf_map__name(map);
-       const struct bpf_map_def *def = bpf_map__def(map);
 
-       if (IS_ERR(def)) {
-               pr_debug("Unable to get map definition from '%s'\n",
-                        map_name);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", map_name);
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
 
-       if (def->type != BPF_MAP_TYPE_ARRAY) {
+       if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY) {
                pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
                         map_name);
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
        }
-       if (def->key_size < sizeof(unsigned int)) {
+       if (bpf_map__key_size(map) < sizeof(unsigned int)) {
                pr_debug("Map %s has incorrect key size\n", map_name);
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
        }
-       switch (def->value_size) {
+       switch (bpf_map__value_size(map)) {
        case 1:
        case 2:
        case 4:
@@ -1064,7 +1062,6 @@ __bpf_map__config_event(struct bpf_map *map,
                        struct parse_events_term *term,
                        struct evlist *evlist)
 {
-       const struct bpf_map_def *def;
        struct bpf_map_op *op;
        const char *map_name = bpf_map__name(map);
        struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str);
@@ -1075,18 +1072,16 @@ __bpf_map__config_event(struct bpf_map *map,
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
        }
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def)) {
-               pr_debug("Unable to get map definition from '%s'\n",
-                        map_name);
-               return PTR_ERR(def);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", map_name);
+               return PTR_ERR(map);
        }
 
        /*
         * No need to check key_size and value_size:
         * kernel has already checked them.
         */
-       if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+       if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
                pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
                         map_name);
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
@@ -1135,7 +1130,6 @@ config_map_indices_range_check(struct parse_events_term *term,
                               const char *map_name)
 {
        struct parse_events_array *array = &term->array;
-       const struct bpf_map_def *def;
        unsigned int i;
 
        if (!array->nr_ranges)
@@ -1146,10 +1140,8 @@ config_map_indices_range_check(struct parse_events_term *term,
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def)) {
-               pr_debug("ERROR: Unable to get map definition from '%s'\n",
-                        map_name);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", map_name);
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
 
@@ -1158,7 +1150,7 @@ config_map_indices_range_check(struct parse_events_term *term,
                size_t length = array->ranges[i].length;
                unsigned int idx = start + length - 1;
 
-               if (idx >= def->max_entries) {
+               if (idx >= bpf_map__max_entries(map)) {
                        pr_debug("ERROR: index %d too large\n", idx);
                        return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
                }
@@ -1251,21 +1243,21 @@ out:
 }
 
 typedef int (*map_config_func_t)(const char *name, int map_fd,
-                                const struct bpf_map_def *pdef,
+                                const struct bpf_map *map,
                                 struct bpf_map_op *op,
                                 void *pkey, void *arg);
 
 static int
 foreach_key_array_all(map_config_func_t func,
                      void *arg, const char *name,
-                     int map_fd, const struct bpf_map_def *pdef,
+                     int map_fd, const struct bpf_map *map,
                      struct bpf_map_op *op)
 {
        unsigned int i;
        int err;
 
-       for (i = 0; i < pdef->max_entries; i++) {
-               err = func(name, map_fd, pdef, op, &i, arg);
+       for (i = 0; i < bpf_map__max_entries(map); i++) {
+               err = func(name, map_fd, map, op, &i, arg);
                if (err) {
                        pr_debug("ERROR: failed to insert value to %s[%u]\n",
                                 name, i);
@@ -1278,7 +1270,7 @@ foreach_key_array_all(map_config_func_t func,
 static int
 foreach_key_array_ranges(map_config_func_t func, void *arg,
                         const char *name, int map_fd,
-                        const struct bpf_map_def *pdef,
+                        const struct bpf_map *map,
                         struct bpf_map_op *op)
 {
        unsigned int i, j;
@@ -1291,7 +1283,7 @@ foreach_key_array_ranges(map_config_func_t func, void *arg,
                for (j = 0; j < length; j++) {
                        unsigned int idx = start + j;
 
-                       err = func(name, map_fd, pdef, op, &idx, arg);
+                       err = func(name, map_fd, map, op, &idx, arg);
                        if (err) {
                                pr_debug("ERROR: failed to insert value to %s[%u]\n",
                                         name, idx);
@@ -1307,9 +1299,8 @@ bpf_map_config_foreach_key(struct bpf_map *map,
                           map_config_func_t func,
                           void *arg)
 {
-       int err, map_fd;
+       int err, map_fd, type;
        struct bpf_map_op *op;
-       const struct bpf_map_def *def;
        const char *name = bpf_map__name(map);
        struct bpf_map_priv *priv = bpf_map__priv(map);
 
@@ -1322,9 +1313,8 @@ bpf_map_config_foreach_key(struct bpf_map *map,
                return 0;
        }
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def)) {
-               pr_debug("ERROR: failed to get definition from map %s\n", name);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", name);
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
        map_fd = bpf_map__fd(map);
@@ -1333,19 +1323,19 @@ bpf_map_config_foreach_key(struct bpf_map *map,
                return map_fd;
        }
 
+       type = bpf_map__type(map);
        list_for_each_entry(op, &priv->ops_list, list) {
-               switch (def->type) {
+               switch (type) {
                case BPF_MAP_TYPE_ARRAY:
                case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
                        switch (op->key_type) {
                        case BPF_MAP_KEY_ALL:
                                err = foreach_key_array_all(func, arg, name,
-                                                           map_fd, def, op);
+                                                           map_fd, map, op);
                                break;
                        case BPF_MAP_KEY_RANGES:
                                err = foreach_key_array_ranges(func, arg, name,
-                                                              map_fd, def,
-                                                              op);
+                                                              map_fd, map, op);
                                break;
                        default:
                                pr_debug("ERROR: keytype for map '%s' invalid\n",
@@ -1454,7 +1444,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
 
 static int
 apply_obj_config_map_for_key(const char *name, int map_fd,
-                            const struct bpf_map_def *pdef,
+                            const struct bpf_map *map,
                             struct bpf_map_op *op,
                             void *pkey, void *arg __maybe_unused)
 {
@@ -1463,7 +1453,7 @@ apply_obj_config_map_for_key(const char *name, int map_fd,
        switch (op->op_type) {
        case BPF_MAP_OP_SET_VALUE:
                err = apply_config_value_for_key(map_fd, pkey,
-                                                pdef->value_size,
+                                                bpf_map__value_size(map),
                                                 op->v.value);
                break;
        case BPF_MAP_OP_SET_EVSEL:
index eb853ca67cf4213904ae877df00c6a93083bc491..c863ae0c5cb5b791836b6c2c31141fc639b2f955 100644 (file)
@@ -9,25 +9,25 @@
 #include <stdlib.h>
 #include <unistd.h>
 
-static bool bpf_map_def__is_per_cpu(const struct bpf_map_def *def)
+static bool bpf_map__is_per_cpu(enum bpf_map_type type)
 {
-       return def->type == BPF_MAP_TYPE_PERCPU_HASH ||
-              def->type == BPF_MAP_TYPE_PERCPU_ARRAY ||
-              def->type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
-              def->type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
+       return type == BPF_MAP_TYPE_PERCPU_HASH ||
+              type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+              type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+              type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
 }
 
-static void *bpf_map_def__alloc_value(const struct bpf_map_def *def)
+static void *bpf_map__alloc_value(const struct bpf_map *map)
 {
-       if (bpf_map_def__is_per_cpu(def))
-               return malloc(round_up(def->value_size, 8) * sysconf(_SC_NPROCESSORS_CONF));
+       if (bpf_map__is_per_cpu(bpf_map__type(map)))
+               return malloc(round_up(bpf_map__value_size(map), 8) *
+                             sysconf(_SC_NPROCESSORS_CONF));
 
-       return malloc(def->value_size);
+       return malloc(bpf_map__value_size(map));
 }
 
 int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
 {
-       const struct bpf_map_def *def = bpf_map__def(map);
        void *prev_key = NULL, *key, *value;
        int fd = bpf_map__fd(map), err;
        int printed = 0;
@@ -35,15 +35,15 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
        if (fd < 0)
                return fd;
 
-       if (IS_ERR(def))
-               return PTR_ERR(def);
+       if (!map)
+               return PTR_ERR(map);
 
        err = -ENOMEM;
-       key = malloc(def->key_size);
+       key = malloc(bpf_map__key_size(map));
        if (key == NULL)
                goto out;
 
-       value = bpf_map_def__alloc_value(def);
+       value = bpf_map__alloc_value(map);
        if (value == NULL)
                goto out_free_key;
 
index 42ffc24e9e71050bd64112f9a1ed9673eb8aa70d..945f92d71db3d04d63e8f4df4dd3d11f76232be7 100644 (file)
@@ -21,7 +21,7 @@ endif
 
 BPF_GCC                ?= $(shell command -v bpf-gcc;)
 SAN_CFLAGS     ?=
-CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS)             \
+CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS)     \
          -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)          \
          -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
 LDFLAGS += $(SAN_CFLAGS)
@@ -292,7 +292,7 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
 MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
 
 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
-BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)                  \
+BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)          \
             -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)                   \
             -I$(abspath $(OUTPUT)/../usr/include)
 
index df3b292a8ffec06b0d902b6eb815f40742a35aad..bdbacf5adcd26c5fc590eb8a78f9db97bcf9fe82 100644 (file)
@@ -109,26 +109,31 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
        .write = bpf_testmod_test_write,
 };
 
-BTF_SET_START(bpf_testmod_kfunc_ids)
+BTF_SET_START(bpf_testmod_check_kfunc_ids)
 BTF_ID(func, bpf_testmod_test_mod_kfunc)
-BTF_SET_END(bpf_testmod_kfunc_ids)
+BTF_SET_END(bpf_testmod_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set);
+static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &bpf_testmod_check_kfunc_ids,
+};
+
+extern int bpf_fentry_test1(int a);
 
 static int bpf_testmod_init(void)
 {
        int ret;
 
-       ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
-       if (ret)
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
-       return 0;
+       if (bpf_fentry_test1(0) < 0)
+               return -EINVAL;
+       return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
 }
 
 static void bpf_testmod_exit(void)
 {
-       unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
        return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
 }
 
index f6287132fa89a763f06acc31128c3aa5aea9d6d5..763db63a38908185c4fc85379168beb7166b77df 100644 (file)
@@ -48,3 +48,8 @@ CONFIG_IMA_READ_POLICY=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_FUNCTION_TRACER=y
 CONFIG_DYNAMIC_FTRACE=y
+CONFIG_NETFILTER=y
+CONFIG_NF_DEFRAG_IPV4=y
+CONFIG_NF_DEFRAG_IPV6=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_USERFAULTFD=y
index d0f06e40c16d0b0f8c034957042d40e83cdb5c1b..eac71fbb24ce2182b13e75b6d71bfb0d08742b79 100644 (file)
@@ -1,13 +1,24 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include "bind_perm.skel.h"
-
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdlib.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/capability.h>
 
+#include "test_progs.h"
+#include "bind_perm.skel.h"
+
 static int duration;
 
+static int create_netns(void)
+{
+       if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+               return -1;
+
+       return 0;
+}
+
 void try_bind(int family, int port, int expected_errno)
 {
        struct sockaddr_storage addr = {};
@@ -75,6 +86,9 @@ void test_bind_perm(void)
        struct bind_perm *skel;
        int cgroup_fd;
 
+       if (create_netns())
+               return;
+
        cgroup_fd = test__join_cgroup("/bind_perm");
        if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
                return;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c
new file mode 100644 (file)
index 0000000..ee725d4
--- /dev/null
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <test_progs.h>
+#include "bpf_iter_setsockopt_unix.skel.h"
+
+#define NR_CASES 5
+
+static int create_unix_socket(struct bpf_iter_setsockopt_unix *skel)
+{
+       struct sockaddr_un addr = {
+               .sun_family = AF_UNIX,
+               .sun_path = "",
+       };
+       socklen_t len;
+       int fd, err;
+
+       fd = socket(AF_UNIX, SOCK_STREAM, 0);
+       if (!ASSERT_NEQ(fd, -1, "socket"))
+               return -1;
+
+       len = offsetof(struct sockaddr_un, sun_path);
+       err = bind(fd, (struct sockaddr *)&addr, len);
+       if (!ASSERT_OK(err, "bind"))
+               return -1;
+
+       len = sizeof(addr);
+       err = getsockname(fd, (struct sockaddr *)&addr, &len);
+       if (!ASSERT_OK(err, "getsockname"))
+               return -1;
+
+       memcpy(&skel->bss->sun_path, &addr.sun_path,
+              len - offsetof(struct sockaddr_un, sun_path));
+
+       return fd;
+}
+
+static void test_sndbuf(struct bpf_iter_setsockopt_unix *skel, int fd)
+{
+       socklen_t optlen;
+       int i, err;
+
+       for (i = 0; i < NR_CASES; i++) {
+               if (!ASSERT_NEQ(skel->data->sndbuf_getsockopt[i], -1,
+                               "bpf_(get|set)sockopt"))
+                       return;
+
+               err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+                                &(skel->data->sndbuf_setsockopt[i]),
+                                sizeof(skel->data->sndbuf_setsockopt[i]));
+               if (!ASSERT_OK(err, "setsockopt"))
+                       return;
+
+               optlen = sizeof(skel->bss->sndbuf_getsockopt_expected[i]);
+               err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+                                &(skel->bss->sndbuf_getsockopt_expected[i]),
+                                &optlen);
+               if (!ASSERT_OK(err, "getsockopt"))
+                       return;
+
+               if (!ASSERT_EQ(skel->data->sndbuf_getsockopt[i],
+                              skel->bss->sndbuf_getsockopt_expected[i],
+                              "bpf_(get|set)sockopt"))
+                       return;
+       }
+}
+
+void test_bpf_iter_setsockopt_unix(void)
+{
+       struct bpf_iter_setsockopt_unix *skel;
+       int err, unix_fd, iter_fd;
+       char buf;
+
+       skel = bpf_iter_setsockopt_unix__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "open_and_load"))
+               return;
+
+       unix_fd = create_unix_socket(skel);
+       if (!ASSERT_NEQ(unix_fd, -1, "create_unix_server"))
+               goto destroy;
+
+       skel->links.change_sndbuf = bpf_program__attach_iter(skel->progs.change_sndbuf, NULL);
+       if (!ASSERT_OK_PTR(skel->links.change_sndbuf, "bpf_program__attach_iter"))
+               goto destroy;
+
+       iter_fd = bpf_iter_create(bpf_link__fd(skel->links.change_sndbuf));
+       if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create"))
+               goto destroy;
+
+       while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+              errno == EAGAIN)
+               ;
+       if (!ASSERT_OK(err, "read iter error"))
+               goto destroy;
+
+       test_sndbuf(skel, unix_fd);
+destroy:
+       bpf_iter_setsockopt_unix__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
new file mode 100644 (file)
index 0000000..d43f548
--- /dev/null
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <stdatomic.h>
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include <linux/module.h>
+#include <linux/userfaultfd.h>
+
+#include "ksym_race.skel.h"
+#include "bpf_mod_race.skel.h"
+#include "kfunc_call_race.skel.h"
+
+/* This test crafts a race between btf_try_get_module and do_init_module, and
+ * checks whether btf_try_get_module handles the invocation for a well-formed
+ * but uninitialized module correctly. Unless the module has completed its
+ * initcalls, the verifier should fail the program load and return ENXIO.
+ *
+ * userfaultfd is used to trigger a fault in an fmod_ret program, and make it
+ * sleep, then the BPF program is loaded and the return value from verifier is
+ * inspected. After this, the userfaultfd is closed so that the module loading
+ * thread makes forward progress, and fmod_ret injects an error so that the
+ * module load fails and it is freed.
+ *
+ * If the verifier succeeded in loading the supplied program, it will end up
+ * taking a reference to the freed module, triggering a crash when the program fd
+ * is closed later. This is true for both kfuncs and ksyms. In both cases,
+ * the crash is triggered inside bpf_prog_free_deferred, when module reference
+ * is finally released.
+ */
+
+struct test_config {
+       const char *str_open;
+       void *(*bpf_open_and_load)();
+       void (*bpf_destroy)(void *);
+};
+
+enum test_state {
+       _TS_INVALID,
+       TS_MODULE_LOAD,
+       TS_MODULE_LOAD_FAIL,
+};
+
+static _Atomic enum test_state state = _TS_INVALID;
+
+static int sys_finit_module(int fd, const char *param_values, int flags)
+{
+       return syscall(__NR_finit_module, fd, param_values, flags);
+}
+
+static int sys_delete_module(const char *name, unsigned int flags)
+{
+       return syscall(__NR_delete_module, name, flags);
+}
+
+static int load_module(const char *mod)
+{
+       int ret, fd;
+
+       fd = open("bpf_testmod.ko", O_RDONLY);
+       if (fd < 0)
+               return fd;
+
+       ret = sys_finit_module(fd, "", 0);
+       close(fd);
+       if (ret < 0)
+               return ret;
+       return 0;
+}
+
+static void *load_module_thread(void *p)
+{
+       if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail"))
+               atomic_store(&state, TS_MODULE_LOAD);
+       else
+               atomic_store(&state, TS_MODULE_LOAD_FAIL);
+       return p;
+}
+
+static int sys_userfaultfd(int flags)
+{
+       return syscall(__NR_userfaultfd, flags);
+}
+
+static int test_setup_uffd(void *fault_addr)
+{
+       struct uffdio_register uffd_register = {};
+       struct uffdio_api uffd_api = {};
+       int uffd;
+
+       uffd = sys_userfaultfd(O_CLOEXEC);
+       if (uffd < 0)
+               return -errno;
+
+       uffd_api.api = UFFD_API;
+       uffd_api.features = 0;
+       if (ioctl(uffd, UFFDIO_API, &uffd_api)) {
+               close(uffd);
+               return -1;
+       }
+
+       uffd_register.range.start = (unsigned long)fault_addr;
+       uffd_register.range.len = 4096;
+       uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+       if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) {
+               close(uffd);
+               return -1;
+       }
+       return uffd;
+}
+
+static void test_bpf_mod_race_config(const struct test_config *config)
+{
+       void *fault_addr, *skel_fail;
+       struct bpf_mod_race *skel;
+       struct uffd_msg uffd_msg;
+       pthread_t load_mod_thrd;
+       _Atomic int *blockingp;
+       int uffd, ret;
+
+       fault_addr = mmap(0, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+       if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration"))
+               return;
+
+       if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod"))
+               goto end_mmap;
+
+       skel = bpf_mod_race__open();
+       if (!ASSERT_OK_PTR(skel, "bpf_mod_kfunc_race__open"))
+               goto end_module;
+
+       skel->rodata->bpf_mod_race_config.tgid = getpid();
+       skel->rodata->bpf_mod_race_config.inject_error = -4242;
+       skel->rodata->bpf_mod_race_config.fault_addr = fault_addr;
+       if (!ASSERT_OK(bpf_mod_race__load(skel), "bpf_mod___load"))
+               goto end_destroy;
+       blockingp = (_Atomic int *)&skel->bss->bpf_blocking;
+
+       if (!ASSERT_OK(bpf_mod_race__attach(skel), "bpf_mod_kfunc_race__attach"))
+               goto end_destroy;
+
+       uffd = test_setup_uffd(fault_addr);
+       if (!ASSERT_GE(uffd, 0, "userfaultfd open + register address"))
+               goto end_destroy;
+
+       if (!ASSERT_OK(pthread_create(&load_mod_thrd, NULL, load_module_thread, NULL),
+                      "load module thread"))
+               goto end_uffd;
+
+       /* Now we either fail loading the module or block in the bpf prog; spin to find out */
+       while (!atomic_load(&state) && !atomic_load(blockingp))
+               ;
+       if (!ASSERT_EQ(state, _TS_INVALID, "module load should block"))
+               goto end_join;
+       if (!ASSERT_EQ(*blockingp, 1, "module load blocked")) {
+               pthread_kill(load_mod_thrd, SIGKILL);
+               goto end_uffd;
+       }
+
+       /* We might have set bpf_blocking to 1, but may not have blocked in
+        * bpf_copy_from_user. Read the userfaultfd descriptor to verify that.
+        */
+       if (!ASSERT_EQ(read(uffd, &uffd_msg, sizeof(uffd_msg)), sizeof(uffd_msg),
+                      "read uffd block event"))
+               goto end_join;
+       if (!ASSERT_EQ(uffd_msg.event, UFFD_EVENT_PAGEFAULT, "read uffd event is pagefault"))
+               goto end_join;
+
+       /* We know that load_mod_thrd is blocked in the fmod_ret program, and the
+        * module state is still MODULE_STATE_COMING because mod->init hasn't
+        * returned. This is the time we try to load a program calling a kfunc and
+        * check whether we get ENXIO from the verifier.
+        */
+       skel_fail = config->bpf_open_and_load();
+       ret = errno;
+       if (!ASSERT_EQ(skel_fail, NULL, config->str_open)) {
+               /* Close uffd to unblock load_mod_thrd */
+               close(uffd);
+               uffd = -1;
+               while (atomic_load(blockingp) != 2)
+                       ;
+               ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+               config->bpf_destroy(skel_fail);
+               goto end_join;
+
+       }
+       ASSERT_EQ(ret, ENXIO, "verifier returns ENXIO");
+       ASSERT_EQ(skel->data->res_try_get_module, false, "btf_try_get_module == false");
+
+       close(uffd);
+       uffd = -1;
+end_join:
+       pthread_join(load_mod_thrd, NULL);
+       if (uffd < 0)
+               ASSERT_EQ(atomic_load(&state), TS_MODULE_LOAD_FAIL, "load_mod_thrd success");
+end_uffd:
+       if (uffd >= 0)
+               close(uffd);
+end_destroy:
+       bpf_mod_race__destroy(skel);
+       ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+end_module:
+       sys_delete_module("bpf_testmod", 0);
+       ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod");
+end_mmap:
+       munmap(fault_addr, 4096);
+       atomic_store(&state, _TS_INVALID);
+}
+
+static const struct test_config ksym_config = {
+       .str_open = "ksym_race__open_and_load",
+       .bpf_open_and_load = (void *)ksym_race__open_and_load,
+       .bpf_destroy = (void *)ksym_race__destroy,
+};
+
+static const struct test_config kfunc_config = {
+       .str_open = "kfunc_call_race__open_and_load",
+       .bpf_open_and_load = (void *)kfunc_call_race__open_and_load,
+       .bpf_destroy = (void *)kfunc_call_race__destroy,
+};
+
+void serial_test_bpf_mod_race(void)
+{
+       if (test__start_subtest("ksym (used_btfs UAF)"))
+               test_bpf_mod_race_config(&ksym_config);
+       if (test__start_subtest("kfunc (kfunc_btf_tab UAF)"))
+               test_bpf_mod_race_config(&kfunc_config);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
new file mode 100644 (file)
index 0000000..e3166a8
--- /dev/null
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_bpf_nf.skel.h"
+
+enum {
+       TEST_XDP,
+       TEST_TC_BPF,
+};
+
+void test_bpf_nf_ct(int mode)
+{
+       struct test_bpf_nf *skel;
+       int prog_fd, err, retval;
+
+       skel = test_bpf_nf__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
+               return;
+
+       if (mode == TEST_XDP)
+               prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test);
+       else
+               prog_fd = bpf_program__fd(skel->progs.nf_skb_ct_test);
+
+       err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
+                               (__u32 *)&retval, NULL);
+       if (!ASSERT_OK(err, "bpf_prog_test_run"))
+               goto end;
+
+       ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
+       ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
+       ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
+       ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
+       ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
+       ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id");
+       ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup");
+       ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple");
+end:
+       test_bpf_nf__destroy(skel);
+}
+
+void test_bpf_nf(void)
+{
+       if (test__start_subtest("xdp-ct"))
+               test_bpf_nf_ct(TEST_XDP);
+       if (test__start_subtest("tc-bpf-ct"))
+               test_bpf_nf_ct(TEST_TC_BPF);
+}
index 8ba53acf9eb47c99b2babeadea5201f6cba7b5a5..14f9b6136794832b3fc3d558be55d87615914541 100644 (file)
@@ -4560,6 +4560,8 @@ static void do_test_file(unsigned int test_num)
        has_btf_ext = btf_ext != NULL;
        btf_ext__free(btf_ext);
 
+       /* temporarily disable LIBBPF_STRICT_MAP_DEFINITIONS to test legacy maps */
+       libbpf_set_strict_mode((__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS);
        obj = bpf_object__open(test->file);
        err = libbpf_get_error(obj);
        if (CHECK(err, "obj: %d", err))
@@ -4684,6 +4686,8 @@ skip:
        fprintf(stderr, "OK");
 
 done:
+       libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
        btf__free(btf);
        free(func_info);
        bpf_object__close(obj);
index d3e8f729c6235883969fd9953dc999b933edad27..38b3c47293da9256e1caa51769a987ad538f1912 100644 (file)
@@ -194,14 +194,14 @@ void serial_test_cgroup_attach_multi(void)
 
        attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE;
        attach_opts.replace_prog_fd = allow_prog[0];
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_override", "unexpected success\n"))
                goto err;
        CHECK_FAIL(errno != EINVAL);
 
        attach_opts.flags = BPF_F_REPLACE;
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_no_multi", "unexpected success\n"))
                goto err;
@@ -209,7 +209,7 @@ void serial_test_cgroup_attach_multi(void)
 
        attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
        attach_opts.replace_prog_fd = -1;
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_bad_fd", "unexpected success\n"))
                goto err;
@@ -217,7 +217,7 @@ void serial_test_cgroup_attach_multi(void)
 
        /* replacing a program that is not attached to cgroup should fail  */
        attach_opts.replace_prog_fd = allow_prog[3];
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_no_ent", "unexpected success\n"))
                goto err;
@@ -225,14 +225,14 @@ void serial_test_cgroup_attach_multi(void)
 
        /* replace 1st from the top program */
        attach_opts.replace_prog_fd = allow_prog[0];
-       if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
                                        BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "prog_replace", "errno=%d\n", errno))
                goto err;
 
        /* replace program with itself */
        attach_opts.replace_prog_fd = allow_prog[6];
-       if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
                                        BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "prog_replace", "errno=%d\n", errno))
                goto err;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
new file mode 100644 (file)
index 0000000..0b47c3c
--- /dev/null
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "cgroup_getset_retval_setsockopt.skel.h"
+#include "cgroup_getset_retval_getsockopt.skel.h"
+
+#define SOL_CUSTOM     0xdeadbeef
+
+static int zero;
+
+static void test_setsockopt_set(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that sets EUNATCH, assert that
+        * we actually get that error when we run setsockopt()
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL, *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that sets EUNATCH, and one that gets the
+        * previously set errno. Assert that we get the same errno back.
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that gets the previously set errno.
+        * Assert that, without anything setting one, we get 0.
+        */
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_OK(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                 &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_get_retval = NULL, *link_set_eunatch = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that gets the previously set errno, and then
+        * one that sets the errno to EUNATCH. Assert that the get does not
+        * see EUNATCH set later, and does not prevent EUNATCH from being set.
+        */
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_get_retval);
+       bpf_link__destroy(link_set_eunatch);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_override(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL, *link_set_eisconn = NULL;
+       struct bpf_link *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that sets EUNATCH, then one that sets EISCONN,
+        * and then one that gets the exported errno. Assert that both the syscall
+        * and the helper see the last set errno.
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+       link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EISCONN, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EISCONN, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+       bpf_link__destroy(link_set_eisconn);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_legacy_eperm = NULL, *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that returns a reject without setting errno
+        * (legacy reject), and one that gets the errno. Assert that for
+        * backward compatibility the syscall results in EPERM, and this
+        * is also visible to the helper.
+        */
+       link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+                                                      cgroup_fd);
+       if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EPERM, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EPERM, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_legacy_eperm);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL, *link_legacy_eperm = NULL;
+       struct bpf_link *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that sets EUNATCH, then one that returns a reject
+        * without setting errno, and then one that gets the exported errno.
+        * Assert that both the syscall and the helper's errno are unaffected by
+        * the second prog (i.e. a legacy reject does not override the errno
+        * to EPERM).
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+       link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+                                                      cgroup_fd);
+       if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+       bpf_link__destroy(link_legacy_eperm);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_getsockopt_get(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_getsockopt *obj;
+       struct bpf_link *link_get_retval = NULL;
+       int buf;
+       socklen_t optlen = sizeof(buf);
+
+       obj = cgroup_getset_retval_getsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach a getsockopt program that reads the previously set errno.
+        * Assert that the error from the kernel is visible in both
+        * ctx_retval_value and retval_value.
+        */
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+                                  &buf, &optlen), "getsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EOPNOTSUPP, "getsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EOPNOTSUPP, "retval_value"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->ctx_retval_value, -EOPNOTSUPP, "ctx_retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_override(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_getsockopt *obj;
+       struct bpf_link *link_set_eisconn = NULL;
+       int buf;
+       socklen_t optlen = sizeof(buf);
+
+       obj = cgroup_getset_retval_getsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach a getsockopt program that sets retval to -EISCONN. Assert
+        * that this overrides the value from the kernel.
+        */
+       link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+                                  &buf, &optlen), "getsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EISCONN, "getsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eisconn);
+
+       cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_getsockopt *obj;
+       struct bpf_link *link_set_eisconn = NULL, *link_clear_retval = NULL;
+       struct bpf_link *link_get_retval = NULL;
+       int buf;
+       socklen_t optlen = sizeof(buf);
+
+       obj = cgroup_getset_retval_getsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach a getsockopt program that sets retval to -EISCONN, and one
+        * that clears the ctx retval. Assert that clearing the ctx retval is
+        * synced to the helper and clears any errors from both the kernel
+        * and BPF.
+        */
+       link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+               goto close_bpf_object;
+       link_clear_retval = bpf_program__attach_cgroup(obj->progs.clear_retval,
+                                                      cgroup_fd);
+       if (!ASSERT_OK_PTR(link_clear_retval, "cg-attach-clear_retval"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_OK(getsockopt(sock_fd, SOL_CUSTOM, 0,
+                                 &buf, &optlen), "getsockopt"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->ctx_retval_value, 0, "ctx_retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eisconn);
+       bpf_link__destroy(link_clear_retval);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+void test_cgroup_getset_retval(void)
+{
+       int cgroup_fd = -1;
+       int sock_fd = -1;
+
+       cgroup_fd = test__join_cgroup("/cgroup_getset_retval");
+       if (!ASSERT_GE(cgroup_fd, 0, "cg-create"))
+               goto close_fd;
+
+       sock_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
+       if (!ASSERT_GE(sock_fd, 0, "start-server"))
+               goto close_fd;
+
+       if (test__start_subtest("setsockopt-set"))
+               test_setsockopt_set(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-set_and_get"))
+               test_setsockopt_set_and_get(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-default_zero"))
+               test_setsockopt_default_zero(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-default_zero_and_set"))
+               test_setsockopt_default_zero_and_set(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-override"))
+               test_setsockopt_override(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-legacy_eperm"))
+               test_setsockopt_legacy_eperm(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-legacy_no_override"))
+               test_setsockopt_legacy_no_override(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("getsockopt-get"))
+               test_getsockopt_get(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("getsockopt-override"))
+               test_getsockopt_override(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("getsockopt-retval_sync"))
+               test_getsockopt_retval_sync(cgroup_fd, sock_fd);
+
+close_fd:
+       close(cgroup_fd);
+}
index ac54e3f91d42fd22ffbe16189f7b6cef96830678..dfafd62df50b3338b160409fe31da91a14d7e3ab 100644 (file)
@@ -457,7 +457,7 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
        if (map_fd < 0)
                return -1;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
index 9da131b32e13e328bb7db93d52f961a36e37520a..917165e0442798c12e330b383e9e4ba1ead9c63f 100644 (file)
@@ -121,7 +121,7 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
        if (CHECK_FAIL(map_fd < 0))
                return;
 
-       buff = malloc(bpf_map__def(map)->value_size);
+       buff = malloc(bpf_map__value_size(map));
        if (buff)
                err = bpf_map_update_elem(map_fd, &zero, buff, 0);
        free(buff);
index 1db86eab101b3018ce747f6750ce742dbad27113..57331c60696461aeec83af4629faec07cc755f8a 100644 (file)
@@ -20,7 +20,7 @@ void test_global_data_init(void)
        if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
                goto out;
 
-       sz = bpf_map__def(map)->value_size;
+       sz = bpf_map__value_size(map);
        newval = malloc(sz);
        if (CHECK_FAIL(!newval))
                goto out;
index 7d7445ccc14102df4be6c19338746ca7b826253b..b39a4f09aefdb7e740f57038cb2f479928317666 100644 (file)
@@ -27,6 +27,12 @@ static void test_main(void)
        ASSERT_OK(err, "bpf_prog_test_run(test2)");
        ASSERT_EQ(retval, 3, "test2-retval");
 
+       prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd;
+       err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+                               NULL, NULL, (__u32 *)&retval, NULL);
+       ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)");
+       ASSERT_EQ(retval, 0, "test_ref_btf_id-retval");
+
        kfunc_call_test_lskel__destroy(skel);
 }
 
index 85db0f4cdd956b854e36a3883397e6514b7dfbc2..b97a8f236b3a84fce34238478bea0a732d7155e6 100644 (file)
@@ -8,6 +8,7 @@
 #include "test_sockmap_update.skel.h"
 #include "test_sockmap_invalid_update.skel.h"
 #include "test_sockmap_skb_verdict_attach.skel.h"
+#include "test_sockmap_progs_query.skel.h"
 #include "bpf_iter_sockmap.skel.h"
 
 #define TCP_REPAIR             19      /* TCP sock is under repair right now */
@@ -315,6 +316,63 @@ out:
        test_sockmap_skb_verdict_attach__destroy(skel);
 }
 
+static __u32 query_prog_id(int prog_fd)
+{
+       struct bpf_prog_info info = {};
+       __u32 info_len = sizeof(info);
+       int err;
+
+       err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+       if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd") ||
+           !ASSERT_EQ(info_len, sizeof(info), "bpf_obj_get_info_by_fd"))
+               return 0;
+
+       return info.id;
+}
+
+static void test_sockmap_progs_query(enum bpf_attach_type attach_type)
+{
+       struct test_sockmap_progs_query *skel;
+       int err, map_fd, verdict_fd;
+       __u32 attach_flags = 0;
+       __u32 prog_ids[3] = {};
+       __u32 prog_cnt = 3;
+
+       skel = test_sockmap_progs_query__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_sockmap_progs_query__open_and_load"))
+               return;
+
+       map_fd = bpf_map__fd(skel->maps.sock_map);
+
+       if (attach_type == BPF_SK_MSG_VERDICT)
+               verdict_fd = bpf_program__fd(skel->progs.prog_skmsg_verdict);
+       else
+               verdict_fd = bpf_program__fd(skel->progs.prog_skb_verdict);
+
+       err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+                            &attach_flags, prog_ids, &prog_cnt);
+       ASSERT_OK(err, "bpf_prog_query failed");
+       ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+       ASSERT_EQ(prog_cnt, 0, "wrong program count on query");
+
+       err = bpf_prog_attach(verdict_fd, map_fd, attach_type, 0);
+       if (!ASSERT_OK(err, "bpf_prog_attach failed"))
+               goto out;
+
+       prog_cnt = 1;
+       err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+                            &attach_flags, prog_ids, &prog_cnt);
+       ASSERT_OK(err, "bpf_prog_query failed");
+       ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+       ASSERT_EQ(prog_cnt, 1, "wrong program count on query");
+       ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd),
+                 "wrong prog_ids on query");
+
+       bpf_prog_detach2(verdict_fd, map_fd, attach_type);
+out:
+       test_sockmap_progs_query__destroy(skel);
+}
+
 void test_sockmap_basic(void)
 {
        if (test__start_subtest("sockmap create_update_free"))
@@ -341,4 +399,12 @@ void test_sockmap_basic(void)
                test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
                                                BPF_SK_SKB_VERDICT);
        }
+       if (test__start_subtest("sockmap msg_verdict progs query"))
+               test_sockmap_progs_query(BPF_SK_MSG_VERDICT);
+       if (test__start_subtest("sockmap stream_parser progs query"))
+               test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER);
+       if (test__start_subtest("sockmap stream_verdict progs query"))
+               test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
+       if (test__start_subtest("sockmap skb_verdict progs query"))
+               test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
 }
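For reference, the sockmap query test above only needs a skeleton exposing a sockmap plus one trivial verdict program per attach type. A minimal sketch is shown below; the map sizing and the SEC() names are assumptions rather than the contents of the in-tree test_sockmap_progs_query.c.

// SPDX-License-Identifier: GPL-2.0
/* Sketch only; the actual test_sockmap_progs_query.c may differ. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
        __uint(type, BPF_MAP_TYPE_SOCKMAP);
        __uint(max_entries, 1);
        __type(key, __u32);
        __type(value, __u64);
} sock_map SEC(".maps");

SEC("sk_skb")
int prog_skb_verdict(struct __sk_buff *skb)
{
        return SK_PASS;
}

SEC("sk_msg")
int prog_skmsg_verdict(struct sk_msg_md *msg)
{
        return SK_PASS;
}

char _license[] SEC("license") = "GPL";

Any program of the matching type works here, since bpf_prog_query() only reports what is attached to the map, not what the program does.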
index 7e21bfab6358c0be9efc08c859cd4e4119148268..2cf0c7a3fe2326c03076281030751a36f6940901 100644 (file)
@@ -1413,14 +1413,12 @@ close_srv1:
 
 static void test_ops_cleanup(const struct bpf_map *map)
 {
-       const struct bpf_map_def *def;
        int err, mapfd;
        u32 key;
 
-       def = bpf_map__def(map);
        mapfd = bpf_map__fd(map);
 
-       for (key = 0; key < def->max_entries; key++) {
+       for (key = 0; key < bpf_map__max_entries(map); key++) {
                err = bpf_map_delete_elem(mapfd, &key);
                if (err && errno != EINVAL && errno != ENOENT)
                        FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
@@ -1443,13 +1441,13 @@ static const char *family_str(sa_family_t family)
 
 static const char *map_type_str(const struct bpf_map *map)
 {
-       const struct bpf_map_def *def;
+       int type;
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def))
+       if (!map)
                return "invalid";
+       type = bpf_map__type(map);
 
-       switch (def->type) {
+       switch (type) {
        case BPF_MAP_TYPE_SOCKMAP:
                return "sockmap";
        case BPF_MAP_TYPE_SOCKHASH:
index 4b937e5dbacae4e882033b4772bfee2852636cfc..30a99d2ed5c62c18e080d0a1f14ae8078020008d 100644 (file)
@@ -173,11 +173,11 @@ static int getsetsockopt(void)
        }
 
        memset(&buf, 0, sizeof(buf));
-       buf.zc.address = 12345; /* rejected by BPF */
+       buf.zc.address = 12345; /* Not page aligned. Rejected by tcp_zerocopy_receive() */
        optlen = sizeof(buf.zc);
        errno = 0;
        err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen);
-       if (errno != EPERM) {
+       if (errno != EINVAL) {
                log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d",
                        err, errno);
                goto err;
index 5dc0f425bd11db9aebac13055cdf4b7b15e65d05..796f231582f8f5cbf546857d18586d5cb31ba117 100644 (file)
@@ -37,7 +37,7 @@ static void test_tailcall_1(void)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -53,7 +53,7 @@ static void test_tailcall_1(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
                                        &duration, &retval, NULL);
                CHECK(err || retval != i, "tailcall",
@@ -69,7 +69,7 @@ static void test_tailcall_1(void)
        CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
              err, errno, retval);
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -90,8 +90,8 @@ static void test_tailcall_1(void)
        CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
              err, errno, retval);
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-               j = bpf_map__def(prog_array)->max_entries - 1 - i;
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+               j = bpf_map__max_entries(prog_array) - 1 - i;
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", j);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -107,8 +107,8 @@ static void test_tailcall_1(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-               j = bpf_map__def(prog_array)->max_entries - 1 - i;
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+               j = bpf_map__max_entries(prog_array) - 1 - i;
 
                err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
                                        &duration, &retval, NULL);
@@ -125,7 +125,7 @@ static void test_tailcall_1(void)
        CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
              err, errno, retval);
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_delete_elem(map_fd, &i);
                if (CHECK_FAIL(err >= 0 || errno != ENOENT))
                        goto out;
@@ -175,7 +175,7 @@ static void test_tailcall_2(void)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -353,7 +353,7 @@ static void test_tailcall_4(void)
        if (CHECK_FAIL(map_fd < 0))
                return;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -369,7 +369,7 @@ static void test_tailcall_4(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -380,7 +380,7 @@ static void test_tailcall_4(void)
                      "err %d errno %d retval %d\n", err, errno, retval);
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -441,7 +441,7 @@ static void test_tailcall_5(void)
        if (CHECK_FAIL(map_fd < 0))
                return;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -457,7 +457,7 @@ static void test_tailcall_5(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -468,7 +468,7 @@ static void test_tailcall_5(void)
                      "err %d errno %d retval %d\n", err, errno, retval);
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -520,7 +520,7 @@ static void test_tailcall_bpf2bpf_1(void)
                goto out;
 
        /* nop -> jmp */
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -681,7 +681,7 @@ static void test_tailcall_bpf2bpf_3(void)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -778,7 +778,7 @@ static void test_tailcall_bpf2bpf_4(bool noise)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
new file mode 100644 (file)
index 0000000..31c1886
--- /dev/null
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+void test_xdp_update_frags(void)
+{
+       const char *file = "./test_xdp_update_frags.o";
+       __u32 duration, retval, size;
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       int err, prog_fd;
+       __u32 *offset;
+       __u8 *buf;
+
+       obj = bpf_object__open(file);
+       if (libbpf_get_error(obj))
+               return;
+
+       prog = bpf_object__next_program(obj, NULL);
+       if (bpf_object__load(obj))
+               return;
+
+       prog_fd = bpf_program__fd(prog);
+
+       buf = malloc(128);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 128b"))
+               goto out;
+
+       memset(buf, 0, 128);
+       offset = (__u32 *)buf;
+       *offset = 16;
+       buf[*offset] = 0xaa;            /* marker at offset 16 (head) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 31 (head) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 128,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[16,31]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[16], 0xbb, "xdp_update_frag buf[16]");
+       ASSERT_EQ(buf[31], 0xbb, "xdp_update_frag buf[31]");
+
+       free(buf);
+
+       buf = malloc(9000);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+               goto out;
+
+       memset(buf, 0, 9000);
+       offset = (__u32 *)buf;
+       *offset = 5000;
+       buf[*offset] = 0xaa;            /* marker at offset 5000 (frag0) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 5015 (frag0) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[5000,5015]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[5000], 0xbb, "xdp_update_frag buf[5000]");
+       ASSERT_EQ(buf[5015], 0xbb, "xdp_update_frag buf[5015]");
+
+       memset(buf, 0, 9000);
+       offset = (__u32 *)buf;
+       *offset = 3510;
+       buf[*offset] = 0xaa;            /* marker at offset 3510 (head) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 3525 (frag0) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[3510,3525]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[3510], 0xbb, "xdp_update_frag buf[3510]");
+       ASSERT_EQ(buf[3525], 0xbb, "xdp_update_frag buf[3525]");
+
+       memset(buf, 0, 9000);
+       offset = (__u32 *)buf;
+       *offset = 7606;
+       buf[*offset] = 0xaa;            /* marker at offset 7606 (frag0) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 7621 (frag1) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[7606,7621]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[7606], 0xbb, "xdp_update_frag buf[7606]");
+       ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]");
+
+       free(buf);
+out:
+       bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags(void)
+{
+       if (test__start_subtest("xdp_adjust_frags"))
+               test_xdp_update_frags();
+}
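The userspace test above assumes a companion BPF object, test_xdp_update_frags.o, that reads the marker offset from the first four bytes of the packet and rewrites the two 0xaa markers to 0xbb. A minimal sketch of such a program, assuming the bpf_xdp_load_bytes()/bpf_xdp_store_bytes() helpers and the xdp.frags SEC() name introduced in this series (the in-tree source may differ), could look like:

// SPDX-License-Identifier: GPL-2.0
/* Sketch only; the in-tree test_xdp_update_frags.c may differ. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp.frags")
int xdp_adjust_frags(struct xdp_md *xdp)
{
        __u8 val[16];
        __u32 offset;

        /* Userspace stores the marker offset in the first four bytes. */
        if (bpf_xdp_load_bytes(xdp, 0, &offset, sizeof(offset)))
                return XDP_DROP;

        /* The markers sit at offset and offset + 15, possibly in a frag. */
        if (bpf_xdp_load_bytes(xdp, offset, val, sizeof(val)))
                return XDP_DROP;
        if (val[0] != 0xaa || val[15] != 0xaa)
                return XDP_DROP;

        /* Flip the markers so userspace can verify the store path. */
        val[0] = 0xbb;
        val[15] = 0xbb;
        if (bpf_xdp_store_bytes(xdp, offset, val, sizeof(val)))
                return XDP_DROP;

        return XDP_PASS;
}

char _license[] SEC("license") = "GPL";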
index 3f5a17c38be596e0f6fb9fa0561e80249460474e..ccc9e63254a8983463ad129add0e533753d292a5 100644 (file)
@@ -11,22 +11,21 @@ static void test_xdp_adjust_tail_shrink(void)
        char buf[128];
 
        err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-       if (CHECK_FAIL(err))
+       if (!ASSERT_OK(err, "test_xdp_adjust_tail_shrink"))
                return;
 
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
                                buf, &size, &retval, &duration);
-
-       CHECK(err || retval != XDP_DROP,
-             "ipv4", "err %d errno %d retval %d size %d\n",
-             err, errno, retval, size);
+       ASSERT_OK(err, "ipv4");
+       ASSERT_EQ(retval, XDP_DROP, "ipv4 retval");
 
        expect_sz = sizeof(pkt_v6) - 20;  /* Test shrink with 20 bytes */
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
                                buf, &size, &retval, &duration);
-       CHECK(err || retval != XDP_TX || size != expect_sz,
-             "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
-             err, errno, retval, size, expect_sz);
+       ASSERT_OK(err, "ipv6");
+       ASSERT_EQ(retval, XDP_TX, "ipv6 retval");
+       ASSERT_EQ(size, expect_sz, "ipv6 size");
+
        bpf_object__close(obj);
 }
 
@@ -39,21 +38,20 @@ static void test_xdp_adjust_tail_grow(void)
        int err, prog_fd;
 
        err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-       if (CHECK_FAIL(err))
+       if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
                return;
 
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
                                buf, &size, &retval, &duration);
-       CHECK(err || retval != XDP_DROP,
-             "ipv4", "err %d errno %d retval %d size %d\n",
-             err, errno, retval, size);
+       ASSERT_OK(err, "ipv4");
+       ASSERT_EQ(retval, XDP_DROP, "ipv4 retval");
 
        expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */,
                                buf, &size, &retval, &duration);
-       CHECK(err || retval != XDP_TX || size != expect_sz,
-             "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
-             err, errno, retval, size, expect_sz);
+       ASSERT_OK(err, "ipv6");
+       ASSERT_EQ(retval, XDP_TX, "ipv6 retval");
+       ASSERT_EQ(size, expect_sz, "ipv6 size");
 
        bpf_object__close(obj);
 }
@@ -76,7 +74,7 @@ static void test_xdp_adjust_tail_grow2(void)
        };
 
        err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
-       if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+       if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow2"))
                return;
 
        /* Test case-64 */
@@ -86,21 +84,17 @@ static void test_xdp_adjust_tail_grow2(void)
        /* Kernel side alloc packet memory area that is zero init */
        err = bpf_prog_test_run_xattr(&tattr);
 
-       CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */
-                  || tattr.retval != XDP_TX
-                  || tattr.data_size_out != 192, /* Expected grow size */
-                  "case-64",
-                  "err %d errno %d retval %d size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out);
+       ASSERT_EQ(errno, ENOSPC, "case-64 errno"); /* Due to the copy_size limit in bpf_test_finish */
+       ASSERT_EQ(tattr.retval, XDP_TX, "case-64 retval");
+       ASSERT_EQ(tattr.data_size_out, 192, "case-64 data_size_out"); /* Expected grow size */
 
        /* Extra checks for data contents */
-       CHECK_ATTR(tattr.data_size_out != 192
-                  || buf[0]   != 1 ||  buf[63]  != 1  /*  0-63  memset to 1 */
-                  || buf[64]  != 0 ||  buf[127] != 0  /* 64-127 memset to 0 */
-                  || buf[128] != 1 ||  buf[191] != 1, /*128-191 memset to 1 */
-                  "case-64-data",
-                  "err %d errno %d retval %d size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out);
+       ASSERT_EQ(buf[0], 1, "case-64-data buf[0]"); /*  0-63  memset to 1 */
+       ASSERT_EQ(buf[63], 1, "case-64-data buf[63]");
+       ASSERT_EQ(buf[64], 0, "case-64-data buf[64]"); /* 64-127 memset to 0 */
+       ASSERT_EQ(buf[127], 0, "case-64-data buf[127]");
+       ASSERT_EQ(buf[128], 1, "case-64-data buf[128]"); /* 128-191 memset to 1 */
+       ASSERT_EQ(buf[191], 1, "case-64-data buf[191]");
 
        /* Test case-128 */
        memset(buf, 2, sizeof(buf));
@@ -109,24 +103,139 @@ static void test_xdp_adjust_tail_grow2(void)
        err = bpf_prog_test_run_xattr(&tattr);
 
        max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */
-       CHECK_ATTR(err
-                  || tattr.retval != XDP_TX
-                  || tattr.data_size_out != max_grow,/* Expect max grow size */
-                  "case-128",
-                  "err %d errno %d retval %d size %d expect-size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out, max_grow);
+       ASSERT_OK(err, "case-128");
+       ASSERT_EQ(tattr.retval, XDP_TX, "case-128 retval");
+       ASSERT_EQ(tattr.data_size_out, max_grow, "case-128 data_size_out"); /* Expect max grow */
 
        /* Extra checks for data content: Count grow size, will contain zeros */
        for (i = 0, cnt = 0; i < sizeof(buf); i++) {
                if (buf[i] == 0)
                        cnt++;
        }
-       CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */
-                  || tattr.data_size_out != max_grow, /* Total grow size */
-                  "case-128-data",
-                  "err %d errno %d retval %d size %d grow-size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out, cnt);
+       ASSERT_EQ(cnt, max_grow - tattr.data_size_in, "case-128-data cnt"); /* Grow increase */
+       ASSERT_EQ(tattr.data_size_out, max_grow, "case-128-data data_size_out"); /* Total grow */
+
+       bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags_tail_shrink(void)
+{
+       const char *file = "./test_xdp_adjust_tail_shrink.o";
+       __u32 duration, retval, size, exp_size;
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       int err, prog_fd;
+       __u8 *buf;
+
+       /* For the individual test cases, the first byte in the packet
+        * indicates which test will be run.
+        */
+       obj = bpf_object__open(file);
+       if (libbpf_get_error(obj))
+               return;
+
+       prog = bpf_object__next_program(obj, NULL);
+       if (bpf_object__load(obj))
+               return;
+
+       prog_fd = bpf_program__fd(prog);
+
+       buf = malloc(9000);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+               goto out;
+
+       memset(buf, 0, 9000);
+
+       /* Test case removing 10 bytes from last frag, NOT freeing it */
+       exp_size = 8990; /* 9000 - 10 */
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb-10b");
+       ASSERT_EQ(retval, XDP_TX, "9Kb-10b retval");
+       ASSERT_EQ(size, exp_size, "9Kb-10b size");
+
+       /* Test case removing one of two pages, assuming 4K pages */
+       buf[0] = 1;
+       exp_size = 4900; /* 9000 - 4100 */
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb-4Kb");
+       ASSERT_EQ(retval, XDP_TX, "9Kb-4Kb retval");
+       ASSERT_EQ(size, exp_size, "9Kb-4Kb size");
+
+       /* Test case removing two pages resulting in a linear xdp_buff */
+       buf[0] = 2;
+       exp_size = 800; /* 9000 - 8200 */
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb-9Kb");
+       ASSERT_EQ(retval, XDP_TX, "9Kb-9Kb retval");
+       ASSERT_EQ(size, exp_size, "9Kb-9Kb size");
+
+       free(buf);
+out:
+       bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags_tail_grow(void)
+{
+       const char *file = "./test_xdp_adjust_tail_grow.o";
+       __u32 duration, retval, size, exp_size;
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       int err, i, prog_fd;
+       __u8 *buf;
+
+       obj = bpf_object__open(file);
+       if (libbpf_get_error(obj))
+               return;
+
+       prog = bpf_object__next_program(obj, NULL);
+       if (bpf_object__load(obj))
+               return;
+
+       prog_fd = bpf_program__fd(prog);
+
+       buf = malloc(16384);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 16Kb"))
+               goto out;
+
+       /* Test case add 10 bytes to last frag */
+       memset(buf, 1, 16384);
+       size = 9000;
+       exp_size = size + 10;
+       err = bpf_prog_test_run(prog_fd, 1, buf, size,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb+10b");
+       ASSERT_EQ(retval, XDP_TX, "9Kb+10b retval");
+       ASSERT_EQ(size, exp_size, "9Kb+10b size");
+
+       for (i = 0; i < 9000; i++)
+               ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+
+       for (i = 9000; i < 9010; i++)
+               ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+
+       for (i = 9010; i < 16384; i++)
+               ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+
+       /* Test a too large grow */
+       memset(buf, 1, 16384);
+       size = 9001;
+       exp_size = size;
+       err = bpf_prog_test_run(prog_fd, 1, buf, size,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "too-large-grow");
+       ASSERT_EQ(retval, XDP_DROP, "too-large-grow retval");
+       ASSERT_EQ(size, exp_size, "too-large-grow size");
 
+       free(buf);
+out:
        bpf_object__close(obj);
 }
 
@@ -138,4 +247,8 @@ void test_xdp_adjust_tail(void)
                test_xdp_adjust_tail_grow();
        if (test__start_subtest("xdp_adjust_tail_grow2"))
                test_xdp_adjust_tail_grow2();
+       if (test__start_subtest("xdp_adjust_frags_tail_shrink"))
+               test_xdp_adjust_frags_tail_shrink();
+       if (test__start_subtest("xdp_adjust_frags_tail_grow"))
+               test_xdp_adjust_frags_tail_grow();
 }
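For the grow2 test above, the 3520-byte max_grow figure can be sanity-checked by hand, assuming 4 KiB pages and the usual x86-64 layout: the kernel reserves XDP_PACKET_HEADROOM (256 bytes) at the front of the page and tailroom for the skb_shared_info (roughly 320 bytes after alignment) at the back, leaving 4096 - 256 - 320 = 3520 bytes for the linear data area.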
index c98a897ad69226df3df41bfbd2311bdd004fde07..9c395ea680c683e3c2bcd13c44c5ae83aab8a61c 100644 (file)
@@ -10,40 +10,97 @@ struct meta {
        int pkt_len;
 };
 
+struct test_ctx_s {
+       bool passed;
+       int pkt_size;
+};
+
+struct test_ctx_s test_ctx;
+
 static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 {
-       int duration = 0;
        struct meta *meta = (struct meta *)data;
        struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+       unsigned char *raw_pkt = data + sizeof(*meta);
+       struct test_ctx_s *tst_ctx = ctx;
+
+       ASSERT_GE(size, sizeof(pkt_v4) + sizeof(*meta), "check_size");
+       ASSERT_EQ(meta->ifindex, if_nametoindex("lo"), "check_meta_ifindex");
+       ASSERT_EQ(meta->pkt_len, tst_ctx->pkt_size, "check_meta_pkt_len");
+       ASSERT_EQ(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), 0,
+                 "check_packet_content");
+
+       if (meta->pkt_len > sizeof(pkt_v4)) {
+               for (int i = 0; i < meta->pkt_len - sizeof(pkt_v4); i++)
+                       ASSERT_EQ(raw_pkt[i + sizeof(pkt_v4)], (unsigned char)i,
+                                 "check_packet_content");
+       }
+
+       tst_ctx->passed = true;
+}
 
-       if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
-                 "check_size", "size %u < %zu\n",
-                 size, sizeof(pkt_v4) + sizeof(*meta)))
-               return;
+#define BUF_SZ 9000
 
-       if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
-                 "meta->ifindex = %d\n", meta->ifindex))
+static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb,
+                                    struct test_xdp_bpf2bpf *ftrace_skel,
+                                    int pkt_size)
+{
+       __u32 duration = 0, retval, size;
+       __u8 *buf, *buf_in;
+       int err;
+
+       if (!ASSERT_LE(pkt_size, BUF_SZ, "pkt_size") ||
+           !ASSERT_GE(pkt_size, sizeof(pkt_v4), "pkt_size"))
                return;
 
-       if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
-                 "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+       buf_in = malloc(BUF_SZ);
+       if (!ASSERT_OK_PTR(buf_in, "buf_in malloc()"))
                return;
 
-       if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
-                 "check_packet_content", "content not the same\n"))
+       buf = malloc(BUF_SZ);
+       if (!ASSERT_OK_PTR(buf, "buf malloc()")) {
+               free(buf_in);
                return;
+       }
+
+       test_ctx.passed = false;
+       test_ctx.pkt_size = pkt_size;
+
+       memcpy(buf_in, &pkt_v4, sizeof(pkt_v4));
+       if (pkt_size > sizeof(pkt_v4)) {
+               for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++)
+                       buf_in[i + sizeof(pkt_v4)] = i;
+       }
+
+       /* Run test program */
+       err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "ipv4");
+       ASSERT_EQ(retval, XDP_PASS, "ipv4 retval");
+       ASSERT_EQ(size, pkt_size, "ipv4 size");
+
+       /* Make sure bpf_xdp_output() was triggered and it sent the expected
+        * data to the perf ring buffer.
+        */
+       err = perf_buffer__poll(pb, 100);
 
-       *(bool *)ctx = true;
+       ASSERT_GE(err, 0, "perf_buffer__poll");
+       ASSERT_TRUE(test_ctx.passed, "test passed");
+       /* Verify test results */
+       ASSERT_EQ(ftrace_skel->bss->test_result_fentry, if_nametoindex("lo"),
+                 "fentry result");
+       ASSERT_EQ(ftrace_skel->bss->test_result_fexit, XDP_PASS, "fexit result");
+
+       free(buf);
+       free(buf_in);
 }
 
 void test_xdp_bpf2bpf(void)
 {
-       __u32 duration = 0, retval, size;
-       char buf[128];
        int err, pkt_fd, map_fd;
-       bool passed = false;
-       struct iphdr iph;
-       struct iptnl_info value4 = {.family = AF_INET};
+       int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200};
+       struct iptnl_info value4 = {.family = AF_INET6};
        struct test_xdp *pkt_skel = NULL;
        struct test_xdp_bpf2bpf *ftrace_skel = NULL;
        struct vip key4 = {.protocol = 6, .family = AF_INET};
@@ -52,7 +109,7 @@ void test_xdp_bpf2bpf(void)
 
        /* Load XDP program to introspect */
        pkt_skel = test_xdp__open_and_load();
-       if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n"))
+       if (!ASSERT_OK_PTR(pkt_skel, "test_xdp__open_and_load"))
                return;
 
        pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel);
@@ -62,7 +119,7 @@ void test_xdp_bpf2bpf(void)
 
        /* Load trace program */
        ftrace_skel = test_xdp_bpf2bpf__open();
-       if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n"))
+       if (!ASSERT_OK_PTR(ftrace_skel, "test_xdp_bpf2bpf__open"))
                goto out;
 
        /* Demonstrate the bpf_program__set_attach_target() API rather than
@@ -77,50 +134,24 @@ void test_xdp_bpf2bpf(void)
        bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
 
        err = test_xdp_bpf2bpf__load(ftrace_skel);
-       if (CHECK(err, "__load", "ftrace skeleton failed\n"))
+       if (!ASSERT_OK(err, "test_xdp_bpf2bpf__load"))
                goto out;
 
        err = test_xdp_bpf2bpf__attach(ftrace_skel);
-       if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
+       if (!ASSERT_OK(err, "test_xdp_bpf2bpf__attach"))
                goto out;
 
        /* Set up perf buffer */
-       pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1,
-                             on_sample, NULL, &passed, NULL);
+       pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 8,
+                             on_sample, NULL, &test_ctx, NULL);
        if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
                goto out;
 
-       /* Run test program */
-       err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
-                               buf, &size, &retval, &duration);
-       memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
-       if (CHECK(err || retval != XDP_TX || size != 74 ||
-                 iph.protocol != IPPROTO_IPIP, "ipv4",
-                 "err %d errno %d retval %d size %d\n",
-                 err, errno, retval, size))
-               goto out;
-
-       /* Make sure bpf_xdp_output() was triggered and it sent the expected
-        * data to the perf ring buffer.
-        */
-       err = perf_buffer__poll(pb, 100);
-       if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
-               goto out;
-
-       CHECK_FAIL(!passed);
-
-       /* Verify test results */
-       if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
-                 "result", "fentry failed err %llu\n",
-                 ftrace_skel->bss->test_result_fentry))
-               goto out;
-
-       CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result",
-             "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
-
+       for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++)
+               run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel,
+                                        pkt_sizes[i]);
 out:
-       if (pb)
-               perf_buffer__free(pb);
+       perf_buffer__free(pb);
        test_xdp__destroy(pkt_skel);
        test_xdp_bpf2bpf__destroy(ftrace_skel);
 }
index fd812bd43600abd58f0331d37e6117c752233518..13aabb3b6cf2c806ad5cb1a91be2a10405d2a914 100644 (file)
@@ -3,11 +3,12 @@
 #include <linux/if_link.h>
 #include <test_progs.h>
 
+#include "test_xdp_with_cpumap_frags_helpers.skel.h"
 #include "test_xdp_with_cpumap_helpers.skel.h"
 
 #define IFINDEX_LO     1
 
-void serial_test_xdp_cpumap_attach(void)
+void test_xdp_with_cpumap_helpers(void)
 {
        struct test_xdp_with_cpumap_helpers *skel;
        struct bpf_prog_info info = {};
@@ -54,6 +55,67 @@ void serial_test_xdp_cpumap_attach(void)
        err = bpf_map_update_elem(map_fd, &idx, &val, 0);
        ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
 
+       /* Try to attach BPF_XDP program with frags to cpumap when we have
+        * already loaded a BPF_XDP program on the map
+        */
+       idx = 1;
+       val.qsize = 192;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry");
+
 out_close:
        test_xdp_with_cpumap_helpers__destroy(skel);
 }
+
+void test_xdp_with_cpumap_frags_helpers(void)
+{
+       struct test_xdp_with_cpumap_frags_helpers *skel;
+       struct bpf_prog_info info = {};
+       __u32 len = sizeof(info);
+       struct bpf_cpumap_val val = {
+               .qsize = 192,
+       };
+       int err, frags_prog_fd, map_fd;
+       __u32 idx = 0;
+
+       skel = test_xdp_with_cpumap_frags_helpers__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_frags_helpers__open_and_load"))
+               return;
+
+       frags_prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+       map_fd = bpf_map__fd(skel->maps.cpu_map);
+       err = bpf_obj_get_info_by_fd(frags_prog_fd, &info, &len);
+       if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+               goto out_close;
+
+       val.bpf_prog.fd = frags_prog_fd;
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_OK(err, "Add program to cpumap entry");
+
+       err = bpf_map_lookup_elem(map_fd, &idx, &val);
+       ASSERT_OK(err, "Read cpumap entry");
+       ASSERT_EQ(info.id, val.bpf_prog.id,
+                 "Match program id to cpumap entry prog_id");
+
+       /* Try to attach BPF_XDP program to cpumap when we have
+        * already loaded a BPF_XDP program with frags on the map
+        */
+       idx = 1;
+       val.qsize = 192;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program to cpumap entry");
+
+out_close:
+       test_xdp_with_cpumap_frags_helpers__destroy(skel);
+}
+
+void serial_test_xdp_cpumap_attach(void)
+{
+       if (test__start_subtest("CPUMAP with programs in entries"))
+               test_xdp_with_cpumap_helpers();
+
+       if (test__start_subtest("CPUMAP with frags programs in entries"))
+               test_xdp_with_cpumap_frags_helpers();
+}
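The frags skeleton referenced above needs little more than a cpumap and two dummy programs. A minimal sketch follows; the SEC() spellings (xdp/cpumap and the xdp.frags/cpumap variant for frags-aware programs) and the map sizing are assumptions about the libbpf conventions of this series, and the in-tree test_xdp_with_cpumap_frags_helpers.c may differ.

// SPDX-License-Identifier: GPL-2.0
/* Sketch only; section names and map sizing are assumed. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
        __uint(type, BPF_MAP_TYPE_CPUMAP);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(struct bpf_cpumap_val));
        __uint(max_entries, 4);
} cpu_map SEC(".maps");

SEC("xdp/cpumap")
int xdp_dummy_cm(struct xdp_md *ctx)
{
        return XDP_PASS;
}

SEC("xdp.frags/cpumap")
int xdp_dummy_cm_frags(struct xdp_md *ctx)
{
        return XDP_PASS;
}

char _license[] SEC("license") = "GPL";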
index 3079d5568f8fac1185a94366015afea83c867087..2a784ccd313621e29dc908b79862d900ddcdbc6a 100644 (file)
@@ -4,6 +4,7 @@
 #include <test_progs.h>
 
 #include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_with_devmap_frags_helpers.skel.h"
 #include "test_xdp_with_devmap_helpers.skel.h"
 
 #define IFINDEX_LO 1
@@ -56,6 +57,15 @@ static void test_xdp_with_devmap_helpers(void)
        err = bpf_map_update_elem(map_fd, &idx, &val, 0);
        ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry");
 
+       /* Try to attach BPF_XDP program with frags to devmap when we have
+        * already loaded a BPF_XDP program on the map
+        */
+       idx = 1;
+       val.ifindex = 1;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to devmap entry");
+
 out_close:
        test_xdp_with_devmap_helpers__destroy(skel);
 }
@@ -71,12 +81,57 @@ static void test_neg_xdp_devmap_helpers(void)
        }
 }
 
+void test_xdp_with_devmap_frags_helpers(void)
+{
+       struct test_xdp_with_devmap_frags_helpers *skel;
+       struct bpf_prog_info info = {};
+       struct bpf_devmap_val val = {
+               .ifindex = IFINDEX_LO,
+       };
+       __u32 len = sizeof(info);
+       int err, dm_fd_frags, map_fd;
+       __u32 idx = 0;
+
+       skel = test_xdp_with_devmap_frags_helpers__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_frags_helpers__open_and_load"))
+               return;
+
+       dm_fd_frags = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+       map_fd = bpf_map__fd(skel->maps.dm_ports);
+       err = bpf_obj_get_info_by_fd(dm_fd_frags, &info, &len);
+       if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+               goto out_close;
+
+       val.bpf_prog.fd = dm_fd_frags;
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_OK(err, "Add frags program to devmap entry");
+
+       err = bpf_map_lookup_elem(map_fd, &idx, &val);
+       ASSERT_OK(err, "Read devmap entry");
+       ASSERT_EQ(info.id, val.bpf_prog.id,
+                 "Match program id to devmap entry prog_id");
+
+       /* Try to attach BPF_XDP program to devmap when we have
+        * already loaded a BPF_XDP program with frags on the map
+        */
+       idx = 1;
+       val.ifindex = 1;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program to devmap entry");
+
+out_close:
+       test_xdp_with_devmap_frags_helpers__destroy(skel);
+}
 
 void serial_test_xdp_devmap_attach(void)
 {
        if (test__start_subtest("DEVMAP with programs in entries"))
                test_xdp_with_devmap_helpers();
 
+       if (test__start_subtest("DEVMAP with frags programs in entries"))
+               test_xdp_with_devmap_frags_helpers();
+
        if (test__start_subtest("Verifier check of DEVMAP programs"))
                test_neg_xdp_devmap_helpers();
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c
new file mode 100644 (file)
index 0000000..eafc877
--- /dev/null
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <limits.h>
+
+#define AUTOBIND_LEN 6
+char sun_path[AUTOBIND_LEN];
+
+#define NR_CASES 5
+int sndbuf_setsockopt[NR_CASES] = {-1, 0, 8192, INT_MAX / 2, INT_MAX};
+int sndbuf_getsockopt[NR_CASES] = {-1, -1, -1, -1, -1};
+int sndbuf_getsockopt_expected[NR_CASES];
+
+static inline int cmpname(struct unix_sock *unix_sk)
+{
+       int i;
+
+       for (i = 0; i < AUTOBIND_LEN; i++) {
+               if (unix_sk->addr->name->sun_path[i] != sun_path[i])
+                       return -1;
+       }
+
+       return 0;
+}
+
+SEC("iter/unix")
+int change_sndbuf(struct bpf_iter__unix *ctx)
+{
+       struct unix_sock *unix_sk = ctx->unix_sk;
+       int i, err;
+
+       if (!unix_sk || !unix_sk->addr)
+               return 0;
+
+       if (unix_sk->addr->name->sun_path[0])
+               return 0;
+
+       if (cmpname(unix_sk))
+               return 0;
+
+       for (i = 0; i < NR_CASES; i++) {
+               err = bpf_setsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+                                    &sndbuf_setsockopt[i],
+                                    sizeof(sndbuf_setsockopt[i]));
+               if (err)
+                       break;
+
+               err = bpf_getsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+                                    &sndbuf_getsockopt[i],
+                                    sizeof(sndbuf_getsockopt[i]));
+               if (err)
+                       break;
+       }
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
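On the userspace side, an iterator like this is typically driven by attaching it, creating an iterator fd, and read()ing until EOF so that change_sndbuf() runs once per AF_UNIX socket. A hedged sketch of such a driver (skeleton and assertion names assumed, mirroring the prog_tests conventions used elsewhere in this series) might be:

/* Sketch only; the in-tree prog_tests driver may differ. */
static void drive_unix_iter(struct bpf_iter_setsockopt_unix *skel)
{
        struct bpf_link *link;
        char buf[64];
        int iter_fd, len;

        link = bpf_program__attach_iter(skel->progs.change_sndbuf, NULL);
        if (!ASSERT_OK_PTR(link, "attach_iter"))
                return;

        iter_fd = bpf_iter_create(bpf_link__fd(link));
        if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create"))
                goto destroy_link;

        /* Each read() batch runs change_sndbuf() over more unix sockets. */
        while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
                ;

        close(iter_fd);
destroy_link:
        bpf_link__destroy(link);
}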
index c21e3f545371fc456c426ba24890a6f6b29a5076..e6aefae38894290e553baed5e5e5ccfbfb680023 100644 (file)
@@ -63,7 +63,7 @@ int dump_unix(struct bpf_iter__unix *ctx)
                        BPF_SEQ_PRINTF(seq, " @");
 
                        for (i = 1; i < len; i++) {
-                               /* unix_mkname() tests this upper bound. */
+                               /* unix_validate_addr() tests this upper bound. */
                                if (i >= sizeof(struct sockaddr_un))
                                        break;
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_mod_race.c b/tools/testing/selftests/bpf/progs/bpf_mod_race.c
new file mode 100644 (file)
index 0000000..82a5c6c
--- /dev/null
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+const volatile struct {
+       /* thread to activate trace programs for */
+       pid_t tgid;
+       /* return error from __init function */
+       int inject_error;
+       /* uffd monitored range start address */
+       void *fault_addr;
+} bpf_mod_race_config = { -1 };
+
+int bpf_blocking = 0;
+int res_try_get_module = -1;
+
+static __always_inline bool check_thread_id(void)
+{
+       struct task_struct *task = bpf_get_current_task_btf();
+
+       return task->tgid == bpf_mod_race_config.tgid;
+}
+
+/* The trace of execution is something like this:
+ *
+ * finit_module()
+ *   load_module()
+ *     prepare_coming_module()
+ *       notifier_call(MODULE_STATE_COMING)
+ *         btf_parse_module()
+ *         btf_alloc_id()              // Visible to userspace at this point
+ *         list_add(btf_mod->list, &btf_modules)
+ *     do_init_module()
+ *       freeinit = kmalloc()
+ *       ret = mod->init()
+ *         bpf_prog_widen_race()
+ *           bpf_copy_from_user()
+ *             ...<sleep>...
+ *       if (ret < 0)
+ *         ...
+ *         free_module()
+ * return ret
+ *
+ * At this point, the module loading thread is blocked; we now load the program:
+ *
+ * bpf_check
+ *   add_kfunc_call/check_pseudo_btf_id
+ *     btf_try_get_module
+ *       try_get_module_live == false
+ *     return -ENXIO
+ *
+ * Without the fix (try_get_module_live in btf_try_get_module):
+ *
+ * bpf_check
+ *   add_kfunc_call/check_pseudo_btf_id
+ *     btf_try_get_module
+ *       try_get_module == true
+ *     <store module reference in btf_kfunc_tab or used_btf array>
+ *   ...
+ * return fd
+ *
+ * Now, if we inject an error into the blocked program, our module will be
+ * freed (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING).
+ * Later, when the bpf program is freed, it will try to module_put the already
+ * freed module. This is why try_get_module_live returns false if mod->state
+ * is not MODULE_STATE_LIVE.
+ */
+
+SEC("fmod_ret.s/bpf_fentry_test1")
+int BPF_PROG(widen_race, int a, int ret)
+{
+       char dst;
+
+       if (!check_thread_id())
+               return 0;
+       /* Indicate that we will attempt to block */
+       bpf_blocking = 1;
+       bpf_copy_from_user(&dst, 1, bpf_mod_race_config.fault_addr);
+       return bpf_mod_race_config.inject_error;
+}
+
+SEC("fexit/do_init_module")
+int BPF_PROG(fexit_init_module, struct module *mod, int ret)
+{
+       if (!check_thread_id())
+               return 0;
+       /* Indicate that we finished blocking */
+       bpf_blocking = 2;
+       return 0;
+}
+
+SEC("fexit/btf_try_get_module")
+int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod)
+{
+       res_try_get_module = !!mod;
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index e0f42601be9b8a00bf5ec34cc4c13a0162e8c860..1c1289ba5fc5e324867eb08058a239d98f52200a 100644 (file)
@@ -5,6 +5,8 @@
 #define AF_INET                        2
 #define AF_INET6               10
 
+#define SOL_SOCKET             1
+#define SO_SNDBUF              7
 #define __SO_ACCEPTCON         (1 << 16)
 
 #define SOL_TCP                        6
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
new file mode 100644 (file)
index 0000000..b2a409e
--- /dev/null
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+__u32 ctx_retval_value = 0;
+
+SEC("cgroup/getsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+       retval_value = bpf_get_retval();
+       ctx_retval_value = ctx->retval;
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 1;
+}
+
+SEC("cgroup/getsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       if (bpf_set_retval(-EISCONN))
+               assertion_error = 1;
+
+       return 1;
+}
+
+SEC("cgroup/getsockopt")
+int clear_retval(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       ctx->retval = 0;
+
+       return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
new file mode 100644 (file)
index 0000000..d6e5903
--- /dev/null
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+
+SEC("cgroup/setsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+       retval_value = bpf_get_retval();
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 1;
+}
+
+SEC("cgroup/setsockopt")
+int set_eunatch(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       if (bpf_set_retval(-EUNATCH))
+               assertion_error = 1;
+
+       return 0;
+}
+
+SEC("cgroup/setsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       if (bpf_set_retval(-EISCONN))
+               assertion_error = 1;
+
+       return 0;
+}
+
+SEC("cgroup/setsockopt")
+int legacy_eperm(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 0;
+}
index 68a5a9db928aa58f1c5ad9d1b0d129c530b54817..7e94412d47a5cac7b557c83c06647b1d3fd054b9 100644 (file)
@@ -7,12 +7,12 @@
 #include <bpf/bpf_endian.h>
 #include <bpf/bpf_helpers.h>
 
-struct bpf_map_def SEC("maps") sock_map = {
-       .type = BPF_MAP_TYPE_SOCKMAP,
-       .key_size = sizeof(int),
-       .value_size = sizeof(int),
-       .max_entries = 2,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_SOCKMAP);
+       __type(key, int);
+       __type(value, int);
+       __uint(max_entries, 2);
+} sock_map SEC(".maps");
 
 SEC("freplace/cls_redirect")
 int freplace_cls_redirect_test(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
new file mode 100644 (file)
index 0000000..4e8fed7
--- /dev/null
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern void bpf_testmod_test_mod_kfunc(int i) __ksym;
+
+SEC("tc")
+int kfunc_call_fail(struct __sk_buff *ctx)
+{
+       bpf_testmod_test_mod_kfunc(0);
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index 8a8cf59017aac01662c260d2cfe40fbb9f3d1fda..5aecbb9fdc68def55859da680afd8e6db8755cc6 100644 (file)
@@ -1,13 +1,20 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2021 Facebook */
-#include <linux/bpf.h>
+#include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
 
 extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
 extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
                                  __u32 c, __u64 d) __ksym;
 
+extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym;
+extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
+extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
+extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
+extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
+
 SEC("tc")
 int kfunc_call_test2(struct __sk_buff *skb)
 {
@@ -44,4 +51,45 @@ int kfunc_call_test1(struct __sk_buff *skb)
        return ret;
 }
 
+SEC("tc")
+int kfunc_call_test_ref_btf_id(struct __sk_buff *skb)
+{
+       struct prog_test_ref_kfunc *pt;
+       unsigned long s = 0;
+       int ret = 0;
+
+       pt = bpf_kfunc_call_test_acquire(&s);
+       if (pt) {
+               if (pt->a != 42 || pt->b != 108)
+                       ret = -1;
+               bpf_kfunc_call_test_release(pt);
+       }
+       return ret;
+}
+
+SEC("tc")
+int kfunc_call_test_pass(struct __sk_buff *skb)
+{
+       struct prog_test_pass1 p1 = {};
+       struct prog_test_pass2 p2 = {};
+       short a = 0;
+       __u64 b = 0;
+       long c = 0;
+       char d = 0;
+       int e = 0;
+
+       bpf_kfunc_call_test_pass_ctx(skb);
+       bpf_kfunc_call_test_pass1(&p1);
+       bpf_kfunc_call_test_pass2(&p2);
+
+       bpf_kfunc_call_test_mem_len_pass1(&a, sizeof(a));
+       bpf_kfunc_call_test_mem_len_pass1(&b, sizeof(b));
+       bpf_kfunc_call_test_mem_len_pass1(&c, sizeof(c));
+       bpf_kfunc_call_test_mem_len_pass1(&d, sizeof(d));
+       bpf_kfunc_call_test_mem_len_pass1(&e, sizeof(e));
+       bpf_kfunc_call_test_mem_len_fail2(&b, -1);
+
+       return 0;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/ksym_race.c b/tools/testing/selftests/bpf/progs/ksym_race.c
new file mode 100644 (file)
index 0000000..def97f2
--- /dev/null
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern int bpf_testmod_ksym_percpu __ksym;
+
+SEC("tc")
+int ksym_fail(struct __sk_buff *ctx)
+{
+       return *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
+}
+
+char _license[] SEC("license") = "GPL";
index 1612a32007b6955ced5373a44b5ce3c2ff173dd1..495990d355ef366e40b224002b0878f84aa389d2 100644 (file)
@@ -2,19 +2,19 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-struct bpf_map_def SEC("maps") htab = {
-       .type = BPF_MAP_TYPE_HASH,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(long),
-       .max_entries = 2,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __type(key, __u32);
+       __type(value, long);
+       __uint(max_entries, 2);
+} htab SEC(".maps");
 
-struct bpf_map_def SEC("maps") array = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(long),
-       .max_entries = 2,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, __u32);
+       __type(value, long);
+       __uint(max_entries, 2);
+} array SEC(".maps");
 
 /* Sample program which should always load for testing control paths. */
 SEC(".text") int func()
index 95d5b941bc1f8d46e9fcd5e42c4580ba2aeea337..c9abfe3a11afaa9181d48bf26c1a2b033ebe60ef 100644 (file)
@@ -7,8 +7,6 @@ int bpf_prog1(struct __sk_buff *skb)
 {
        void *data_end = (void *)(long) skb->data_end;
        void *data = (void *)(long) skb->data;
-       __u32 lport = skb->local_port;
-       __u32 rport = skb->remote_port;
        __u8 *d = data;
        int err;
 
index 79c8139b63b803c45b72da131546c150ca9c81f1..d0298dccedcd1112671826f0fde74bd53216f776 100644 (file)
@@ -73,17 +73,17 @@ int _getsockopt(struct bpf_sockopt *ctx)
                 */
 
                if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                if (((struct tcp_zerocopy_receive *)optval)->address != 0)
-                       return 0; /* EPERM, unexpected data */
+                       return 0; /* unexpected data */
 
                return 1;
        }
 
        if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
                if (optval + 1 > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                ctx->retval = 0; /* Reset system call return value to zero */
 
@@ -96,24 +96,24 @@ int _getsockopt(struct bpf_sockopt *ctx)
                 * bytes of data.
                 */
                if (optval_end - optval != page_size)
-                       return 0; /* EPERM, unexpected data size */
+                       return 0; /* unexpected data size */
 
                return 1;
        }
 
        if (ctx->level != SOL_CUSTOM)
-               return 0; /* EPERM, deny everything except custom level */
+               return 0; /* deny everything except custom level */
 
        if (optval + 1 > optval_end)
-               return 0; /* EPERM, bounds check */
+               return 0; /* bounds check */
 
        storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
                                     BPF_SK_STORAGE_GET_F_CREATE);
        if (!storage)
-               return 0; /* EPERM, couldn't get sk storage */
+               return 0; /* couldn't get sk storage */
 
        if (!ctx->retval)
-               return 0; /* EPERM, kernel should not have handled
+               return 0; /* kernel should not have handled
                           * SOL_CUSTOM, something is wrong!
                           */
        ctx->retval = 0; /* Reset system call return value to zero */
@@ -152,7 +152,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
                /* Overwrite SO_SNDBUF value */
 
                if (optval + sizeof(__u32) > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                *(__u32 *)optval = 0x55AA;
                ctx->optlen = 4;
@@ -164,7 +164,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
                /* Always use cubic */
 
                if (optval + 5 > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                memcpy(optval, "cubic", 5);
                ctx->optlen = 5;
@@ -175,10 +175,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
        if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
                /* Original optlen is larger than PAGE_SIZE. */
                if (ctx->optlen != page_size * 2)
-                       return 0; /* EPERM, unexpected data size */
+                       return 0; /* unexpected data size */
 
                if (optval + 1 > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                /* Make sure we can trim the buffer. */
                optval[0] = 0;
@@ -189,21 +189,21 @@ int _setsockopt(struct bpf_sockopt *ctx)
                 * bytes of data.
                 */
                if (optval_end - optval != page_size)
-                       return 0; /* EPERM, unexpected data size */
+                       return 0; /* unexpected data size */
 
                return 1;
        }
 
        if (ctx->level != SOL_CUSTOM)
-               return 0; /* EPERM, deny everything except custom level */
+               return 0; /* deny everything except custom level */
 
        if (optval + 1 > optval_end)
-               return 0; /* EPERM, bounds check */
+               return 0; /* bounds check */
 
        storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
                                     BPF_SK_STORAGE_GET_F_CREATE);
        if (!storage)
-               return 0; /* EPERM, couldn't get sk storage */
+               return 0; /* couldn't get sk storage */
 
        storage->val = optval[0];
        ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
new file mode 100644 (file)
index 0000000..f00a973
--- /dev/null
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define EAFNOSUPPORT 97
+#define EPROTO 71
+#define ENONET 64
+#define EINVAL 22
+#define ENOENT 2
+
+int test_einval_bpf_tuple = 0;
+int test_einval_reserved = 0;
+int test_einval_netns_id = 0;
+int test_einval_len_opts = 0;
+int test_eproto_l4proto = 0;
+int test_enonet_netns_id = 0;
+int test_enoent_lookup = 0;
+int test_eafnosupport = 0;
+
+struct nf_conn;
+
+struct bpf_ct_opts___local {
+       s32 netns_id;
+       s32 error;
+       u8 l4proto;
+       u8 reserved[3];
+} __attribute__((preserve_access_index));
+
+struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
+                                 struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+                                 struct bpf_ct_opts___local *, u32) __ksym;
+void bpf_ct_release(struct nf_conn *) __ksym;
+
+static __always_inline void
+nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32,
+                                  struct bpf_ct_opts___local *, u32),
+          void *ctx)
+{
+       struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
+       struct bpf_sock_tuple bpf_tuple;
+       struct nf_conn *ct;
+
+       __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
+
+       ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def));
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_bpf_tuple = opts_def.error;
+
+       opts_def.reserved[0] = 1;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.reserved[0] = 0;
+       opts_def.l4proto = IPPROTO_TCP;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_reserved = opts_def.error;
+
+       opts_def.netns_id = -2;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.netns_id = -1;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_netns_id = opts_def.error;
+
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1);
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_len_opts = opts_def.error;
+
+       opts_def.l4proto = IPPROTO_ICMP;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.l4proto = IPPROTO_TCP;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_eproto_l4proto = opts_def.error;
+
+       opts_def.netns_id = 0xf00f;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.netns_id = -1;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_enonet_netns_id = opts_def.error;
+
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_enoent_lookup = opts_def.error;
+
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def));
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_eafnosupport = opts_def.error;
+}
+
+SEC("xdp")
+int nf_xdp_ct_test(struct xdp_md *ctx)
+{
+       nf_ct_test((void *)bpf_xdp_ct_lookup, ctx);
+       return 0;
+}
+
+SEC("tc")
+int nf_skb_ct_test(struct __sk_buff *ctx)
+{
+       nf_ct_test((void *)bpf_skb_ct_lookup, ctx);
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
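
The program above deliberately exercises only the error paths of the lookup kfuncs; a hypothetical success-path sketch (addresses, ports, and the program name are made up) shows how the tuple would normally be filled for an IPv4 TCP lookup from XDP:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

struct nf_conn;

/* Same local opts definition and kfunc declarations as in test_bpf_nf.c. */
struct bpf_ct_opts___local {
	s32 netns_id;
	s32 error;
	u8 l4proto;
	u8 reserved[3];
} __attribute__((preserve_access_index));

struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
				  struct bpf_ct_opts___local *, u32) __ksym;
void bpf_ct_release(struct nf_conn *) __ksym;

/* Hypothetical success path: look up an established TCP flow and release the
 * conntrack reference if the entry exists.  Addresses and ports are
 * placeholders.
 */
SEC("xdp")
int nf_xdp_ct_lookup_ok(struct xdp_md *ctx)
{
	struct bpf_ct_opts___local opts = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
	struct bpf_sock_tuple tup = {};
	struct nf_conn *ct;

	tup.ipv4.saddr = bpf_htonl(0x0a000001);	/* 10.0.0.1 */
	tup.ipv4.daddr = bpf_htonl(0x0a000002);	/* 10.0.0.2 */
	tup.ipv4.sport = bpf_htons(4242);
	tup.ipv4.dport = bpf_htons(80);

	ct = bpf_xdp_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
	if (ct)
		bpf_ct_release(ct);

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";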
index 160ead6c67b200e57b16506f94ed44eee6a955fc..07c94df136609cb7b3b0390b62c8dbe54f3cfd87 100644 (file)
@@ -9,12 +9,15 @@ struct ipv_counts {
        unsigned int v6;
 };
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 struct bpf_map_def SEC("maps") btf_map = {
        .type = BPF_MAP_TYPE_ARRAY,
        .key_size = sizeof(int),
        .value_size = sizeof(struct ipv_counts),
        .max_entries = 4,
 };
+#pragma GCC diagnostic pop
 
 BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
 
index 1884a5bd10f568a6cfd999c9a93f2529f13a539d..762671a2e90cb1c0eee59ea01435744fe2fa6bd8 100644 (file)
@@ -9,6 +9,8 @@ struct ipv_counts {
        unsigned int v6;
 };
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 /* just to validate we can handle maps in multiple sections */
 struct bpf_map_def SEC("maps") btf_map_legacy = {
        .type = BPF_MAP_TYPE_ARRAY,
@@ -16,6 +18,7 @@ struct bpf_map_def SEC("maps") btf_map_legacy = {
        .value_size = sizeof(long long),
        .max_entries = 4,
 };
+#pragma GCC diagnostic pop
 
 BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
 
index 15e0f9945fe469fae6bde149ea445234b018f0ec..1dabb88f8cb48dad9ec3276ab030d8506d1d75ee 100644 (file)
@@ -8,12 +8,12 @@ struct ipv_counts {
        unsigned int v6;
 };
 
-struct bpf_map_def SEC("maps") btf_map = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(int),
-       .value_size = sizeof(struct ipv_counts),
-       .max_entries = 4,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(struct ipv_counts));
+       __uint(max_entries, 4);
+} btf_map SEC(".maps");
 
 __attribute__((noinline))
 int test_long_fname_2(void)
index c304cd5b8cad78d8a86c21287b3bf949e082304e..37aacc66cd68ac9609ce2cbefa332206fbc0f579 100644 (file)
 
 #define NUM_CGROUP_LEVELS      4
 
-struct bpf_map_def SEC("maps") cgroup_ids = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(__u64),
-       .max_entries = NUM_CGROUP_LEVELS,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, __u32);
+       __type(value, __u64);
+       __uint(max_entries, NUM_CGROUP_LEVELS);
+} cgroup_ids SEC(".maps");
 
 static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
new file mode 100644 (file)
index 0000000..9d58d61
--- /dev/null
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+       __uint(type, BPF_MAP_TYPE_SOCKMAP);
+       __uint(max_entries, 1);
+       __type(key, __u32);
+       __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+       return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+       return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
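
The two verdict programs above are simply targets for BPF_PROG_QUERY on a sockmap; a hypothetical user-space query follows, assuming map_fd is the fd of sock_map above and the sk_skb program was attached as a stream verdict:

#include <stdio.h>
#include <bpf/bpf.h>

/* Hypothetical query against a sockmap fd, relying on the BPF_PROG_QUERY
 * support for sockmap-attached programs added in this series.
 */
static void query_sockmap_progs(int map_fd)
{
	__u32 prog_ids[4] = {}, prog_cnt = 4, attach_flags = 0;

	if (!bpf_prog_query(map_fd, BPF_SK_SKB_STREAM_VERDICT, 0,
			    &attach_flags, prog_ids, &prog_cnt) && prog_cnt)
		printf("sk_skb verdict prog id: %u\n", prog_ids[0]);
}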
index bf28814bfde5597a7a63b8ef93ed6051e30fe288..950a70b61e74689f62a4991956c7def0f6aed938 100644 (file)
 #define THROTTLE_RATE_BPS (5 * 1000 * 1000)
 
 /* flow_key => last_tstamp timestamp used */
-struct bpf_map_def SEC("maps") flow_map = {
-       .type = BPF_MAP_TYPE_HASH,
-       .key_size = sizeof(uint32_t),
-       .value_size = sizeof(uint64_t),
-       .max_entries = 1,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __type(key, uint32_t);
+       __type(value, uint64_t);
+       __uint(max_entries, 1);
+} flow_map SEC(".maps");
 
 static inline int throttle_flow(struct __sk_buff *skb)
 {
index cd747cd93dbe81382ee29c68a73d3cdc41107b0a..6edebce563b57e96b04a20b6d14dbcdca62152d5 100644 (file)
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-struct bpf_map_def SEC("maps") results = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(__u32),
-       .max_entries = 3,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, __u32);
+       __type(value, __u32);
+       __uint(max_entries, 3);
+} results SEC(".maps");
 
 static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk,
                                           void *iph, __u32 ip_size,
index 199c61b7d062863389c2b4371f7c8e6171c3d323..53b64c9994500580271010eaf30dd4c60f1a710e 100644 (file)
@@ -7,11 +7,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
 {
        void *data_end = (void *)(long)xdp->data_end;
        void *data = (void *)(long)xdp->data;
-       unsigned int data_len;
+       int data_len = bpf_xdp_get_buff_len(xdp);
        int offset = 0;
 
        /* Data length determines test case */
-       data_len = data_end - data;
 
        if (data_len == 54) { /* sizeof(pkt_v4) */
                offset = 4096; /* test too large offset */
@@ -20,7 +19,12 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
        } else if (data_len == 64) {
                offset = 128;
        } else if (data_len == 128) {
-               offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */
+               /* Max tail grow 3520 */
+               offset = 4096 - 256 - 320 - data_len;
+       } else if (data_len == 9000) {
+               offset = 10;
+       } else if (data_len == 9001) {
+               offset = 4096;
        } else {
                return XDP_ABORTED; /* No matching test */
        }
index b7448253d1359b7939370bba04525b20ffc2c367..ca68c038357c3b86ed79ba81fa0b2f9b997caad9 100644 (file)
 SEC("xdp")
 int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
 {
-       void *data_end = (void *)(long)xdp->data_end;
-       void *data = (void *)(long)xdp->data;
+       __u8 *data_end = (void *)(long)xdp->data_end;
+       __u8 *data = (void *)(long)xdp->data;
        int offset = 0;
 
-       if (data_end - data == 54) /* sizeof(pkt_v4) */
+       switch (bpf_xdp_get_buff_len(xdp)) {
+       case 54:
+               /* sizeof(pkt_v4) */
                offset = 256; /* shrink too much */
-       else
+               break;
+       case 9000:
+               /* non-linear buff test cases */
+               if (data + 1 > data_end)
+                       return XDP_DROP;
+
+               switch (data[0]) {
+               case 0:
+                       offset = 10;
+                       break;
+               case 1:
+                       offset = 4100;
+                       break;
+               case 2:
+                       offset = 8200;
+                       break;
+               default:
+                       return XDP_DROP;
+               }
+               break;
+       default:
                offset = 20;
+               break;
+       }
        if (bpf_xdp_adjust_tail(xdp, 0 - offset))
                return XDP_DROP;
        return XDP_TX;
index 58cf4345f5cc9c34df8460ebfef07e0a2623cfc6..3379d303f41a99c664148ba5198942f8cabe37b5 100644 (file)
@@ -49,7 +49,7 @@ int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
        void *data = (void *)(long)xdp->data;
 
        meta.ifindex = xdp->rxq->dev->ifindex;
-       meta.pkt_len = data_end - data;
+       meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp);
        bpf_xdp_output(xdp, &perf_buf_map,
                       ((__u64) meta.pkt_len << 32) |
                       BPF_F_CURRENT_CPU,
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c
new file mode 100644 (file)
index 0000000..2a3496d
--- /dev/null
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <bpf/bpf_helpers.h>
+
+int _version SEC("version") = 1;
+
+SEC("xdp.frags")
+int xdp_adjust_frags(struct xdp_md *xdp)
+{
+       __u8 *data_end = (void *)(long)xdp->data_end;
+       __u8 *data = (void *)(long)xdp->data;
+       __u8 val[16] = {};
+       __u32 offset;
+       int err;
+
+       if (data + sizeof(__u32) > data_end)
+               return XDP_DROP;
+
+       offset = *(__u32 *)data;
+       err = bpf_xdp_load_bytes(xdp, offset, val, sizeof(val));
+       if (err < 0)
+               return XDP_DROP;
+
+       if (val[0] != 0xaa || val[15] != 0xaa) /* marker */
+               return XDP_DROP;
+
+       val[0] = 0xbb; /* update the marker */
+       val[15] = 0xbb;
+       err = bpf_xdp_store_bytes(xdp, offset, val, sizeof(val));
+       if (err < 0)
+               return XDP_DROP;
+
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
new file mode 100644 (file)
index 0000000..62fb7cd
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define IFINDEX_LO     1
+
+struct {
+       __uint(type, BPF_MAP_TYPE_CPUMAP);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, sizeof(struct bpf_cpumap_val));
+       __uint(max_entries, 4);
+} cpu_map SEC(".maps");
+
+SEC("xdp_cpumap/dummy_cm")
+int xdp_dummy_cm(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
index 532025057711e27aa3cb3cdc80332725f9b40147..48007f17dfa86af0f86235aab320b9b83647ddb0 100644 (file)
@@ -33,4 +33,10 @@ int xdp_dummy_cm(struct xdp_md *ctx)
        return XDP_PASS;
 }
 
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
new file mode 100644 (file)
index 0000000..e1caf51
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, sizeof(struct bpf_devmap_val));
+       __uint(max_entries, 4);
+} dm_ports SEC(".maps");
+
+/* valid program on DEVMAP entry via SEC name;
+ * has access to egress and ingress ifindex
+ */
+SEC("xdp_devmap/map_prog")
+int xdp_dummy_dm(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
index 1e6b9c38ea6d9231ec8c7a860fa9e176472f5b01..8ae11fab8316d8c203faa06c2ca2ed6c79f00a97 100644 (file)
@@ -40,4 +40,11 @@ int xdp_dummy_dm(struct xdp_md *ctx)
 
        return XDP_PASS;
 }
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
index 76cd903117af1abf3868c396877a13d23594cd22..29bbaa58233cce6e58bcacc4cc4dee3ef19bff25 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/if_ether.h>
 #include <linux/btf.h>
 
+#include <bpf/btf.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
@@ -66,6 +67,11 @@ static bool unpriv_disabled = false;
 static int skips;
 static bool verbose = false;
 
+struct kfunc_btf_id_pair {
+       const char *kfunc;
+       int insn_idx;
+};
+
 struct bpf_test {
        const char *descr;
        struct bpf_insn insns[MAX_INSNS];
@@ -92,6 +98,7 @@ struct bpf_test {
        int fixup_map_reuseport_array[MAX_FIXUPS];
        int fixup_map_ringbuf[MAX_FIXUPS];
        int fixup_map_timer[MAX_FIXUPS];
+       struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
        /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
         * Can be a tab-separated sequence of expected strings. An empty string
         * means no log verification.
@@ -744,6 +751,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
        int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
        int *fixup_map_ringbuf = test->fixup_map_ringbuf;
        int *fixup_map_timer = test->fixup_map_timer;
+       struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;
 
        if (test->fill_helper) {
                test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -936,6 +944,26 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
                        fixup_map_timer++;
                } while (*fixup_map_timer);
        }
+
+       /* Patch in kfunc BTF IDs */
+       if (fixup_kfunc_btf_id->kfunc) {
+               struct btf *btf;
+               int btf_id;
+
+               do {
+                       btf_id = 0;
+                       btf = btf__load_vmlinux_btf();
+                       if (btf) {
+                               btf_id = btf__find_by_name_kind(btf,
+                                                               fixup_kfunc_btf_id->kfunc,
+                                                               BTF_KIND_FUNC);
+                               btf_id = btf_id < 0 ? 0 : btf_id;
+                       }
+                       btf__free(btf);
+                       prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id;
+                       fixup_kfunc_btf_id++;
+               } while (fixup_kfunc_btf_id->kfunc);
+       }
 }
 
 struct libcap {
index d7b74eb2833398275f9ae9e49b958003e9a8e989..829be2b9e08e1440729e51f02da66f20ab8755be 100644 (file)
        .prog_type = BPF_PROG_TYPE_TRACEPOINT,
        .result  = ACCEPT,
 },
+{
+       "calls: invalid kfunc call: ptr_to_mem to struct with non-scalar",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 pointer type STRUCT prog_test_fail1 must point to scalar",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_fail1", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: ptr_to_mem to struct with nesting depth > 4",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "max struct nesting depth exceeded\narg#0 pointer type STRUCT prog_test_fail2",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_fail2", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: ptr_to_mem to struct with FAM",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 pointer type STRUCT prog_test_fail3 must point to scalar",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_fail3", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: reg->type != PTR_TO_CTX",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 expected pointer to ctx, but got PTR",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_pass_ctx", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: void * not allowed in func proto without mem size arg",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 pointer type UNKNOWN  must point to scalar",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_mem_len_fail1", 2 },
+       },
+},
 {
        "calls: basic sanity",
        .insns = {
index 0a5d23da486df50dc743c749fcf91711f0d9e8f9..ffa5502ad95ed3fb7712ed5655bb3242c191c83b 100644 (file)
@@ -906,7 +906,10 @@ static bool rx_stats_are_valid(struct ifobject *ifobject)
                        return true;
                case STAT_TEST_RX_FULL:
                        xsk_stat = stats.rx_ring_full;
-                       expected_stat -= RX_FULL_RXQSIZE;
+                       if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+                               expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE;
+                       else
+                               expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE;
                        break;
                case STAT_TEST_RX_FILL_EMPTY:
                        xsk_stat = stats.rx_fill_ring_empty_descs;